packetsnitch 1.5.599

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/.eslintrc.json +28 -0
  2. package/.webpack/x64/main/index.js +2 -0
  3. package/.webpack/x64/main/index.js.map +1 -0
  4. package/.webpack/x64/renderer/assets/css/rubikglitch.woff2 +0 -0
  5. package/.webpack/x64/renderer/assets/css/style.css +1916 -0
  6. package/.webpack/x64/renderer/assets/images/loading.gif +0 -0
  7. package/.webpack/x64/renderer/assets/images/logo.webp +0 -0
  8. package/.webpack/x64/renderer/assets/images/packet-snitch-tag.webp +0 -0
  9. package/.webpack/x64/renderer/main_window/index.html +3 -0
  10. package/.webpack/x64/renderer/main_window/index.js +3 -0
  11. package/.webpack/x64/renderer/main_window/index.js.LICENSE.txt +36 -0
  12. package/.webpack/x64/renderer/main_window/index.js.map +1 -0
  13. package/.webpack/x64/renderer/main_window/preload.js +2 -0
  14. package/.webpack/x64/renderer/main_window/preload.js.map +1 -0
  15. package/backend/common/GeoLite2-City.mmdb +0 -0
  16. package/backend/common/mac-vendors-export.csv +56923 -0
  17. package/backend/common/service-names-port-numbers.csv +15368 -0
  18. package/backend/requirements.txt +14 -0
  19. package/backend/snitch.py +3611 -0
  20. package/forge.config.js +80 -0
  21. package/package.json +102 -0
  22. package/ps-icon.ico +0 -0
  23. package/snitch.spec +44 -0
  24. package/src/assets/css/rubikglitch.woff2 +0 -0
  25. package/src/assets/css/style.css +1916 -0
  26. package/src/assets/images/loading.gif +0 -0
  27. package/src/assets/images/logo.webp +0 -0
  28. package/src/assets/images/packet-snitch-tag.webp +0 -0
  29. package/src/back-comm.js +70 -0
  30. package/src/decoders.js +579 -0
  31. package/src/filter.js +461 -0
  32. package/src/front.js +10 -0
  33. package/src/index.html +1036 -0
  34. package/src/logging.js +150 -0
  35. package/src/main.js +571 -0
  36. package/src/preload.js +73 -0
  37. package/src/renderer.js +30 -0
  38. package/src/ui/common-frontend.js +13 -0
  39. package/src/ui/context-menu.js +88 -0
  40. package/src/ui/decoders.js +1 -0
  41. package/src/ui/main-frontend.js +4957 -0
  42. package/src/ui/panels/crypt-panel.js +565 -0
  43. package/src/ui/panels/data-panel.js +151 -0
  44. package/src/ui/panels/data-tools-panel.js +939 -0
  45. package/src/ui/panels/install-screen.js +59 -0
  46. package/src/ui/panels/keystore-panel.js +1248 -0
  47. package/src/ui/panels/list-panel.js +403 -0
  48. package/src/ui/panels/stats-panel.js +351 -0
  49. package/src/ui/panels/summary-panel.js +63 -0
  50. package/webpack.main.config.js +11 -0
  51. package/webpack.plugins.js +13 -0
  52. package/webpack.preload.config.js +7 -0
  53. package/webpack.renderer.config.js +30 -0
  54. package/webpack.rules.js +35 -0
@@ -0,0 +1,3611 @@
1
+ ## snitch.py: Analyze pcap network captures, extract TCP, UDP, and ICMP packet data, and gather extra information.
2
+ #
3
+ # This script processes .pcap files, extracting TCP, UDP, and ICMP packet payloads and
4
+ # metadata, and generates testcases and info files for each packet. It enriches the
5
+ # output with MIME types, entropy, geoip, network class, banners, and more. DNS packets
6
+ # (UDP/53) are decoded and the query/answer records are included in the output JSON.
7
+ # SNMP (UDP/TCP 161/162), DHCP (UDP 67/68), NTP (UDP 123), and SIP (UDP/TCP 5060/5061)
8
+ # packets are also decoded and their protocol-specific fields included. ICMP packets are
9
+ # fully supported with type, code, ID, and sequence fields. Optionally, it performs
10
+ # active reconnaissance to gather additional network and server information.
11
+ # Summaries and final reports can be generated using a large language model (LLM).
12
+ #
13
+ # Features:
14
+ # - Extracts TCP, UDP, and ICMP packet data and metadata from .pcap files.
15
+ # - Decodes DNS queries and responses from UDP port 53 packets.
16
+ # - Decodes SNMP, DHCP, NTP, and SIP protocol-specific fields.
17
+ # - Decodes ICMP type, code, ID, and sequence fields.
18
+ # - Writes raw payloads and info files to output directories.
19
+ # - Determines MIME types, entropy, geoip, network class, banners, and more.
20
+ # - Optionally performs active reconnaissance (reverse DNS, banners, SSL info, etc.).
21
+ # - Supports multi-threaded processing for large captures.
22
+ # - Summarizes results using LLM integration (Ollama).
23
+ # - Outputs consolidated JSON and summary files.
24
+ #
25
+ # Usage:
26
+ # python3 snitch.py <pcap_file> [options]
27
+ # See command-line argument parser below for available options.
28
+ #
29
+ # Dependencies:
30
+ # - scapy, numpy, requests, chardet, geoip2, magic, yaml, ollama, bs4, scipy, etc.
31
+ #
32
+ # Author: oxagast
33
+ # Import standard and third-party libraries for argument parsing, file handling, networking, compression, and data processing
34
+ import argparse
35
+ import base64
36
+ import csv
37
+ import json
38
+ import os
39
+ import re
40
+ import shutil
41
+ import socket
42
+ import ssl
43
+ import sys
44
+ import textwrap
45
+ import threading
46
+ import time
47
+ import zlib
48
+ from urllib.parse import unquote_plus
49
+ from datetime import datetime
50
+ from decimal import Decimal
51
+ from functools import lru_cache
52
+ import chardet
53
+ import geoip2.database
54
+ import magic
55
+ import numpy as np
56
+ import ollama
57
+ import requests
58
+ import yaml
59
+
60
+ # from tqdm import tqdm
61
+ import ipaddress
62
+ from bs4 import BeautifulSoup
63
+ from ollama import ResponseError
64
+ from scipy.stats import entropy
65
+ from concurrent.futures import ThreadPoolExecutor, as_completed
66
+
67
+ stopEvent = threading.Event()
68
+
69
+ try:
70
+ import scapy.all as scapy
71
+ except ImportError:
72
+ import scapy
73
+
74
# Runtime defaults. Presumably overridden by argument/config handling elsewhere
# in the file (not visible here) -- TODO confirm.
#
# activeRecon must be a real boolean: the previous default was the *string*
# "False", which is truthy, so every `if activeRecon:` check enabled active
# reconnaissance (reverse DNS, banner grabbing) by default.
activeRecon = False
# Two worker threads per CPU; os.cpu_count() may return None, hence the fallback.
numWorkerThreads = 2 * (os.cpu_count() or 1)
# Size of the semaphore that caps simultaneous LLM calls.
numLlmThreads = 5
# Target summary length (in words) quoted in the LLM prompt.
llmResponseLength = 100
llmModelName = "minimax-m2.5:cloud"
useLlm = False
80
+
81
# Result accumulators shared between worker threads. Each list has a companion
# lock that writers hold while appending.
llmSummaries = []
llmSummariesLock = threading.Lock()
allPacketInfo = []
allPacketInfoLock = threading.Lock()

# Semaphore bounding how many LLM calls may be in flight at the same time.
llmCallLock = threading.Semaphore(numLlmThreads)

# Name of the consolidated per-host output file, plus working/script directories.
hostOutputFile = "hosts.json"
currentDir = os.getcwd()
scriptDir = os.path.dirname(os.path.realpath(__file__)) + "/"

# Lookup tables, empty until populated (the original comment names
# init_lookup_tables() as the loader; it is not visible in this chunk).
# (port_int, "tcp"/"udp") -> description string
portDescriptionMap: dict = {}
# uppercase MAC prefix (e.g. "00:1A:2B") -> vendor name
macVendorMap: dict = {}

# GeoIP reader handle (None until opened) and its result cache.
# geoIpCacheLock guards reads and writes of geoIpCache.
geoIpReader = None
geoIpCache: dict = {}
geoIpCacheLock = threading.Lock()

# (ip, port) -> banner dict, so a service is only probed once.
cachedBanners: dict = {}
cachedBannersLock = threading.Lock()

# canonical TCP stream key -> destination port of the stream's first packet
tcpStreamInitialDstPortMap: dict = {}
113
+
114
# HTTP request methods recognised when detecting a request line
# (the original comment names decodeHTTP() as the consumer).
HTTP_METHODS: set = set(
    "GET POST HEAD PUT DELETE PATCH OPTIONS TRACE CONNECT".split()
)

# Ports treated as TLS-wrapped services when choosing between http:// and https://.
TLS_SERVICE_PORTS = {443, 465, 636, 853, 5061, 8443, 9443}
128
+
129
+ # Matches common credential-related field names in HTTP query strings, POST bodies, etc.
130
+ # Each keyword is an independent alternative; compound names like "auth_token" or
131
+ # "api_key" are covered by the optional prefix/suffix anchors.
132
+ CREDENTIAL_FIELD_RE = re.compile(
133
+ r"^(?:.*[_\-.])?(?:pass(?:w(?:or)?d?)?|pw|secret|auth|auth_token|"
134
+ r"credential|api[_\-.]?key|token|user(?:name)?|login|email)(?:[_\-.].*)?$",
135
+ re.IGNORECASE,
136
+ )
137
+
138
+
139
+ def _extractUrlCredentials(paramStr):
140
+ """
141
+ Parse a URL-encoded query string or POST body (e.g. ``user=alice&pass=s3cr3t``).
142
+ Returns a dict of {fieldName: value} for every field whose name matches
143
+ CREDENTIAL_FIELD_RE and whose value is non-empty. Returns an empty dict when
144
+ nothing interesting is found.
145
+ """
146
+ creds = {}
147
+ if not paramStr:
148
+ return creds
149
+ for pair in paramStr.split("&"):
150
+ if "=" not in pair:
151
+ continue
152
+ rawKey, _, rawVal = pair.partition("=")
153
+ key = unquote_plus(rawKey.strip())
154
+ val = unquote_plus(rawVal.strip())
155
+ if val and CREDENTIAL_FIELD_RE.match(key):
156
+ creds[key] = val
157
+ return creds
158
+
159
+
160
# Cookie names treated as sensitive even when CREDENTIAL_FIELD_RE does not match:
# typical session / auth / CSRF cookie names plus the __Secure- / __Host- prefixes.
_SENSITIVE_COOKIE_RE = re.compile(
    r"^(?:sess(?:ion)?(?:id)?|auth(?:_?token)?|access_token|refresh_token|"
    r"remember(?:_me)?(?:_token)?|jwt|bearer|csrf(?:_token)?|xsrf(?:_token)?|"
    r"sid|uid|user(?:id)?|login|pass(?:w(?:or)?d?)?|pw|secret|"
    r"PHPSESSID|ASP\.NET_SessionId|__Secure-.*|__Host-.*)$",
    flags=re.IGNORECASE,
)


def _extractCookieCredentials(cookieHeader):
    """
    Parse a ``Cookie:`` request header (e.g. ``session=abc; token=xyz; q=1``).

    Returns ``{}`` for an empty header. Otherwise the result always contains the
    unmodified header under ``cookie_raw``, plus one ``cookie.<name>`` entry for
    each cookie that has a non-empty value and whose name matches
    CREDENTIAL_FIELD_RE or _SENSITIVE_COOKIE_RE.
    """
    if not cookieHeader:
        return {}
    found = {"cookie_raw": cookieHeader}
    for piece in cookieHeader.split(";"):
        cookieName, equals, cookieValue = piece.strip().partition("=")
        if not equals:
            # Crumb without "=" carries no name/value pair.
            continue
        cookieName = cookieName.strip()
        cookieValue = cookieValue.strip()
        if not cookieValue:
            continue
        if CREDENTIAL_FIELD_RE.match(cookieName) or _SENSITIVE_COOKIE_RE.match(
            cookieName
        ):
            found[f"cookie.{cookieName}"] = cookieValue
    return found
192
+
193
+
194
+ def _extractSetCookieCredentials(setCookieHeader):
195
+ """
196
+ Parse a ``Set-Cookie:`` response header and return a dict with the raw value
197
+ and the parsed cookie name/value pair (before any attributes like HttpOnly).
198
+ """
199
+ if not setCookieHeader:
200
+ return {}
201
+ creds = {"set_cookie_raw": setCookieHeader}
202
+ # The first pair before any ";" is the actual cookie name=value
203
+ firstPair = setCookieHeader.split(";")[0].strip()
204
+ if "=" in firstPair:
205
+ name, _, value = firstPair.partition("=")
206
+ name = name.strip()
207
+ value = value.strip()
208
+ if value:
209
+ creds[f"cookie.{name}"] = value
210
+ return creds
211
+
212
+
213
+ # Matches JSON key-value pairs where the key looks like a credential field.
214
+ # The keyword list mirrors CREDENTIAL_FIELD_RE but is spelled out explicitly here
215
+ # so the pattern is self-contained and does not depend on regex string manipulation.
216
+ _CRED_KEYWORDS = (
217
+ r"pass(?:w(?:or)?d?)?|pw|secret|auth|auth_token|credential|"
218
+ r"api[_\-.]?key|token|user(?:name)?|login|email"
219
+ )
220
+ _JSON_CRED_RE = re.compile(
221
+ r'"(?:(?:.*[_\-.])?' + r'(?:' + _CRED_KEYWORDS + r')' + r'(?:[_\-.].*)?)"'
222
+ r'\s*:\s*"([^"]{1,512})"',
223
+ re.IGNORECASE,
224
+ )
225
+
226
+ # Matches plain-text key:value or key=value lines where the key is credential-like.
227
+ # Delimiters before the key include whitespace, common punctuation, and '&' (form data).
228
+ _TEXT_CRED_RE = re.compile(
229
+ r'(?:^|[\s,{;&])'
230
+ r'(?:(?:.*[_\-.])?(?:' + _CRED_KEYWORDS + r')(?:[_\-.].*)?)'
231
+ r'(?:\s*[=:]\s*)([^\s&"\'<>,;]{1,512})',
232
+ re.IGNORECASE | re.MULTILINE,
233
+ )
234
+
235
+
236
+ def _extractPostBodyCredentials(body, contentType):
237
+ """
238
+ Scan a POST/PUT/PATCH body for credential fields regardless of content type.
239
+ - For ``application/json`` bodies: uses JSON key-value regex.
240
+ - For all other bodies (multipart, plain-text, XML, etc.): uses a more general
241
+ key=value / key:value regex.
242
+ Returns a dict of {fieldName: value}; empty dict when nothing is found.
243
+ """
244
+ if not body or not body.strip():
245
+ return {}
246
+ creds = {}
247
+ lowerContentType = contentType.lower()
248
+ if "json" in lowerContentType:
249
+ for match in _JSON_CRED_RE.finditer(body):
250
+ val = match.group(1).strip()
251
+ if val:
252
+ # Derive a human-readable key from the JSON property name
253
+ fullMatch = match.group(0)
254
+ keyEnd = fullMatch.index('"', 1)
255
+ creds[fullMatch[1:keyEnd]] = val
256
+ else:
257
+ for match in _TEXT_CRED_RE.finditer(body):
258
+ val = match.group(1).strip()
259
+ if val:
260
+ # Use the raw matched token before the separator as the key
261
+ raw = match.group(0).lstrip(" \t,{;&")
262
+ sep = next(
263
+ (i for i, c in enumerate(raw) if c in "=:"), len(raw)
264
+ )
265
+ key = raw[:sep].strip()
266
+ if key:
267
+ creds[key] = val
268
+ return creds
269
+
270
+
271
def llmQuery(packetInfoStr):
    """
    Ask the configured LLM to summarise one packet's JSON description.

    The call runs while holding llmCallLock (a semaphore), which caps the number
    of concurrent LLM requests. On success the model's text is appended to the
    global llmSummaries list (under llmSummariesLock) and ``None`` is returned.

    Up to two attempts are made. After a ResponseError the function sleeps
    (1s, doubling per retry) and retries with the prompt payload cut in half.

    Returns:
        None                 -- summary appended, or all attempts failed with
                                ResponseError.
        {"Summary": ""}      -- the model returned no "response" field.
        {"Summary": "..."}   -- LLM disabled/empty input, or an unexpected error.

    Fixes relative to the previous version: a successful call now stops the loop
    (before, every success triggered a second request and a duplicate summary);
    the retry really halves the payload (it used to divide by 2**0 == 1); no
    pointless sleep after the final failed attempt; attempt counters in the log
    messages match the real number of attempts.
    """
    maxAttempts = 2
    with llmCallLock:
        try:
            if not (ollama and useLlm and packetInfoStr):
                return {"Summary": "LLM integration not enabled"}
            for attempt in range(1, maxAttempts + 1):
                try:
                    llmResponse = ollama.generate(
                        model=llmModelName,
                        prompt=f"Tell me what you can about the following network capture (encoded in json, from pcap), its payload, and any interesting or unusual traits... respond with a single paragraph around {llmResponseLength} words: {packetInfoStr}",
                    )
                except ResponseError as responseErr:
                    if verbose >= 2:
                        print(
                            f"LLM API response error (attempt {attempt}/{maxAttempts}): {str(responseErr)}",
                            file=sys.stderr,
                        )
                    if attempt == maxAttempts:
                        break
                    time.sleep(2 ** (attempt - 1))  # exponential backoff
                    # Halve the payload in case the failure was size-related.
                    packetInfoStr = packetInfoStr[: len(packetInfoStr) // 2]
                    if verbose >= 1:
                        print(
                            f"Retrying with truncated (halved) string (attempt {attempt + 1}/{maxAttempts})...",
                            file=sys.stderr,
                        )
                    continue
                if llmResponse and "response" in llmResponse:
                    # Protect the shared list from concurrent appends.
                    with llmSummariesLock:
                        llmSummaries.append(llmResponse["response"])
                    return None
                return {"Summary": ""}
            return None
        except Exception as e:
            return {"Summary": "LLM integration error: " + str(e)}
311
+
312
+
313
def configLoader(filename="conf.yaml"):
    """
    Load and parse a YAML configuration file.

    filename: path of the YAML file (default "conf.yaml").

    Returns whatever ``yaml.safe_load`` yields for the document.

    This function does not exit on failure: a missing or unreadable file raises
    the OSError (e.g. FileNotFoundError) from ``open``, and parse errors raised
    by ``yaml.safe_load`` propagate to the caller.

    The file is read as UTF-8 explicitly so parsing does not depend on the
    platform's default encoding.
    """
    with open(filename, "r", encoding="utf-8") as configFile:
        return yaml.safe_load(configFile)
320
+
321
+
322
@lru_cache(maxsize=2048)
def getPortDescription(port, protocol="tcp"):
    """
    Look up the description stored in portDescriptionMap for (port, protocol).

    Returns "No description available" when the pair is not in the map.
    Results are memoised by lru_cache, so a value looked up once is served from
    the cache afterwards.
    """
    lookupKey = (port, protocol)
    try:
        return portDescriptionMap[lookupKey]
    except KeyError:
        return "No description available"
330
+
331
+
332
def reverseDnsLookup(ip):
    """
    Reverse-resolve an IP address with ``socket.gethostbyaddr``.

    Returns a dict:
      {"Resolved": True, "Hostnames": <gethostbyaddr result>} on success -- the
          value is the raw result, whose first element is the primary host name;
      {"Resolved": False, "Error": <message>} when the lookup raises or yields
          nothing.
    Note that the failure dicts carry no "Hostnames" key.
    """
    try:
        lookup = socket.gethostbyaddr(ip)
    except Exception as e:
        return {
            "Resolved": False,
            "Error": "Address resolution error: " + str(e),
        }
    if lookup and len(lookup) > 0:
        return {"Resolved": True, "Hostnames": lookup}
    return {"Resolved": False, "Error": "No PTR record found"}
350
+
351
+
352
def _bannerPageTitle(port, timeout, hostname, serviceName):
    """Fetch the page title via https:// or http://, chosen from port and service name."""
    try:
        serviceNameNormalized = (
            serviceName.lower() if isinstance(serviceName, str) and serviceName else ""
        )
        isLikelyTlsService = port in TLS_SERVICE_PORTS or any(
            marker in serviceNameNormalized for marker in ("https", "ssl", "tls", "wss")
        )
        scheme = "https" if isLikelyTlsService else "http"
        return getPageTitle(scheme + "://" + hostname + ":" + str(port), timeout)
    except Exception:
        # e.g. hostname is not a string; the title is best-effort only.
        return "N/A"


def _bannerTlsDetails(ip, port, timeout, hostname):
    """Try a TLS handshake; return (certificate, cipher, protocol version)."""
    sslCert, cipherInfo, sslVersion = "Unavailable", "N/A", "N/A"
    # PROTOCOL_TLS_CLIENT negotiates the highest version both sides support;
    # the previous PROTOCOL_TLSv1_2 is deprecated and cannot reach TLS 1.3-only
    # servers. Verification is disabled: this is a probe, not a trust decision.
    sslContext = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    sslContext.check_hostname = False  # must be cleared before CERT_NONE
    sslContext.verify_mode = ssl.CERT_NONE
    if hasattr(ssl, "TLSVersion"):
        # Keep the original intent of refusing TLS 1.0 / 1.1.
        sslContext.minimum_version = ssl.TLSVersion.TLSv1_2

    serverHostnamesToTry = [None]
    if isinstance(hostname, str) and hostname:
        try:
            ipaddress.ip_address(hostname)
        except ValueError:
            # Only use SNI when the provided host is a domain, not a literal IP.
            serverHostnamesToTry.insert(0, hostname)

    for serverHostname in serverHostnamesToTry:
        try:
            with socket.create_connection((ip, port), timeout=timeout) as tcpSocket:
                with sslContext.wrap_socket(
                    tcpSocket, server_hostname=serverHostname
                ) as sslSocket:
                    # With CERT_NONE the dict form of getpeercert() is always
                    # empty, so the certificate was never recorded before.
                    # The DER form is available regardless of validation.
                    derCert = sslSocket.getpeercert(binary_form=True)
                    if derCert:
                        sslCert = ssl.DER_cert_to_PEM_cert(derCert)
                    cipherInfo = sslSocket.cipher() or "N/A"
                    sslVersion = sslSocket.version() or "N/A"
                    break
        except Exception:
            # Port may not speak TLS at all; try the next candidate or give up.
            continue
    return sslCert, cipherInfo, sslVersion


def _bannerGreeting(ip, port, timeout):
    """Return the service's greeting, or its reply to an HTTP HEAD; "" if none."""
    try:
        # Context manager guarantees the socket is closed on every path.
        with socket.create_connection((ip, port), timeout=timeout) as tcpSocket:
            try:
                banner = tcpSocket.recv(1024).decode(errors="ignore").strip()
            except socket.timeout:
                # Server waits for the client to speak first (e.g. HTTP).
                # Previously this timeout aborted the probe, so the HEAD
                # fallback below was never reached for such servers.
                banner = ""
            if not banner:
                tcpSocket.sendall(b"HEAD / HTTP/1.0\r\n\r\n")
                banner = tcpSocket.recv(1024).decode(errors="ignore").strip()
            return banner
    except Exception:
        return ""


def getServBanner(ip, port, timeout, hostname, serviceName=None):
    """
    Actively probe a service and collect its banner, page title and TLS details.

    ip, port: endpoint to probe.
    timeout: timeout in seconds applied to each network operation.
    hostname: name used for the page-title URL and, when it is not a literal IP,
        for TLS SNI.
    serviceName: optional service name; helps choose https:// on non-standard ports.

    Returns a dict with "Page Title" and "Encryption Data" (a dict with
    "SSL Cert" -- PEM text or "Unavailable" --, "SSL Version" and
    "Encrypted With", or the string "N/A" when no TLS handshake succeeded), plus
    "Banner" when the service sent one.

    Results are cached in cachedBanners keyed by (ip, port) only, so later calls
    for the same endpoint return the first result regardless of hostname or
    serviceName.
    """
    ipPortKey = (ip, port)
    # Fast cache check before doing any network work.
    with cachedBannersLock:
        if ipPortKey in cachedBanners:
            return cachedBanners[ipPortKey]

    pageTitle = _bannerPageTitle(port, timeout, hostname, serviceName)
    sslCert, cipherInfo, sslVersion = _bannerTlsDetails(ip, port, timeout, hostname)
    banner = _bannerGreeting(ip, port, timeout)

    bannerInfo = {}
    if banner:
        bannerInfo["Banner"] = banner
    bannerInfo["Page Title"] = pageTitle
    bannerInfo["Encryption Data"] = (
        {
            "SSL Cert": sslCert,
            "SSL Version": sslVersion,
            "Encrypted With": cipherInfo,
        }
        if sslVersion != "N/A"
        else "N/A"
    )

    # Store in cache so repeated calls for the same (ip, port) are free.
    with cachedBannersLock:
        cachedBanners[ipPortKey] = bannerInfo
    return bannerInfo
484
+
485
+
486
def getPageTitle(url, timeout):
    """
    Fetch ``url`` and return the text of its HTML <title>.

    url: full URL including scheme.
    timeout: request timeout in seconds.

    Certificate verification is disabled (``verify=False``). Returns "N/A" when
    the request fails, the response status is an error, the page has no title,
    or the title has no plain string content.
    """
    try:
        # verify=False makes urllib3 warn on every request; silence that.
        # The warning class is referenced through requests' bundled urllib3
        # alias: the bare name InsecureRequestWarning is not among the imports
        # visible at the top of this file, and the resulting NameError would be
        # swallowed by the except below, making this function always return "N/A".
        requests.packages.urllib3.disable_warnings(
            category=requests.packages.urllib3.exceptions.InsecureRequestWarning
        )
        httpResponse = requests.get(url, timeout=timeout, verify=False)
        httpResponse.raise_for_status()
        htmlParser = BeautifulSoup(httpResponse.content, "html.parser")
        titleTag = htmlParser.title
        # .string is None for an empty <title> or one containing nested tags.
        titleText = titleTag.string if titleTag else None
        if not titleText:
            return "N/A"
        return str(titleText).strip() or "N/A"
    except Exception:
        return "N/A"
503
+
504
+
505
def writeTestcase(data, outputDirPath, portDir, index):
    """
    Write a raw packet payload to ``<outputDirPath>/<portDir>/pcap.data_packet.<index>.dat``.

    data: payload bytes.
    outputDirPath: base output directory.
    portDir: name of the per-port sub-directory.
    index: packet index used in the file name.

    The sub-directory is created on demand. ``os.makedirs(..., exist_ok=True)``
    is used instead of "check exists, then mkdir" because that pair races when
    several worker threads handle the same port at once (the loser of the race
    printed a spurious error). A genuine creation failure is still reported as
    non-fatal; the subsequent ``open`` then raises.
    """
    destDir = outputDirPath + "/" + portDir
    try:
        os.makedirs(destDir, exist_ok=True)
    except OSError:
        print("Error: Nonfatal: Could not create minor dir.")
    with open(destDir + "/pcap.data_packet." + str(index) + ".dat", "wb") as out:
        out.write(data)
519
+
520
+
521
def joinInfo(outputDirPath, portDir, index, dataTypeJson, packetInfoJson, host):
    """
    Combine a packet's info JSON and its analysis JSON and persist the result.

    packetInfoJson / dataTypeJson: JSON strings; they become the "Packet Info"
    and "Extra Info" members of the merged document, which is written to
    ``<outputDirPath>/<portDir>/pcap.info_packet.<index>.json``.

    The merged document is also appended, together with ``host``, to the shared
    allPacketInfo list while holding allPacketInfoLock, and is pretty-printed
    when the global ``verbose`` level is 2 or higher.

    Returns the merged dict.
    """
    packetPart = json.loads(packetInfoJson)
    extraPart = json.loads(dataTypeJson)
    mergedJson = {"Packet Info": packetPart, "Extra Info": extraPart}

    infoPath = "/".join(
        [outputDirPath, portDir, "pcap.info_packet." + str(index) + ".json"]
    )
    encoded = json.dumps(mergedJson).encode()
    with open(infoPath, "wb+") as out:
        out.write(encoded)

    if verbose >= 2:
        print(json.dumps(mergedJson, indent=2))

    record = {"Host": host, "Packet": mergedJson}
    # The list is shared between worker threads.
    with allPacketInfoLock:
        allPacketInfo.append(record)
    return mergedJson
539
+
540
+
541
# host -> list of merged packet dicts; filled by byHost().
packetsByHost = {}


def sortAndIndexPackets(hostPacketMap):
    """
    Sort each host's packet list chronologically, in place, and number the packets.

    hostPacketMap: dict mapping host -> list of packet dicts. Every packet needs
        ``["Packet Info"]["Packet Timestamp"]`` formatted as
        "%Y-%m-%d %H:%M:%S.%f"; any other format makes ``strptime`` raise
        ValueError.

    Each packet receives ``["Packet Info"]["Index"]`` starting at 1 in timestamp
    order. Empty lists are left alone. Returns the same mapping object.
    """
    timestampFormat = "%Y-%m-%d %H:%M:%S.%f"

    def capturedAt(packet):
        return datetime.strptime(
            packet["Packet Info"]["Packet Timestamp"], timestampFormat
        )

    for packets in hostPacketMap.values():
        if not packets:
            continue
        packets.sort(key=capturedAt)
        for position, packet in enumerate(packets):
            packet["Packet Info"]["Index"] = position + 1

    return hostPacketMap
562
+
563
+
564
def byHost(outputDirPath, finalSummary):
    """
    Group every allPacketInfo entry by its "Host" value and write the report file.

    Each entry's "Packet" is appended to the global packetsByHost mapping under
    its host (every packet is kept, including the first one seen for a host).
    The lists are then sorted and indexed by sortAndIndexPackets and written,
    together with ``finalSummary``, as indented JSON to
    ``<outputDirPath>/<hostOutputFile>``.

    packetsByHost is module-level state that is extended, not reset, so calling
    this function again appends the same packets a second time.
    """
    global packetsByHost
    for record in allPacketInfo:
        packetsByHost.setdefault(record.get("Host"), []).append(record.get("Packet"))

    packetsByHost = sortAndIndexPackets(packetsByHost)

    reportPath = outputDirPath + "/" + hostOutputFile
    with open(reportPath, "w+", encoding="utf-8") as f:
        f.write(
            json.dumps({"Host": packetsByHost, "Final Summary": finalSummary}, indent=2)
        )
586
+
587
+
588
+ @lru_cache(maxsize=4096)
589
+ def getNetclass(ip):
590
+ """
591
+ Determine the network class (A, B, C, or Unknown) of an IPv4 address.
592
+ Cached to avoid repeated parsing of the same IP addresses.
593
+ """
594
+ ipAddressObj = ipaddress.ip_address(ip)
595
+ # Get the first octet
596
+ firstOctet = int(str(ipAddressObj).split(".")[0])
597
+ # Determine the class
598
+ if 1 <= firstOctet <= 127:
599
+ return "A"
600
+ elif 128 <= firstOctet <= 191:
601
+ return "B"
602
+ elif 192 <= firstOctet <= 223:
603
+ return "C"
604
+ elif 224 <= firstOctet <= 239:
605
+ return "D"
606
+ elif 240 <= firstOctet <= 255:
607
+ return "E"
608
+ else:
609
+ return "Invalid IP"
610
+
611
+
612
def safeDecompress(compressedData):
    """
    Best-effort decompression of gzip- or zlib-wrapped data.

    compressedData: bytes to try to inflate.

    Returns the decompressed bytes, or ``b""`` when the input is not a valid
    gzip/zlib stream.

    ``wbits = MAX_WBITS | 32`` enables automatic header detection for both
    formats; the previous ``MAX_WBITS | 16`` accepted gzip only, so
    zlib-wrapped payloads were silently reported as not compressed.
    """
    decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS | 32)
    result = b""
    try:
        result = decompressor.decompress(compressedData)
        result += decompressor.flush()
    except zlib.error:
        # Not compressed (or corrupt): callers treat empty output as "no data".
        pass
    return result
628
+
629
+
630
def getGeoipInfo(ip, srcOrDst):
    """
    Return GeoIP details (country, city, postal code, time zone) for an address.

    ip: address to look up with ``geoIpReader.city``.
    srcOrDst: "src" produces ``loc.src.*`` alias keys; any other value produces
        ``loc.dst.*`` keys.

    Returns a dict holding each value under a human-readable key and its dotted
    alias(es). Special results:
      {"Location": "Error: GeoIP database not found!"} when geoIpReader is None;
      {"Location": "Localnet"} when the address is not in the database;
      {"Location": "Error: <message>"} on any other failure.

    Results other than the missing-reader case are cached in geoIpCache under
    (ip, srcOrDst). The lock is held only around cache access, not the query.
    """
    if geoIpReader is None:
        return {"Location": "Error: GeoIP database not found!"}

    cacheKey = (ip, srcOrDst)
    with geoIpCacheLock:
        if cacheKey in geoIpCache:
            return geoIpCache[cacheKey]

    side = "src" if srcOrDst == "src" else "dst"
    try:
        record = geoIpReader.city(ip)
        country = record.country.name
        city = record.city.name
        postal = record.postal.code  # type: ignore
        timeZone = record.location.time_zone  # type: ignore
        geoIpResult = {
            "Country": country,
            f"loc.{side}.country": country,
            "City": city,
            f"loc.{side}.city": city,
            "Postal Code": postal,
            f"loc.{side}.postal": postal,
            "Time Zone": timeZone,
            f"loc.{side}.tz": timeZone,
            f"loc.{side}.timezone": timeZone,
        }
    except geoip2.errors.AddressNotFoundError:  # type: ignore
        geoIpResult = {"Location": "Localnet"}
    except Exception as e:
        geoIpResult = {"Location": "Error: " + str(e)}

    with geoIpCacheLock:
        geoIpCache[cacheKey] = geoIpResult
    return geoIpResult
681
+
682
+
683
def getTcpStreamKey(srcIp, srcPort, dstIp, dstPort):
    """
    Build a key that is identical for both directions of a TCP conversation.

    Each endpoint is normalised to ``(str(ip), int(port))``; the key is the pair
    of endpoints with the smaller endpoint first, so swapping source and
    destination gives the same tuple.
    """
    sourceEnd = (str(srcIp), int(srcPort))
    destEnd = (str(dstIp), int(dstPort))
    if destEnd < sourceEnd:
        return (destEnd, sourceEnd)
    return (sourceEnd, destEnd)
690
+
691
+
692
def buildTcpStreamInitialDstPortMap(packetList):
    """
    Map every TCP stream to the destination port of its first packet.

    packetList: packets in capture order; each must support ``haslayer`` and
        layer indexing (``p["IP"]``, ``p["TCP"]``). Packets lacking an IP or a
        TCP layer are ignored.

    Returns ``{streamKey: dport}`` where streamKey comes from getTcpStreamKey and
    dport is taken from the earliest packet seen for that stream.
    """
    firstDstPorts = {}
    for packet in packetList:
        if not packet.haslayer("IP") or not packet.haslayer("TCP"):
            continue
        ipLayer = packet["IP"]
        tcpLayer = packet["TCP"]
        key = getTcpStreamKey(ipLayer.src, tcpLayer.sport, ipLayer.dst, tcpLayer.dport)
        if key in firstDstPorts:
            # Later packets of a known stream never change the recorded port.
            continue
        firstDstPorts[key] = tcpLayer.dport
    return firstDstPorts
705
+
706
+
707
def getDatatypes(data, dstPort, sourceIp, destIp, timeout, protocol="tcp"):
    """
    Classify a payload: MIME type, per-line type descriptions, decompression, traits.

    data: payload bytes.
    dstPort, sourceIp, destIp, timeout, protocol: passed through to getTraits.

    Every line of the payload is described with libmagic and run through
    safeDecompress. When several lines decompress, the details of the last
    successful one are reported. The descriptions "empty" and "data" are dropped;
    if nothing remains the list is ["Unknown data type"].

    Returns a dict with the MIME type, the decompression info, the distinct line
    descriptions and the traits dict, several of them under two key spellings.
    """
    mimeType = magic.from_buffer(data, mime=True)
    lineDescs = []
    decomprInfo = {"Decompressed": False}
    for line in data.splitlines():
        lineDescs.append(magic.from_buffer(line))
        inflated = safeDecompress(line)
        if not inflated:
            continue
        hexText = inflated.hex()
        asciiText = inflated.decode(errors="ignore")
        decomprInfo = {
            "Decompressed data": {
                "Decompressed Hex Encoded": hexText,
                "payload.decompressed.hex": hexText,
                "Decompressed ASCII Encoded": asciiText,
                "payload.decompressed.ascii": asciiText,
            },
        }

    uniqueDescs = [
        desc for desc in set(lineDescs) if desc not in ("empty", "data")
    ]
    if not uniqueDescs:
        uniqueDescs = ["Unknown data type"]

    traitData = getTraits(data, dstPort, sourceIp, destIp, timeout, protocol)
    return {
        "MIME Type": mimeType,
        "payload.mime": mimeType,
        "Decompressed": decomprInfo,
        "payload.decompressed": decomprInfo,
        "Data Types": uniqueDescs,
        "Traits": traitData,
    }
747
+
748
+
749
@lru_cache(maxsize=1024)
def getServ(port, protocol="tcp"):
    """
    Resolve a port/protocol pair to a service name via ``socket.getservbyport``.

    Returns "Unknown" whenever the lookup raises (unknown port, out-of-range or
    non-integer port, ...). Results are memoised with lru_cache.
    """
    try:
        resolved = socket.getservbyport(port, protocol)
    except Exception:
        resolved = "Unknown"
    return resolved
761
+
762
+
763
def getTraits(data, dstPort, sourceIp, destIp, timeout, protocol="tcp"):
    """
    Compute payload statistics and network/server traits for one packet.

    data: payload bytes.
    dstPort, protocol: used for the service-name and port-description lookups
        ("tcp" or "udp").
    sourceIp, destIp: addresses used for GeoIP and network-class lookups.
    timeout: timeout in seconds passed to the banner probe.

    When active reconnaissance is enabled, the destination is reverse-resolved
    and probed with getServBanner; otherwise placeholder values are reported.

    Returns a dict with entropy, network data, payload length, server info and
    character statistics, most of them under two key spellings.

    Fixes relative to the previous version:
    - The activeRecon flag is interpreted as a boolean even if it holds a string
      such as "False" (a non-empty string is truthy, which switched recon on).
    - The banner probe no longer depends on reverse DNS succeeding: when the
      lookup fails, the destination IP is used as the host name. Before, the
      probe was skipped because failed lookups carry no "Hostnames" entry.
    """
    byteCounts = np.bincount(list(data))
    shannonEntropy = entropy(byteCounts, base=2)
    dataLength = len(data)
    protoName = getServ(dstPort, protocol)
    charsetType = "ascii" if all(32 <= b <= 126 for b in data) else "binary"
    uniqueCharsSet = set(data)
    uniqueCharCount = len(uniqueCharsSet)

    if isinstance(activeRecon, str):
        reconEnabled = activeRecon.strip().lower() not in (
            "",
            "false",
            "0",
            "no",
            "off",
        )
    else:
        reconEnabled = bool(activeRecon)

    if reconEnabled:
        dnsHostnames = reverseDnsLookup(destIp)
        resolvedNames = dnsHostnames.get("Hostnames")
        # First element of a successful lookup is the primary host name.
        probeHost = (
            resolvedNames[0]
            if dnsHostnames.get("Resolved") and resolvedNames
            else destIp
        )
        banner = getServBanner(destIp, dstPort, timeout, probeHost, protoName)
    else:
        dnsHostnames = {
            "Resolved": False,
            "Error": "Active recon not performed",
            "Hostnames": [],
        }
        banner = "Active recon not performed"

    encoding = chardet.detect(data)
    # Charset detection is only reported for lower-entropy payloads.
    encodingInfo = (
        encoding if shannonEntropy <= 4.85 else "Unavailable for high entropy data"
    )
    srcGeoInfo = getGeoipInfo(sourceIp, "src")
    dstGeoInfo = getGeoipInfo(destIp, "dst")
    srcNetClass = getNetclass(sourceIp)
    dstNetClass = getNetclass(destIp)
    portDesc = getPortDescription(dstPort, protocol)
    return {
        "Shannon Entropy": shannonEntropy,
        "payload.entropy": shannonEntropy,
        "Network Data": {
            "Source IP": {
                "Class": srcNetClass,
                "ip.src.class": srcNetClass,
                "Location": srcGeoInfo,
                "ip.src.location": srcGeoInfo,
            },
            "Destination IP": {
                "Class": dstNetClass,
                "ip.dst.class": dstNetClass,
                "Location": dstGeoInfo,
                "ip.dst.location": dstGeoInfo,
            },
            # NOTE(review): "Port Protcol" is misspelled but is an output key;
            # left unchanged because consumers may rely on it -- confirm.
            "Port Protcol": protoName,
            "tcp.proto": protoName,
            "Port Description": portDesc,
            "tcp.desc": portDesc,
            "Hostnames": dnsHostnames,
            "dns.hostnames": dnsHostnames,
        },
        "Length": dataLength,
        "Server Info": banner,
        "host.banner": banner,
        "Characters": {
            "Charset": charsetType,
            "payload.charset": charsetType,
            "Encoding": encodingInfo,
            "payload.encoding": encodingInfo,
            "Characters used": uniqueCharCount,
            "payload.chars.used": uniqueCharCount,
            "Unique characters": bytearray(list(uniqueCharsSet)).hex(),
        },
    }
844
+
845
+
846
def macAddrToVendor(macAddr):
    """
    Resolve a MAC address to its vendor name.

    The first eight characters of ``macAddr`` are upper-cased (e.g. "00:1A:2B")
    and used as the key into the module-level ``macVendorMap`` dict.
    Returns "Unknown Vendor" when that prefix has no entry.
    """
    return macVendorMap.get(macAddr[:8].upper(), "Unknown Vendor")
854
+
855
+
856
def decodeSNMP(p):
    """
    Extract SNMP header fields from a packet.

    ``p`` must offer ``haslayer("SNMP")`` and ``p["SNMP"]`` (scapy-style access).
    The result carries each value twice: under a display key ('Version',
    'Community', 'PDU Type') and under a dot-notation key ('snmp.version',
    'snmp.community', 'snmp.pdu_type').

    Returns None when the packet has no SNMP layer or any field cannot be decoded.
    """
    if not p.haslayer("SNMP"):
        return None
    layer = p["SNMP"]
    try:
        versionNumber = int(layer.version)
        knownVersions = {0: "SNMPv1", 1: "SNMPv2c", 3: "SNMPv3"}
        if versionNumber in knownVersions:
            versionLabel = knownVersions[versionNumber]
        else:
            versionLabel = f"Unknown({versionNumber})"

        # A missing attribute and an explicit None both mean "no community".
        rawCommunity = getattr(layer, "community", None)
        if rawCommunity is None:
            communityText = ""
        elif isinstance(rawCommunity, bytes):
            communityText = rawCommunity.decode(errors="ignore")
        else:
            communityText = str(rawCommunity)

        # The PDU is reported by the class name of the PDU object.
        pdu = getattr(layer, "PDU", None)
        pduLabel = "Unknown" if pdu is None else pdu.__class__.__name__

        return {
            "Version": versionLabel,
            "snmp.version": versionLabel,
            "Community": communityText,
            "snmp.community": communityText,
            "PDU Type": pduLabel,
            "snmp.pdu_type": pduLabel,
        }
    except Exception:
        return None
890
+
891
+
892
def decodeDHCP(p):
    """
    Decode DHCP/BOOTP layer fields from a scapy packet.
    Returns a dict with both display-friendly keys and dot-notation keys for message
    type, transaction ID, and IP fields (Client IP, Your IP, Server IP), or None if
    the packet does not contain a DHCP layer or decoding fails.

    The BOOTP-derived fields are only present when the packet also has a BOOTP
    layer. Malformed entries in the DHCP option list (non-tuples, empty or
    one-element tuples) are skipped instead of aborting the whole decode.
    """
    if not p.haslayer("DHCP"):
        return None
    dhcpLayer = p["DHCP"]
    bootpLayer = p["BOOTP"] if p.haslayer("BOOTP") else None
    try:
        msgTypeMap = {
            1: "Discover",
            2: "Offer",
            3: "Request",
            4: "Decline",
            5: "ACK",
            6: "NAK",
            7: "Release",
            8: "Inform",
        }
        msgType = "Unknown"
        for opt in dhcpLayer.options:
            # Check the length before indexing so an empty tuple cannot raise.
            if isinstance(opt, tuple) and len(opt) > 1 and opt[0] == "message-type":
                msgType = msgTypeMap.get(opt[1], str(opt[1]))
                break
        result = {
            "Message Type": msgType,
            "dhcp.msg_type": msgType,
        }
        # Test presence explicitly rather than relying on the layer's truthiness.
        if bootpLayer is not None:
            try:
                xid = hex(int(bootpLayer.xid)) if hasattr(bootpLayer, "xid") else "N/A"
            except (TypeError, ValueError):
                xid = "N/A"
            ciaddr = str(getattr(bootpLayer, "ciaddr", "N/A"))
            yiaddr = str(getattr(bootpLayer, "yiaddr", "N/A"))
            siaddr = str(getattr(bootpLayer, "siaddr", "N/A"))
            result["Transaction ID"] = xid
            result["dhcp.xid"] = xid
            result["Client IP"] = ciaddr
            result["dhcp.ciaddr"] = ciaddr
            result["Your IP"] = yiaddr
            result["dhcp.yiaddr"] = yiaddr
            result["Server IP"] = siaddr
            result["dhcp.siaddr"] = siaddr
        return result
    except Exception:
        return None
942
+
943
+
944
def decodeNTP(p):
    """
    Extract NTP header fields from a packet.

    ``p`` must offer ``haslayer("NTP")`` and ``p["NTP"]`` (scapy-style access).
    Leap indicator, version, mode and stratum default to 0 when the layer lacks
    the attribute; the reference ID defaults to "N/A". The mode number is
    translated to a name, or to "Unknown(<n>)" outside 0-7. Each value is
    returned under a display key and a dot-notation ``ntp.*`` key.

    Returns None when the packet has no NTP layer or a field cannot be converted.
    """
    if not p.haslayer("NTP"):
        return None
    header = p["NTP"]
    # Position in the tuple is the NTP mode number.
    modeNames = (
        "Reserved",
        "Symmetric Active",
        "Symmetric Passive",
        "Client",
        "Server",
        "Broadcast",
        "NTP Control",
        "Private",
    )
    try:
        leapIndicator = int(getattr(header, "leap", 0))
        ntpVersion = int(getattr(header, "version", 0))
        modeNumber = int(getattr(header, "mode", 0))
        stratumLevel = int(getattr(header, "stratum", 0))
        if 0 <= modeNumber < len(modeNames):
            modeLabel = modeNames[modeNumber]
        else:
            modeLabel = f"Unknown({modeNumber})"
        referenceId = str(getattr(header, "id", "N/A"))
    except Exception:
        return None
    return {
        "Leap Indicator": leapIndicator,
        "ntp.leap": leapIndicator,
        "Version": ntpVersion,
        "ntp.version": ntpVersion,
        "Mode": modeLabel,
        "ntp.mode": modeLabel,
        "Stratum": stratumLevel,
        "ntp.stratum": stratumLevel,
        "Reference ID": referenceId,
        "ntp.ref_id": referenceId,
    }
985
+
986
+
987
def decodeSIP(rawPayload):
    """
    Decode SIP message fields from raw payload bytes.
    Parses the first line and common headers (From, To, Call-ID).
    Returns a dict with both display-friendly keys and dot-notation keys for message
    type, method/status, and headers, or None if the payload is not a SIP message or
    decoding fails.

    Header names are matched case-insensitively, the compact forms ``f``, ``t``
    and ``i`` are accepted for From, To and Call-ID, a space after the colon is
    optional, and parsing stops at the blank line that ends the header section
    so body lines are never mistaken for headers.
    """
    sipMethods = {
        "INVITE",
        "ACK",
        "BYE",
        "CANCEL",
        "REGISTER",
        "OPTIONS",
        "SUBSCRIBE",
        "NOTIFY",
        "REFER",
        "INFO",
        "UPDATE",
        "PRACK",
    }
    # Compact header forms mapped to their long (lower-cased) names.
    compactForms = {"f": "from", "t": "to", "i": "call-id"}
    try:
        text = rawPayload.decode(errors="ignore")
        lines = text.split("\r\n") if "\r\n" in text else text.split("\n")
        firstLine = lines[0].strip()
        isSipResponse = firstLine.startswith("SIP/")
        isSipRequest = " " in firstLine and firstLine.split(" ")[0] in sipMethods
        if not isSipResponse and not isSipRequest:
            return None

        headers = {}
        for line in lines[1:]:
            if not line.strip():
                # Blank line: end of headers, the remainder is the message body.
                break
            name, sep, value = line.partition(":")
            if not sep:
                continue
            name = name.strip().lower()
            headers[compactForms.get(name, name)] = value.strip()

        fromHeader = headers.get("from", "Unknown")
        toHeader = headers.get("to", "Unknown")
        callId = headers.get("call-id", "Unknown")
        parts = firstLine.split(" ", 2)

        if isSipRequest:
            method = parts[0]
            requestUri = parts[1] if len(parts) > 1 else "Unknown"
            result = {
                "Type": "Request",
                "sip.type": "Request",
                "Method": method,
                "sip.method": method,
                "Request URI": requestUri,
                "sip.uri": requestUri,
            }
        else:
            statusCode = parts[1] if len(parts) > 1 else "Unknown"
            statusMsg = parts[2] if len(parts) > 2 else "Unknown"
            result = {
                "Type": "Response",
                "sip.type": "Response",
                "Status Code": statusCode,
                "sip.status_code": statusCode,
                "Status Message": statusMsg,
                "sip.status_msg": statusMsg,
            }
        result["From"] = fromHeader
        result["sip.from"] = fromHeader
        result["To"] = toHeader
        result["sip.to"] = toHeader
        result["Call-ID"] = callId
        result["sip.call_id"] = callId
        return result
    except Exception:
        return None
1064
+
1065
+
1066
def decodeHTTP(rawPayload):
    """
    Decode an HTTP request or response from raw payload bytes.
    Handles both HTTP/1.x requests and responses. Returns a dict with
    both display-friendly keys (e.g., 'Method') and dot-notation keys
    (e.g., 'http.method') for use by the frontend, or None if the payload
    does not look like an HTTP message.

    For requests the following fields are extracted:
        Method, URL, HTTP Version, Host, User-Agent, Content-Type,
        Content-Length, Referer, Accept, Accept-Encoding, Connection.
    For responses the following fields are extracted:
        HTTP Version, Status Code, Status Message, Content-Type,
        Content-Length, Server, Content-Encoding, Transfer-Encoding,
        Connection, Location (for redirects).

    Header names are matched case-insensitively and the whitespace after the
    colon is optional ("Host:example.com" is accepted). Missing headers are
    reported as "Unknown". A "Credentials" entry is added only when one of the
    file-level credential helpers returns something for the query string,
    Authorization/Cookie headers, request body, or Set-Cookie header.
    """
    # (display key, dot-notation key, lower-cased header name), in output order.
    requestFields = (
        ("Host", "http.host", "host"),
        ("User-Agent", "http.user_agent", "user-agent"),
        ("Content-Type", "http.content_type", "content-type"),
        ("Content-Length", "http.content_length", "content-length"),
        ("Referer", "http.referer", "referer"),
        ("Accept", "http.accept", "accept"),
        ("Accept-Encoding", "http.accept_encoding", "accept-encoding"),
        ("Connection", "http.connection", "connection"),
    )
    responseFields = (
        ("Content-Type", "http.content_type", "content-type"),
        ("Content-Length", "http.content_length", "content-length"),
        ("Server", "http.server", "server"),
        ("Content-Encoding", "http.content_encoding", "content-encoding"),
        ("Transfer-Encoding", "http.transfer_encoding", "transfer-encoding"),
        ("Connection", "http.connection", "connection"),
        ("Location", "http.location", "location"),
    )
    try:
        text = rawPayload.decode(errors="ignore")
        # Normalise line endings so both CRLF and bare-LF messages are handled uniformly
        normalised = text.replace("\r\n", "\n")
        # Everything before the first blank line is the header section.
        headerSection, _, body = normalised.partition("\n\n")
        lines = headerSection.split("\n")
        firstLine = lines[0].strip()
        isHttpResponse = firstLine.startswith("HTTP/")
        # A status line can never be a request line, so only consult the
        # method table when the line is not already known to be a response.
        isHttpRequest = (
            not isHttpResponse
            and " " in firstLine
            and firstLine.split(" ")[0] in HTTP_METHODS
        )
        if not isHttpResponse and not isHttpRequest:
            return None

        # Parse headers into a dict (lowercase keys for case-insensitive lookup).
        # Split on the first colon only: the space after it is optional.
        headers = {}
        for line in lines[1:]:
            name, sep, value = line.partition(":")
            if sep:
                headers[name.strip().lower()] = value.strip()

        parts = firstLine.split(" ", 2)

        if isHttpRequest:
            method = parts[0]
            url = parts[1] if len(parts) > 1 else "Unknown"
            httpVersion = parts[2] if len(parts) > 2 else "Unknown"
            result = {
                "Type": "Request",
                "http.type": "Request",
                "Method": method,
                "http.method": method,
                "URL": url,
                "http.url": url,
                "HTTP Version": httpVersion,
                "http.version": httpVersion,
            }
            for displayKey, dottedKey, headerName in requestFields:
                value = headers.get(headerName, "Unknown")
                result[displayKey] = value
                result[dottedKey] = value

            # --- Credential extraction ----------------------------------------
            # Extract credential fields from query-string (GET, POST, any method)
            creds = {}
            if "?" in url:
                queryStr = url.split("?", 1)[1].split("#")[0]
                creds.update(_extractUrlCredentials(queryStr))
            # Also check Authorization header (Basic auth decoded by the frontend,
            # but include the raw value so the frontend decoder can handle it too)
            authHeader = headers.get("authorization", "")
            if authHeader:
                creds["authorization"] = authHeader
            # Extract Cookie header - session tokens and auth cookies are sensitive
            cookieHeader = headers.get("cookie", "")
            if cookieHeader:
                creds.update(_extractCookieCredentials(cookieHeader))
            # For request bodies (POST/PUT/PATCH) scan for credential fields
            contentType = headers.get("content-type", "")
            if method in ("POST", "PUT", "PATCH") and body.strip():
                if "urlencoded" in contentType.lower():
                    creds.update(_extractUrlCredentials(body))
                else:
                    # JSON, multipart, plain-text, XML - regex scan
                    creds.update(_extractPostBodyCredentials(body, contentType))
            if creds:
                result["Credentials"] = creds
            return result

        httpVersion = parts[0]
        statusCode = parts[1] if len(parts) > 1 else "Unknown"
        statusMessage = parts[2] if len(parts) > 2 else "Unknown"
        responseResult = {
            "Type": "Response",
            "http.type": "Response",
            "HTTP Version": httpVersion,
            "http.version": httpVersion,
            "Status Code": statusCode,
            "http.status_code": statusCode,
            "Status Message": statusMessage,
            "http.status_msg": statusMessage,
        }
        for displayKey, dottedKey, headerName in responseFields:
            value = headers.get(headerName, "Unknown")
            responseResult[displayKey] = value
            responseResult[dottedKey] = value
        setCookieVal = headers.get("set-cookie", "")
        if setCookieVal:
            responseCreds = _extractSetCookieCredentials(setCookieVal)
            if responseCreds:
                responseResult["Credentials"] = responseCreds
        return responseResult
    except Exception:
        return None
1203
+
1204
+
1205
def decodeFTP(rawPayload):
    """
    Decode FTP commands and responses from raw payload bytes.
    Returns a dict with Type (Command/Response), command/status, and argument/message,
    or None if the payload is not recognisable as FTP traffic.

    Only the first line of the payload is examined. The argument of a PASS
    command is always replaced by "***". A reply is recognised when the line
    starts with three digits followed by a space, a hyphen (first line of a
    multi-line reply such as "220-Welcome") or the end of the line.
    """
    FTP_COMMANDS = {
        "USER",
        "PASS",
        "ACCT",
        "CWD",
        "CDUP",
        "SMNT",
        "QUIT",
        "REIN",
        "PORT",
        "PASV",
        "TYPE",
        "STRU",
        "MODE",
        "RETR",
        "STOR",
        "STOU",
        "APPE",
        "ALLO",
        "REST",
        "RNFR",
        "RNTO",
        "ABOR",
        "DELE",
        "RMD",
        "MKD",
        "PWD",
        "LIST",
        "NLST",
        "SITE",
        "SYST",
        "STAT",
        "HELP",
        "NOOP",
        "FEAT",
        "OPTS",
        "MLST",
        "MLSD",
        "SIZE",
        "MDTM",
        "EPRT",
        "EPSV",
        "AUTH",
        "PBSZ",
        "PROT",
    }
    try:
        text = rawPayload.decode(errors="ignore")
        firstLine = text.replace("\r\n", "\n").split("\n")[0].strip()
        if not firstLine:
            return None
        parts = firstLine.split(" ", 1)
        word = parts[0].upper()
        if word in FTP_COMMANDS:
            arg = parts[1].strip() if len(parts) > 1 else ""
            if word == "PASS":
                # Never expose the password in the decoded output.
                arg = "***"
            return {
                "Type": "Command",
                "ftp.type": "Command",
                "Command": word,
                "ftp.command": word,
                "Argument": arg,
                "ftp.argument": arg,
            }
        statusCode = firstLine[:3]
        # firstLine[3:4] is "" when the line is exactly the three-digit code.
        if (
            len(statusCode) == 3
            and statusCode.isdigit()
            and firstLine[3:4] in ("", " ", "-")
        ):
            message = firstLine[4:].strip()
            return {
                "Type": "Response",
                "ftp.type": "Response",
                "Status Code": statusCode,
                "ftp.status_code": statusCode,
                "Message": message,
                "ftp.message": message,
            }
        return None
    except Exception:
        return None
1291
+
1292
+
1293
def _decodeSmtpBase64(token):
    """Return the base64-decoded text of ``token``, or None if it cannot be decoded."""
    try:
        return base64.b64decode(token).decode(errors="replace")
    except (ValueError, TypeError):
        return None


def _extractSmtpPlainCredentials(token):
    """
    Decode an AUTH PLAIN initial response into a credentials dict.

    The decoded message has the form ``[authzid] NUL authcid NUL passwd``: the
    second field is the username and the third the password. Messages that do
    not have exactly three fields fall back to "first non-empty field is the
    username, second is the password". Empty values are omitted.
    """
    decoded = _decodeSmtpBase64(token)
    if decoded is None:
        return {}
    fields = decoded.split("\x00")
    if len(fields) == 3:
        username, password = fields[1], fields[2]
    else:
        nonEmpty = [field for field in fields if field]
        username = nonEmpty[0] if nonEmpty else ""
        password = nonEmpty[1] if len(nonEmpty) > 1 else ""
    creds = {}
    if username:
        creds["username"] = username
    if password:
        creds["password"] = password
    return creds


def decodeSMTP(rawPayload):
    """
    Decode SMTP commands and responses from raw payload bytes.
    Returns a dict with Type (Command/Response), command/status code, and arguments/message,
    or None if the payload is not recognisable as SMTP traffic.

    For AUTH commands a "Credentials" dict is added when credentials can be
    recovered from this payload:
      * AUTH PLAIN <base64>  - username and password from the decoded message.
      * AUTH LOGIN [<base64>] - the inline value (if any) is the username; further
        non-empty lines in the same payload supply the remaining values in
        protocol order (username first when none was inline, then password).
    The displayed argument is masked as "<MECHANISM> ***" whenever inline data
    was present. A reply is recognised when the first line starts with three
    digits followed by a space, a hyphen (multi-line reply such as
    "250-PIPELINING") or the end of the line.
    """
    SMTP_COMMANDS = {
        "EHLO",
        "HELO",
        "MAIL",
        "RCPT",
        "DATA",
        "RSET",
        "VRFY",
        "EXPN",
        "HELP",
        "NOOP",
        "QUIT",
        "AUTH",
        "STARTTLS",
        "BDAT",
    }
    try:
        text = rawPayload.decode(errors="ignore")
        lines = text.replace("\r\n", "\n").split("\n")
        firstLine = lines[0].strip()
        if not firstLine:
            return None
        parts = firstLine.split(" ", 1)
        word = parts[0].upper()
        if word in SMTP_COMMANDS:
            arg = parts[1].strip() if len(parts) > 1 else ""
            result = {
                "Type": "Command",
                "smtp.type": "Command",
                "Command": word,
                "smtp.command": word,
                "Argument": arg,
                "smtp.argument": arg,
            }
            if word == "AUTH":
                argParts = arg.split()
                mechanism = argParts[0].upper() if argParts else ""
                hasInlineData = len(argParts) > 1
                creds = {}
                if mechanism == "PLAIN" and hasInlineData:
                    creds = _extractSmtpPlainCredentials(argParts[1])
                elif mechanism == "LOGIN":
                    if hasInlineData:
                        username = _decodeSmtpBase64(argParts[1])
                        if username is not None:
                            creds["username"] = username
                        # The username was inline, so the next line is the password.
                        expectedFields = ("password",)
                    else:
                        expectedFields = ("username", "password")
                    followUps = [ln.strip() for ln in lines[1:] if ln.strip()]
                    for fieldName, token in zip(expectedFields, followUps):
                        value = _decodeSmtpBase64(token)
                        if value is not None:
                            creds[fieldName] = value
                # Mask the argument in the display field only when inline credential
                # data was present (so "AUTH LOGIN" without inline data stays readable)
                if hasInlineData:
                    result["Argument"] = mechanism + " ***"
                    result["smtp.argument"] = mechanism + " ***"
                if creds:
                    result["Credentials"] = creds
            return result
        statusCode = firstLine[:3]
        # firstLine[3:4] is "" when the line is exactly the three-digit code.
        if (
            len(statusCode) == 3
            and statusCode.isdigit()
            and firstLine[3:4] in ("", " ", "-")
        ):
            message = firstLine[4:].strip()
            return {
                "Type": "Response",
                "smtp.type": "Response",
                "Status Code": statusCode,
                "smtp.status_code": statusCode,
                "Message": message,
                "smtp.message": message,
            }
        return None
    except Exception:
        return None
1395
+
1396
+
1397
def decodePOP3(rawPayload):
    """
    Interpret the first line of a payload as a POP3 command or server reply.

    Commands yield Type "Command" with the upper-cased command word and its
    argument; replies starting with "+OK" or "-ERR" yield Type "Response" with
    the status and message. Every value is stored under a display key and a
    ``pop3.*`` dot-notation key. USER and PASS with a non-empty argument add a
    "Credentials" entry, and the PASS argument is shown as "***".

    Returns None for anything else, including undecodable input.
    """
    POP3_COMMANDS = {
        "USER",
        "PASS",
        "APOP",
        "QUIT",
        "STAT",
        "LIST",
        "RETR",
        "DELE",
        "NOOP",
        "RSET",
        "TOP",
        "UIDL",
        "CAPA",
        "AUTH",
        "STLS",
    }
    try:
        decoded = rawPayload.decode(errors="ignore")
        header = decoded.replace("\r\n", "\n").split("\n")[0].strip()
        if not header:
            return None

        pieces = header.split(" ", 1)
        keyword = pieces[0].upper()
        remainder = pieces[1].strip() if len(pieces) > 1 else ""

        if keyword in POP3_COMMANDS:
            decodedCommand = {
                "Type": "Command",
                "pop3.type": "Command",
                "Command": keyword,
                "pop3.command": keyword,
                "Argument": remainder,
                "pop3.argument": remainder,
            }
            # Credentials are only recorded when an argument is actually present.
            if remainder:
                if keyword == "USER":
                    decodedCommand["Credentials"] = {"username": remainder}
                elif keyword == "PASS":
                    decodedCommand["Credentials"] = {"password": remainder}
                    # Hide the password in the display fields.
                    decodedCommand["Argument"] = "***"
                    decodedCommand["pop3.argument"] = "***"
            return decodedCommand

        if keyword == "+OK" or keyword == "-ERR":
            return {
                "Type": "Response",
                "pop3.type": "Response",
                "Status": keyword,
                "pop3.status": keyword,
                "Message": remainder,
                "pop3.message": remainder,
            }
        return None
    except Exception:
        return None
1459
+
1460
+
1461
def decodeIMAP(rawPayload):
    """
    Interpret the first line of a payload as IMAP traffic.

    Three shapes are recognised, each returned with display keys and matching
    ``imap.*`` dot-notation keys:
      * "* <status> <info>"          -> Type "Untagged"
      * "<tag> <command> [argument]" -> Type "Command" (command in IMAP_COMMANDS)
      * "<tag> OK|NO|BAD|PREAUTH|BYE [message]" -> Type "Response"
    A LOGIN command with an argument adds a "Credentials" entry (surrounding
    double quotes stripped); when a password is present the displayed argument
    becomes "<username> ***".

    Returns None for anything else, including undecodable input.
    """
    IMAP_COMMANDS = {
        "CAPABILITY",
        "NOOP",
        "LOGOUT",
        "AUTHENTICATE",
        "LOGIN",
        "SELECT",
        "EXAMINE",
        "CREATE",
        "DELETE",
        "RENAME",
        "SUBSCRIBE",
        "UNSUBSCRIBE",
        "LIST",
        "LSUB",
        "STATUS",
        "APPEND",
        "CHECK",
        "CLOSE",
        "EXPUNGE",
        "SEARCH",
        "FETCH",
        "STORE",
        "COPY",
        "UID",
        "IDLE",
        "NAMESPACE",
        "STARTTLS",
        "ENABLE",
    }
    try:
        decoded = rawPayload.decode(errors="ignore")
        header = decoded.replace("\r\n", "\n").split("\n")[0].strip()
        if not header:
            return None

        # Untagged server data: "* STATUS info..."
        if header.startswith("* "):
            untaggedStatus, _, untaggedInfo = header[2:].strip().partition(" ")
            untaggedInfo = untaggedInfo.strip()
            return {
                "Type": "Untagged",
                "imap.type": "Untagged",
                "Status": untaggedStatus,
                "imap.status": untaggedStatus,
                "Info": untaggedInfo,
                "imap.info": untaggedInfo,
            }

        tokens = header.split(" ", 2)
        if len(tokens) < 2:
            return None
        tag = tokens[0]
        keyword = tokens[1].upper()
        remainder = tokens[2].strip() if len(tokens) == 3 else ""

        if keyword in IMAP_COMMANDS:
            decodedCommand = {
                "Type": "Command",
                "imap.type": "Command",
                "Tag": tag,
                "imap.tag": tag,
                "Command": keyword,
                "imap.command": keyword,
                "Argument": remainder,
                "imap.argument": remainder,
            }
            if keyword == "LOGIN" and remainder:
                # "user pass": everything after the first space is the password.
                userPart, hasPassword, passPart = remainder.partition(" ")
                loginName = userPart.strip('"')
                if hasPassword:
                    decodedCommand["Credentials"] = {
                        "username": loginName,
                        "password": passPart.strip('"'),
                    }
                    # Mask the password in the display fields.
                    decodedCommand["Argument"] = loginName + " ***"
                    decodedCommand["imap.argument"] = loginName + " ***"
                else:
                    decodedCommand["Credentials"] = {"username": loginName}
            return decodedCommand

        if keyword in ("OK", "NO", "BAD", "PREAUTH", "BYE"):
            return {
                "Type": "Response",
                "imap.type": "Response",
                "Tag": tag,
                "imap.tag": tag,
                "Status": keyword,
                "imap.status": keyword,
                "Message": remainder,
                "imap.message": remainder,
            }
        return None
    except Exception:
        return None
1558
+
1559
+
1560
def decodeTelnet(rawPayload):
    """
    Decode Telnet IAC (Interpret As Command) negotiation bytes from raw payload.
    Returns a dict with negotiation options and any printable text found,
    or None if no Telnet IAC bytes are present.

    The printable text is built only from bytes that are not part of an IAC
    command sequence, so command and option codes that happen to fall in the
    ASCII range no longer leak into it. Tab, CR and LF become a single space so
    that consecutive lines stay separated before the text is scanned for
    credentials. The reported text is limited to the first 200 characters.
    """
    IAC = 0xFF
    TELNET_COMMANDS = {
        0xF0: "SE",
        0xF1: "NOP",
        0xF2: "Data Mark",
        0xF3: "Break",
        0xF4: "Interrupt Process",
        0xF5: "Abort Output",
        0xF6: "Are You There",
        0xF7: "Erase Character",
        0xF8: "Erase Line",
        0xF9: "Go Ahead",
        0xFA: "SB",
        0xFB: "WILL",
        0xFC: "WONT",
        0xFD: "DO",
        0xFE: "DONT",
        0xFF: "IAC",
    }
    TELNET_OPTIONS = {
        0: "Binary",
        1: "Echo",
        2: "Reconnection",
        3: "Suppress GA",
        5: "Status",
        6: "Timing Mark",
        24: "Terminal Type",
        31: "Window Size",
        32: "Terminal Speed",
        33: "Remote Flow",
        34: "Linemode",
        36: "Environment",
        39: "New Environment",
    }
    try:
        if IAC not in rawPayload:
            return None
        negotiations = []
        textChars = []
        size = len(rawPayload)
        i = 0
        while i < size:
            current = rawPayload[i]
            if current == IAC and i + 1 < size:
                cmd = rawPayload[i + 1]
                cmdName = TELNET_COMMANDS.get(cmd, f"0x{cmd:02X}")
                # WILL/WONT/DO/DONT are followed by one option byte.
                if cmd in (0xFB, 0xFC, 0xFD, 0xFE) and i + 2 < size:
                    optByte = rawPayload[i + 2]
                    optName = TELNET_OPTIONS.get(optByte, f"Option-{optByte}")
                    negotiations.append(f"{cmdName} {optName}")
                    i += 3
                else:
                    negotiations.append(cmdName)
                    i += 2
                continue
            if 32 <= current <= 126:
                textChars.append(chr(current))
            elif current in (0x09, 0x0A, 0x0D):
                # Keep line/field boundaries as one space instead of fusing words.
                if textChars and textChars[-1] != " ":
                    textChars.append(" ")
            i += 1
        printableText = "".join(textChars).strip()
        result = {
            "Negotiations": negotiations,
            "telnet.negotiations": negotiations,
            "Printable Text": printableText[:200] if printableText else "",
            "telnet.text": printableText[:200] if printableText else "",
        }
        # Scan negotiation packets' printable text for any embedded credentials
        creds = _extractTelnetCredentialText(printableText)
        if creds:
            result["Credentials"] = creds
        return result
    except Exception:
        return None
1633
+
1634
+
1635
# Compiled patterns for Telnet credential extraction (reused across all calls).
# Each captures the first run of non-whitespace characters after a
# "<label>:" prompt; matching is case-insensitive.
_TELNET_USER_RE = re.compile(
    r"(?:login|user(?:name)?)\s*:\s*(\S+)", re.IGNORECASE
)
_TELNET_PASS_RE = re.compile(
    r"(?:pass(?:w(?:or)?d?)?|pw)\s*:\s*(\S+)", re.IGNORECASE
)


def _extractTelnetCredentialText(text):
    """
    Scan a printable Telnet text snippet for login/password prompt-response patterns
    (e.g. ``login: alice`` or ``Password: s3cr3t``).
    Returns a dict of found credential fields, or an empty dict.
    """
    if not text:
        return {}
    creds = {}
    userMatch = _TELNET_USER_RE.search(text)
    passMatch = _TELNET_PASS_RE.search(text)
    if userMatch:
        creds["username"] = userMatch.group(1)
    if passMatch:
        creds["password"] = passMatch.group(1)
    return creds


def extractTelnetCredentials(rawPayload):
    """
    Detect cleartext Telnet login credentials from raw TCP port-23 payloads that
    do NOT necessarily contain IAC negotiation bytes. This handles the data-transfer
    phase of a Telnet session where usernames and passwords are transmitted as plain
    ASCII lines (line-at-a-time mode) or labelled prompt/response pairs.

    Printable ASCII bytes are kept, tab/CR/LF become a space, and every other
    byte is dropped. Keeping the line breaks as spaces matters: otherwise
    "login: alice\\r\\nPassword: x" collapses to "login: alicePassword: x" and
    the username is captured as "alicePassword:".

    Returns a dict with any found credential fields, or an empty dict.
    """
    try:
        chars = []
        for b in rawPayload:
            if 32 <= b <= 126:
                chars.append(chr(b))
            elif b in (0x09, 0x0A, 0x0D):
                chars.append(" ")
        printableText = "".join(chars).strip()
        if not printableText:
            return {}
        # Check for labelled prompt-response patterns (server echo or combined packet)
        return _extractTelnetCredentialText(printableText)
    except Exception:
        return {}
1682
+
1683
+
1684
def decodeIRC(rawPayload):
    """
    Recognise IRC messages in a payload and describe the first one.

    Each non-empty line is split into an optional ":prefix", a command and the
    remaining parameters. A line counts as an IRC message when its upper-cased
    command is in IRC_COMMANDS or is a three-digit numeric reply.

    Returns a dict with the first message's command, prefix and parameters
    (display keys plus ``irc.*`` keys) and the number of recognised messages,
    or None when no line qualifies or the payload cannot be decoded.
    """
    IRC_COMMANDS = {
        "NICK",
        "USER",
        "JOIN",
        "PART",
        "PRIVMSG",
        "NOTICE",
        "QUIT",
        "PING",
        "PONG",
        "MODE",
        "TOPIC",
        "NAMES",
        "LIST",
        "INVITE",
        "KICK",
        "WHOIS",
        "WHO",
        "WHOWAS",
        "MOTD",
        "LUSERS",
        "VERSION",
        "STATS",
        "LINKS",
        "TIME",
        "CONNECT",
        "TRACE",
        "ADMIN",
        "INFO",
        "SERVLIST",
        "SQUERY",
        "KILL",
        "PASS",
        "OPER",
        "REHASH",
        "DIE",
        "RESTART",
        "AWAY",
        "USERHOST",
        "ISON",
        "CAP",
        "AUTHENTICATE",
    }
    try:
        decoded = rawPayload.decode(errors="ignore")
        firstMessage = None
        messageCount = 0
        for rawLine in decoded.replace("\r\n", "\n").split("\n"):
            entry = rawLine.strip()
            if not entry:
                continue
            origin = ""
            if entry.startswith(":"):
                # ":prefix rest" - drop the leading colon from the prefix.
                head, _, entry = entry.partition(" ")
                origin = head[1:]
            verb, _, arguments = entry.partition(" ")
            verb = verb.upper()
            isNumericReply = len(verb) == 3 and verb.isdigit()
            if verb not in IRC_COMMANDS and not isNumericReply:
                continue
            messageCount += 1
            if firstMessage is None:
                firstMessage = (verb, origin, arguments)
        if firstMessage is None:
            return None
        firstVerb, firstOrigin, firstArguments = firstMessage
        return {
            "Command": firstVerb,
            "irc.command": firstVerb,
            "Prefix": firstOrigin,
            "irc.prefix": firstOrigin,
            "Parameters": firstArguments,
            "irc.params": firstArguments,
            "Message Count": messageCount,
            "irc.msg_count": messageCount,
        }
    except Exception:
        return None
1767
+
1768
+
1769
def decodeMTP(rawPayload):
    """
    Decode the fixed header of an MTP/MMS (Microsoft Media Services over TCP,
    port 1755) packet.

    The first twelve bytes are read as three little-endian 32-bit integers:
    the prefix (must equal 0x00000001), a length field and the command ID.
    Returns a dict with the protocol label, the command ID as hex, the command
    name (or the hex ID when unknown) and the length, each under a display key
    and an ``mtp.*`` key. Returns None for short, non-matching or undecodable
    payloads.
    """
    import struct

    MMS_COMMANDS = {
        0x00030001: "CONNECT_REQUEST",
        0x00030002: "CONNECT_RESPONSE",
        0x00030003: "TRANSPORT_INFO_REQUEST",
        0x00030004: "TRANSPORT_INFO_RESPONSE",
        0x00030005: "MEDIA_DETAILS_REQUEST",
        0x00030006: "PLAY_REQUEST",
        0x00030007: "STOP",
        0x00030009: "STREAM_STOPPED",
        0x0004001B: "HEADER",
        0x0004001A: "DATA",
    }
    try:
        if len(rawPayload) < 12:
            return None
        # One unpack for all three header words.
        signature, declaredLength, commandId = struct.unpack_from(
            "<III", rawPayload, 0
        )
        if signature != 0x00000001:
            return None
        commandHex = f"0x{commandId:08X}"
        commandLabel = MMS_COMMANDS.get(commandId, commandHex)
        return {
            "Protocol": "MMS/MTP",
            "mtp.protocol": "MMS/MTP",
            "Command ID": commandHex,
            "mtp.cmd_id": commandHex,
            "Command": commandLabel,
            "mtp.command": commandLabel,
            "Length": declaredLength,
            "mtp.length": declaredLength,
        }
    except Exception:
        return None
1810
+
1811
+
1812
def decodeLDAP(rawPayload):
    """
    Decode basic LDAP message fields from raw payload bytes using ASN.1 BER structure.

    Walks the outer LDAPMessage SEQUENCE (tag 0x30), skips its short- or
    long-form length, reads the INTEGER messageID, and then looks at the
    tag byte of the protocolOp that follows.

    Returns a dict with the message ID and operation name (each under a
    display key and a dotted filter key), or None if the payload does not
    look like LDAP. Unknown operation tags are reported as a hex string.
    """
    # protocolOp tags per RFC 4511 Appendix B. Operations whose body is a
    # SEQUENCE use the constructed form (0x60 | n); UnbindRequest (NULL),
    # DelRequest (LDAPDN) and AbandonRequest (MessageID) are primitive
    # and therefore use 0x40 | n.
    LDAP_OPERATIONS = {
        0x60: "BindRequest",
        0x61: "BindResponse",
        0x42: "UnbindRequest",
        0x63: "SearchRequest",
        0x64: "SearchResEntry",
        0x65: "SearchResDone",
        0x66: "ModifyRequest",
        0x67: "ModifyResponse",
        0x68: "AddRequest",
        0x69: "AddResponse",
        0x4A: "DelRequest",
        0x6B: "DelResponse",
        0x6C: "ModDNRequest",
        0x6D: "ModDNResponse",
        0x6E: "CompareRequest",
        0x6F: "CompareResponse",
        0x50: "AbandonRequest",
        0x73: "SearchResRef",
        0x77: "ExtendedRequest",
        0x78: "ExtendedResponse",
        0x79: "IntermediateResponse",
    }
    try:
        if len(rawPayload) < 4:
            return None
        if rawPayload[0] != 0x30:
            return None
        idx = 1
        if rawPayload[idx] & 0x80:
            # Long-form length: low 7 bits give the number of length octets.
            numBytes = rawPayload[idx] & 0x7F
            idx += 1 + numBytes
        else:
            idx += 1
        # messageID must be a BER INTEGER (tag 0x02).
        if idx >= len(rawPayload) or rawPayload[idx] != 0x02:
            return None
        idxLen = rawPayload[idx + 1]
        msgId = int.from_bytes(rawPayload[idx + 2 : idx + 2 + idxLen], "big")
        idx += 2 + idxLen
        if idx >= len(rawPayload):
            return None
        opTag = rawPayload[idx]
        opName = LDAP_OPERATIONS.get(opTag, f"0x{opTag:02X}")
        return {
            "Message ID": msgId,
            "ldap.msg_id": msgId,
            "Operation": opName,
            "ldap.operation": opName,
        }
    except Exception:
        # Best-effort decoder: truncated or non-bytes input is "not LDAP".
        return None
1868
+
1869
+
1870
def decodeMySQL(rawPayload):
    """
    Decode MySQL protocol packets from raw payload bytes.

    Every packet starts with a 3-byte little-endian payload length and a
    1-byte sequence id; classification is done on the first payload byte:

    * 0x0A  -> server greeting (protocol 10 handshake), server version extracted
    * 0x00  -> OK packet
    * 0xFF  -> ERR packet, error code and message extracted
    * other -> client command, only when the sequence id is 0

    ERR packets carry a '#' marker plus a 5-character SQL state before the
    message only when the 4.1 protocol is in use; the message offset is
    chosen accordingly so older-style errors are not truncated.

    Returns a dict with packet type and relevant fields, or None if not
    recognisable.
    """
    import struct

    MYSQL_COMMANDS = {
        0x00: "Sleep",
        0x01: "Quit",
        0x02: "Init DB",
        0x03: "Query",
        0x04: "Field List",
        0x05: "Create DB",
        0x06: "Drop DB",
        0x07: "Refresh",
        0x08: "Shutdown",
        0x09: "Statistics",
        0x0A: "Process Info",
        0x0B: "Connect",
        0x0C: "Process Kill",
        0x0D: "Debug",
        0x0E: "Ping",
        0x0F: "Time",
        0x10: "Delayed Insert",
        0x11: "Change User",
        0x16: "Stmt Prepare",
        0x17: "Stmt Execute",
        0x19: "Stmt Close",
        0x1A: "Stmt Reset",
        0x1C: "Set Option",
        0x1D: "Stmt Fetch",
    }
    try:
        # 4-byte header plus at least one payload byte.
        if len(rawPayload) < 5:
            return None
        seqNum = rawPayload[3]
        payload = rawPayload[4:]
        firstByte = payload[0]
        if firstByte == 0x0A:
            # Handshake v10: NUL-terminated server version follows the version byte.
            versionEnd = payload.find(b"\x00", 1)
            version = (
                payload[1:versionEnd].decode(errors="ignore")
                if versionEnd > 1
                else "Unknown"
            )
            return {
                "Type": "Server Greeting",
                "mysql.type": "Server Greeting",
                "Protocol Version": 10,
                "mysql.proto_version": 10,
                "Server Version": version,
                "mysql.server_version": version,
                "Sequence": seqNum,
                "mysql.seq": seqNum,
            }
        if firstByte == 0x00:
            return {
                "Type": "OK",
                "mysql.type": "OK",
                "Sequence": seqNum,
                "mysql.seq": seqNum,
            }
        if firstByte == 0xFF:
            errCode = (
                struct.unpack_from("<H", payload, 1)[0] if len(payload) >= 3 else 0
            )
            # With the '#' marker the message follows marker + 5-byte SQL
            # state (offset 9); without it the message starts right after
            # the 2-byte error code (offset 3).
            msgStart = 9 if payload[3:4] == b"#" else 3
            errMsg = payload[msgStart:].decode(errors="ignore")
            return {
                "Type": "Error",
                "mysql.type": "Error",
                "Error Code": errCode,
                "mysql.error_code": errCode,
                "Error Message": errMsg[:100],
                "mysql.error_msg": errMsg[:100],
                "Sequence": seqNum,
                "mysql.seq": seqNum,
            }
        if seqNum == 0 and firstByte in MYSQL_COMMANDS:
            cmdName = MYSQL_COMMANDS[firstByte]
            # Command argument (e.g. the SQL text), capped at 200 characters.
            query = (
                payload[1:].decode(errors="ignore")[:200] if len(payload) > 1 else ""
            )
            return {
                "Type": "Command",
                "mysql.type": "Command",
                "Command": cmdName,
                "mysql.command": cmdName,
                "Query": query,
                "mysql.query": query,
                "Sequence": seqNum,
                "mysql.seq": seqNum,
            }
        return None
    except Exception:
        # Best-effort decoder: anything unparseable is "not MySQL".
        return None
1970
+
1971
+
1972
def decodePostgreSQL(rawPayload):
    """
    Decode PostgreSQL frontend/backend protocol messages from raw payload bytes.

    Two framings are recognised:

    * Untyped startup-phase packets, whose first 4 bytes are a big-endian
      length equal to the whole payload, followed by a 32-bit code. Regular
      startup packets carry a protocol version there; the reserved codes
      1234.5678 / 1234.5679 / 1234.5680 identify CancelRequest, SSLRequest
      and GSSENCRequest respectively.
    * Typed messages: a one-byte type, then a big-endian length that counts
      itself (so it can never be below 4).

    Several type bytes are used in both directions; only the payload is
    available here, so those are reported using the backend table, which is
    consulted first. CopyFail ('f') is frontend-only and is listed only in
    the frontend table.

    Returns a dict with message type and relevant fields, or None if not
    recognisable.
    """
    import struct

    PG_BACKEND_TYPES = {
        b"R": "Authentication",
        b"K": "BackendKeyData",
        b"2": "BindComplete",
        b"3": "CloseComplete",
        b"C": "CommandComplete",
        b"d": "CopyData",
        b"c": "CopyDone",
        b"G": "CopyInResponse",
        b"H": "CopyOutResponse",
        b"D": "DataRow",
        b"I": "EmptyQueryResponse",
        b"E": "ErrorResponse",
        b"V": "FunctionCallResponse",
        b"n": "NoData",
        b"N": "NoticeResponse",
        b"A": "NotificationResponse",
        b"t": "ParameterDescription",
        b"S": "ParameterStatus",
        b"1": "ParseComplete",
        b"s": "PortalSuspended",
        b"Z": "ReadyForQuery",
        b"T": "RowDescription",
    }
    PG_FRONTEND_TYPES = {
        b"B": "Bind",
        b"C": "Close",
        b"d": "CopyData",
        b"c": "CopyDone",
        b"f": "CopyFail",
        b"D": "Describe",
        b"E": "Execute",
        b"H": "Flush",
        b"F": "FunctionCall",
        b"P": "Parse",
        b"p": "Password",
        b"Q": "Query",
        b"S": "Sync",
        b"X": "Terminate",
    }
    # Reserved "version" codes used by untyped special requests.
    PG_SPECIAL_REQUESTS = {
        (1234, 5678): "CancelRequest",
        (1234, 5679): "SSLRequest",
        (1234, 5680): "GSSENCRequest",
    }
    try:
        if len(rawPayload) < 5:
            return None
        firstInt = struct.unpack_from(">I", rawPayload, 0)[0]
        if firstInt == len(rawPayload) and len(rawPayload) >= 8:
            protoMajor = struct.unpack_from(">H", rawPayload, 4)[0]
            protoMinor = struct.unpack_from(">H", rawPayload, 6)[0]
            typeName = PG_SPECIAL_REQUESTS.get(
                (protoMajor, protoMinor), "StartupMessage"
            )
            return {
                "Type": typeName,
                "pg.type": typeName,
                "Protocol Version": f"{protoMajor}.{protoMinor}",
                "pg.proto_version": f"{protoMajor}.{protoMinor}",
            }
        msgType = rawPayload[0:1]
        isBackend = msgType in PG_BACKEND_TYPES
        if not isBackend and msgType not in PG_FRONTEND_TYPES:
            return None
        msgLen = struct.unpack_from(">I", rawPayload, 1)[0]
        # The length field includes its own 4 bytes; anything smaller means
        # this is not a PostgreSQL message that merely starts with a letter.
        if msgLen < 4:
            return None
        if isBackend:
            typeName = PG_BACKEND_TYPES[msgType]
            return {
                "Type": typeName,
                "pg.type": typeName,
                "Direction": "Backend",
                "pg.direction": "Backend",
                "Message Length": msgLen,
                "pg.msg_length": msgLen,
            }
        typeName = PG_FRONTEND_TYPES[msgType]
        # Up to 200 bytes of the message body, decoded leniently.
        body = (
            rawPayload[5 : 5 + min(msgLen - 4, 200)].decode(errors="ignore")
            if msgLen > 4
            else ""
        )
        return {
            "Type": typeName,
            "pg.type": typeName,
            "Direction": "Frontend",
            "pg.direction": "Frontend",
            "Message Length": msgLen,
            "pg.msg_length": msgLen,
            "Body": body,
            "pg.body": body,
        }
    except Exception:
        # Best-effort decoder: anything unparseable is "not PostgreSQL".
        return None
2066
+
2067
+
2068
def decodeXMPP(rawPayload):
    """
    Decode XMPP (Extensible Messaging and Presence Protocol) XML stream data.

    The payload is decoded leniently as text and recognised by substring /
    prefix checks for an XML declaration, a stream open or close tag, or a
    message / presence / iq stanza. The stanza type is then classified in
    this order: StreamOpen, StreamClose, Message, Presence, IQ, otherwise
    "Unknown". The first to= and from= attribute values found anywhere in
    the text are reported.

    Returns a dict with the stanza type and the To / From attributes (each
    under a display key and a dotted filter key), or None if not XMPP.
    """
    import re

    try:
        text = rawPayload.decode(errors="ignore").strip()
        if not text:
            return None
        isXmpp = (
            text.startswith(("<?xml", "<message", "<presence", "<iq ", "<iq>"))
            or "<stream:stream" in text
            # A segment carrying only the closing tag is still XMPP; without
            # this check the StreamClose branch below could not be reached
            # for a bare stream close.
            or "</stream:stream>" in text
            or "<message " in text
            or "<presence" in text
        )
        if not isXmpp:
            return None
        stanzaType = "Unknown"
        if "<stream:stream" in text:
            stanzaType = "StreamOpen"
        elif "</stream:stream>" in text:
            stanzaType = "StreamClose"
        elif "<message" in text:
            stanzaType = "Message"
        elif "<presence" in text:
            stanzaType = "Presence"
        elif "<iq " in text or "<iq>" in text:
            stanzaType = "IQ"
        # Accept either quote style around the attribute value.
        toMatch = re.search(r'\bto=["\']([^"\']+)["\']', text)
        fromMatch = re.search(r'\bfrom=["\']([^"\']+)["\']', text)
        toAttr = toMatch.group(1) if toMatch else "Unknown"
        fromAttr = fromMatch.group(1) if fromMatch else "Unknown"
        return {
            "Stanza Type": stanzaType,
            "xmpp.stanza": stanzaType,
            "To": toAttr,
            "xmpp.to": toAttr,
            "From": fromAttr,
            "xmpp.from": fromAttr,
        }
    except Exception:
        # Best-effort decoder: non-bytes or otherwise unusable input is "not XMPP".
        return None
2117
+
2118
+
2119
def decodeSMB(rawPayload):
    """
    Decode SMB (Server Message Block) protocol frames from raw payload bytes.
    Supports both SMBv1 (\\xFFSMB signature) and SMBv2/3 (\\xFESMB signature).

    On TCP the SMB message is normally preceded by a 4-byte transport header
    (a zero type byte followed by a 3-byte length). When that header is
    present and immediately followed by an SMB signature it is skipped, so
    both framed and bare SMB messages are accepted.

    Field offsets used (relative to the signature):
      SMBv1: command at 4, status at 5..8, flags at 9 (bit 0x80 = response)
      SMBv2: status at 8..11, command at 12..13, flags at 16..19 (bit 0x1 = response)

    Returns a dict with SMB version, command, status, and flags, or None if
    not SMB or if the header is truncated.
    """
    import struct

    SMB1_COMMANDS = {
        0x00: "CREATE_DIRECTORY",
        0x01: "DELETE_DIRECTORY",
        0x02: "OPEN",
        0x03: "CREATE",
        0x04: "CLOSE",
        0x05: "FLUSH",
        0x06: "DELETE",
        0x07: "RENAME",
        0x08: "QUERY_INFORMATION",
        0x09: "SET_INFORMATION",
        0x0A: "READ",
        0x0B: "WRITE",
        0x24: "LOCKING_ANDX",
        0x25: "TRANSACTION",
        0x2D: "OPEN_ANDX",
        0x2E: "READ_ANDX",
        0x2F: "WRITE_ANDX",
        0x32: "TRANSACTION2",
        0x70: "TREE_CONNECT",
        0x71: "TREE_DISCONNECT",
        0x72: "NEGOTIATE",
        0x73: "SESSION_SETUP_ANDX",
        0x74: "LOGOFF_ANDX",
        0x75: "TREE_CONNECT_ANDX",
        0xA0: "NT_TRANSACT",
        0xA2: "NT_CREATE_ANDX",
        0xA4: "NT_CANCEL",
        0xFE: "INVALID",
        0xFF: "NO_ANDX",
    }
    SMB2_COMMANDS = {
        0x0000: "NEGOTIATE",
        0x0001: "SESSION_SETUP",
        0x0002: "LOGOFF",
        0x0003: "TREE_CONNECT",
        0x0004: "TREE_DISCONNECT",
        0x0005: "CREATE",
        0x0006: "CLOSE",
        0x0007: "FLUSH",
        0x0008: "READ",
        0x0009: "WRITE",
        0x000A: "LOCK",
        0x000B: "IOCTL",
        0x000C: "CANCEL",
        0x000D: "ECHO",
        0x000E: "QUERY_DIRECTORY",
        0x000F: "CHANGE_NOTIFY",
        0x0010: "QUERY_INFO",
        0x0011: "SET_INFO",
        0x0012: "OPLOCK_BREAK",
    }
    SMB1_SIGNATURE = b"\xff\x53\x4d\x42"
    SMB2_SIGNATURE = b"\xfe\x53\x4d\x42"
    try:
        if len(rawPayload) < 8:
            return None
        frame = rawPayload
        # Strip the 4-byte session/Direct-TCP header when the SMB signature
        # sits right behind it; bare SMB messages are left untouched.
        if frame[0] == 0x00 and frame[4:8] in (SMB1_SIGNATURE, SMB2_SIGNATURE):
            frame = frame[4:]
        signature = frame[:4]
        if signature == SMB1_SIGNATURE:
            cmd = frame[4]
            status = struct.unpack_from("<I", frame, 5)[0]
            flags = frame[9]
            cmdName = SMB1_COMMANDS.get(cmd, f"0x{cmd:02X}")
            isResponse = bool(flags & 0x80)
            return {
                "Version": "SMBv1",
                "smb.version": "SMBv1",
                "Command": cmdName,
                "smb.command": cmdName,
                "Status": f"0x{status:08X}",
                "smb.status": f"0x{status:08X}",
                "Is Response": isResponse,
                "smb.is_response": isResponse,
            }
        if signature == SMB2_SIGNATURE:
            cmd = struct.unpack_from("<H", frame, 12)[0]
            flags = struct.unpack_from("<I", frame, 16)[0]
            status = struct.unpack_from("<I", frame, 8)[0]
            cmdName = SMB2_COMMANDS.get(cmd, f"0x{cmd:04X}")
            isResponse = bool(flags & 0x00000001)
            return {
                "Version": "SMBv2/v3",
                "smb.version": "SMBv2/v3",
                "Command": cmdName,
                "smb.command": cmdName,
                "Status": f"0x{status:08X}",
                "smb.status": f"0x{status:08X}",
                "Is Response": isResponse,
                "smb.is_response": isResponse,
            }
        return None
    except Exception:
        # Truncated headers raise from indexing/unpack; treat as "not SMB".
        return None
2217
+
2218
+
2219
def decodeMQTT(rawPayload):
    """
    Decode MQTT protocol messages from raw payload bytes.

    The high nibble of the first byte is the control packet type and the low
    nibble holds the DUP / QoS / RETAIN flag bits. For PUBLISH packets the
    variable-length "Remaining Length" field is skipped and the
    length-prefixed topic name that follows is extracted.

    Packet types 1-14 plus the MQTT 5 AUTH packet (15) are recognised. A
    PUBLISH whose Remaining Length field does not terminate within the four
    bytes the encoding allows is treated as not MQTT.

    Returns a dict with MQTT fields, or None if the payload does not look
    like MQTT.
    """
    import struct

    MQTT_TYPES = {
        1: "CONNECT",
        2: "CONNACK",
        3: "PUBLISH",
        4: "PUBACK",
        5: "PUBREC",
        6: "PUBREL",
        7: "PUBCOMP",
        8: "SUBSCRIBE",
        9: "SUBACK",
        10: "UNSUBSCRIBE",
        11: "UNSUBACK",
        12: "PINGREQ",
        13: "PINGRESP",
        14: "DISCONNECT",
        15: "AUTH",
    }
    try:
        if len(rawPayload) < 2:
            return None
        firstByte = rawPayload[0]
        msgType = (firstByte >> 4) & 0x0F
        if msgType not in MQTT_TYPES:
            return None
        flags = firstByte & 0x0F
        qos = (flags >> 1) & 0x03
        dup = bool(flags & 0x08)
        retain = bool(flags & 0x01)
        typeName = MQTT_TYPES[msgType]
        result = {
            "Message Type": typeName,
            "mqtt.msg_type": typeName,
            "QoS": qos,
            "mqtt.qos": qos,
            "DUP Flag": dup,
            "mqtt.dup": dup,
            "Retain Flag": retain,
            "mqtt.retain": retain,
        }
        if msgType == 3 and len(rawPayload) > 4:
            # Skip the Remaining Length varint: 1-4 bytes, the last one has
            # its top bit clear. Its value is not needed to find the topic.
            idx = 1
            terminated = False
            while idx <= 4:
                lengthByte = rawPayload[idx]
                idx += 1
                if not (lengthByte & 0x80):
                    terminated = True
                    break
            if not terminated:
                return None
            if idx + 2 <= len(rawPayload):
                # Topic name is a 2-byte big-endian length followed by UTF-8 text.
                topicLen = struct.unpack_from(">H", rawPayload, idx)[0]
                topic = rawPayload[idx + 2 : idx + 2 + topicLen].decode(errors="ignore")
                result["Topic"] = topic
                result["mqtt.topic"] = topic
        return result
    except Exception:
        # Best-effort decoder: anything unparseable is "not MQTT".
        return None
2284
+
2285
+
2286
def decodeRTSP(rawPayload):
    """
    Decode RTSP (Real Time Streaming Protocol) requests and responses from raw payload bytes.
    Similar in structure to HTTP/1.1 text-based protocol.

    A payload is treated as a response when its first line starts with
    "RTSP/", and as a request when the first word of the first line is a
    known RTSP method. Request lines whose version token starts with
    "HTTP/" are rejected, because OPTIONS is shared with HTTP. Header lines
    are split on the first colon, so both "CSeq: 1" and "CSeq:1" are
    understood; header names are matched case-insensitively.

    Returns a dict with RTSP method/status and headers, or None if not
    recognisable as RTSP.
    """
    RTSP_METHODS = {
        "OPTIONS",
        "DESCRIBE",
        "ANNOUNCE",
        "SETUP",
        "PLAY",
        "PAUSE",
        "RECORD",
        "TEARDOWN",
        "GET_PARAMETER",
        "SET_PARAMETER",
        "REDIRECT",
    }
    try:
        text = rawPayload.decode(errors="ignore")
        normalised = text.replace("\r\n", "\n")
        # Only the header section (before the first blank line) is parsed.
        headerSection = normalised.split("\n\n")[0]
        lines = headerSection.split("\n")
        firstLine = lines[0].strip()
        parts = firstLine.split(" ", 2)
        isRtspResponse = firstLine.startswith("RTSP/")
        isRtspRequest = len(parts) > 1 and parts[0].upper() in RTSP_METHODS
        if not isRtspResponse and not isRtspRequest:
            return None
        if isRtspRequest and len(parts) > 2 and parts[2].upper().startswith("HTTP/"):
            # e.g. "OPTIONS * HTTP/1.1" is an HTTP request, not RTSP.
            return None
        headers = {}
        for line in lines[1:]:
            key, sep, val = line.partition(":")
            key = key.strip().lower()
            if sep and key:
                headers[key] = val.strip()
        if isRtspRequest:
            method = parts[0].upper()
            url = parts[1] if len(parts) > 1 else "Unknown"
            rtspVersion = parts[2] if len(parts) > 2 else "Unknown"
            return {
                "Type": "Request",
                "rtsp.type": "Request",
                "Method": method,
                "rtsp.method": method,
                "URL": url,
                "rtsp.url": url,
                "RTSP Version": rtspVersion,
                "rtsp.version": rtspVersion,
                "CSeq": headers.get("cseq", "Unknown"),
                "rtsp.cseq": headers.get("cseq", "Unknown"),
                "Session": headers.get("session", "Unknown"),
                "rtsp.session": headers.get("session", "Unknown"),
                "Transport": headers.get("transport", "Unknown"),
                "rtsp.transport": headers.get("transport", "Unknown"),
            }
        rtspVersion = parts[0]
        statusCode = parts[1] if len(parts) > 1 else "Unknown"
        statusMsg = parts[2] if len(parts) > 2 else "Unknown"
        return {
            "Type": "Response",
            "rtsp.type": "Response",
            "RTSP Version": rtspVersion,
            "rtsp.version": rtspVersion,
            "Status Code": statusCode,
            "rtsp.status_code": statusCode,
            "Status Message": statusMsg,
            "rtsp.status_msg": statusMsg,
            "CSeq": headers.get("cseq", "Unknown"),
            "rtsp.cseq": headers.get("cseq", "Unknown"),
            "Session": headers.get("session", "Unknown"),
            "rtsp.session": headers.get("session", "Unknown"),
            "Content-Type": headers.get("content-type", "Unknown"),
            "rtsp.content_type": headers.get("content-type", "Unknown"),
            "Content-Length": headers.get("content-length", "Unknown"),
            "rtsp.content_length": headers.get("content-length", "Unknown"),
        }
    except Exception:
        # Best-effort decoder: non-bytes or otherwise unusable input is "not RTSP".
        return None
2372
+
2373
+
2374
def decodeTFTP(rawPayload):
    """
    Decode TFTP (Trivial File Transfer Protocol) packets from raw payload bytes.
    TFTP runs over UDP. Extracts opcode and relevant fields per RFC 1350,
    plus the OACK packet and error code 8 added by the option extension
    (RFC 2347).

    Per opcode:
      RRQ / WRQ -> filename and transfer mode (NUL-terminated strings)
      DATA      -> block number and data length
      ACK       -> block number
      ERROR     -> error code, standard description, and message text
      OACK      -> acknowledged options as a {name: value} dict

    Returns a dict with opcode type and arguments, or None if not
    recognisable as TFTP (shorter than 4 bytes or unknown opcode).
    """
    import struct

    TFTP_OPCODES = {1: "RRQ", 2: "WRQ", 3: "DATA", 4: "ACK", 5: "ERROR", 6: "OACK"}
    TFTP_ERRORS = {
        0: "Not defined",
        1: "File not found",
        2: "Access violation",
        3: "Disk full",
        4: "Illegal operation",
        5: "Unknown TID",
        6: "File already exists",
        7: "No such user",
        8: "Option negotiation failed",
    }
    try:
        if len(rawPayload) < 4:
            return None
        opcode = struct.unpack_from(">H", rawPayload, 0)[0]
        if opcode not in TFTP_OPCODES:
            return None
        opName = TFTP_OPCODES[opcode]
        if opcode in (1, 2):
            rest = rawPayload[2:]
            nullIdx = rest.find(b"\x00")
            # Without a terminator the whole remainder is taken as the filename.
            filename = (
                rest[:nullIdx].decode(errors="ignore")
                if nullIdx >= 0
                else rest.decode(errors="ignore")
            )
            modeStart = nullIdx + 1 if nullIdx >= 0 else len(rest)
            modeEnd = rest.find(b"\x00", modeStart)
            # The mode is only reported when it is non-empty and NUL-terminated.
            mode = (
                rest[modeStart:modeEnd].decode(errors="ignore")
                if modeEnd > modeStart
                else "Unknown"
            )
            return {
                "Opcode": opName,
                "tftp.opcode": opName,
                "Filename": filename,
                "tftp.filename": filename,
                "Mode": mode,
                "tftp.mode": mode,
            }
        if opcode == 3:
            block = struct.unpack_from(">H", rawPayload, 2)[0]
            return {
                "Opcode": opName,
                "tftp.opcode": opName,
                "Block Number": block,
                "tftp.block": block,
                "Data Length": len(rawPayload) - 4,
                "tftp.data_len": len(rawPayload) - 4,
            }
        if opcode == 4:
            block = struct.unpack_from(">H", rawPayload, 2)[0]
            return {
                "Opcode": opName,
                "tftp.opcode": opName,
                "Block Number": block,
                "tftp.block": block,
            }
        if opcode == 5:
            errCode = struct.unpack_from(">H", rawPayload, 2)[0]
            errMsg = rawPayload[4:].rstrip(b"\x00").decode(errors="ignore")
            errDesc = TFTP_ERRORS.get(errCode, f"Error {errCode}")
            return {
                "Opcode": opName,
                "tftp.opcode": opName,
                "Error Code": errCode,
                "tftp.error_code": errCode,
                "Error Description": errDesc,
                "tftp.error_desc": errDesc,
                "Error Message": errMsg,
                "tftp.error_msg": errMsg,
            }
        # Remaining opcode is OACK: a run of NUL-terminated name/value strings.
        tokens = [
            field.decode(errors="ignore") for field in rawPayload[2:].split(b"\x00")
        ]
        if tokens and tokens[-1] == "":
            # Drop the empty piece produced by the final terminator.
            tokens.pop()
        options = dict(zip(tokens[0::2], tokens[1::2]))
        return {
            "Opcode": opName,
            "tftp.opcode": opName,
            "Options": options,
            "tftp.options": options,
        }
    except Exception:
        # Best-effort decoder: anything unparseable is "not TFTP".
        return None
2458
+
2459
+
2460
def decodeBGP(rawPayload):
    """
    Decode BGP (Border Gateway Protocol) messages from raw payload bytes.
    BGP runs over TCP port 179.

    The payload must be at least 19 bytes and begin with sixteen 0xFF marker
    bytes. The 2-byte big-endian length and 1-byte type that follow are
    always reported. OPEN messages (type 1, at least 29 bytes) additionally
    yield version, ASN, hold time and router id; NOTIFICATION messages
    (type 3, at least 21 bytes) yield error code, error name and subcode.

    Returns a dict with BGP message type and length (plus the per-type
    fields above), or None if not BGP.
    """
    import struct

    messageNames = {
        1: "OPEN",
        2: "UPDATE",
        3: "NOTIFICATION",
        4: "KEEPALIVE",
        5: "ROUTE-REFRESH",
    }
    notificationNames = {
        1: "Message Header Error",
        2: "OPEN Message Error",
        3: "UPDATE Message Error",
        4: "Hold Timer Expired",
        5: "Finite State Machine Error",
        6: "Cease",
    }
    try:
        available = len(rawPayload)
        if available < 19 or not rawPayload.startswith(b"\xff" * 16):
            return None

        # Length and type sit directly behind the 16-byte marker.
        declaredLen, typeCode = struct.unpack_from(">HB", rawPayload, 16)
        label = messageNames.get(typeCode, f"Unknown({typeCode})")
        info = {
            "Message Type": label,
            "bgp.type": label,
            "Message Length": declaredLen,
            "bgp.length": declaredLen,
        }

        if typeCode == 1 and available >= 29:
            bgpVersion, asNumber, holdSeconds, idOctets = struct.unpack_from(
                ">BHH4s", rawPayload, 19
            )
            dottedId = ".".join(map(str, idOctets))
            info.update(
                {
                    "BGP Version": bgpVersion,
                    "bgp.version": bgpVersion,
                    "ASN": asNumber,
                    "bgp.asn": asNumber,
                    "Hold Time": holdSeconds,
                    "bgp.hold_time": holdSeconds,
                    "Router ID": dottedId,
                    "bgp.router_id": dottedId,
                }
            )

        if typeCode == 3 and available >= 21:
            majorCode = rawPayload[19]
            minorCode = rawPayload[20]
            majorName = notificationNames.get(majorCode, f"Error {majorCode}")
            info.update(
                {
                    "Error Code": majorCode,
                    "bgp.error_code": majorCode,
                    "Error Name": majorName,
                    "bgp.error_name": majorName,
                    "Error Subcode": minorCode,
                    "bgp.error_subcode": minorCode,
                }
            )
        return info
    except Exception:
        return None
2523
+
2524
+
2525
def decodeHTTP2(rawPayload):
    """
    Decode HTTP/2 frames from raw payload bytes.

    Looks for the client connection preface and then reads one 9-byte frame
    header: a 24-bit length, an 8-bit type, an 8-bit flags field, and a
    stream identifier whose top (reserved) bit is masked off. When the
    payload holds the preface but no complete frame header after it, only
    the preface is reported with the frame type set to "N/A".

    Returns a dict with HTTP/2 frame info, or None if the payload is shorter
    than 9 bytes or cannot be read as bytes.
    """
    frameNames = {
        0x0: "DATA",
        0x1: "HEADERS",
        0x2: "PRIORITY",
        0x3: "RST_STREAM",
        0x4: "SETTINGS",
        0x5: "PUSH_PROMISE",
        0x6: "PING",
        0x7: "GOAWAY",
        0x8: "WINDOW_UPDATE",
        0x9: "CONTINUATION",
    }
    clientPreface = b"PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n"
    try:
        total = len(rawPayload)
        if total < 9:
            return None

        prefaceSeen = rawPayload.startswith(clientPreface)
        headerStart = len(clientPreface) if prefaceSeen else 0
        headerEnd = headerStart + 9

        if headerEnd > total:
            # Only reachable when the preface consumed the bytes a frame
            # header would need.
            if not prefaceSeen:
                return None
            return {
                "Connection Preface": True,
                "http2.preface": True,
                "Frame Type": "N/A",
                "http2.frame_type": "N/A",
            }

        header = rawPayload[headerStart:headerEnd]
        payloadLen = int.from_bytes(header[:3], "big")
        typeCode = header[3]
        flagBits = header[4]
        stream = int.from_bytes(header[5:9], "big") & 0x7FFFFFFF

        frameLabel = frameNames.get(typeCode, f"0x{typeCode:02X}")
        flagsHex = f"0x{flagBits:02X}"
        return {
            "Connection Preface": prefaceSeen,
            "http2.preface": prefaceSeen,
            "Frame Type": frameLabel,
            "http2.frame_type": frameLabel,
            "Frame Length": payloadLen,
            "http2.frame_length": payloadLen,
            "Frame Flags": flagsHex,
            "http2.frame_flags": flagsHex,
            "Stream ID": stream,
            "http2.stream_id": stream,
        }
    except Exception:
        return None
2581
+
2582
+
2583
def decodeNNTP(rawPayload):
    """
    Decode NNTP (Network News Transfer Protocol) commands and responses.

    Only the first line of the payload is examined. Its first word is
    matched case-insensitively against the known command keywords; a word
    made of exactly three digits is treated as a response status code. The
    keyword and its argument may be separated by spaces or tabs.

    Returns a dict with Type (Command/Response), command/status, and message,
    or None if the payload is not recognisable as NNTP traffic.
    """
    NNTP_COMMANDS = {
        "ARTICLE",
        "BODY",
        "CAPABILITIES",
        "DATE",
        "GROUP",
        "HDR",
        "HEAD",
        "HELP",
        "IHAVE",
        "LAST",
        "LIST",
        "LISTGROUP",
        "MODE",
        "NEWGROUPS",
        "NEWNEWS",
        "NEXT",
        "OVER",
        "POST",
        "QUIT",
        "READER",
        "STAT",
        "AUTHINFO",
        "COMPRESS",
        "STARTTLS",
        "XOVER",
        "XHDR",
    }
    try:
        text = rawPayload.decode(errors="ignore")
        lines = text.replace("\r\n", "\n").split("\n")
        firstLine = lines[0].strip()
        if not firstLine:
            return None
        # Split on any whitespace run so TAB-separated arguments also work.
        parts = firstLine.split(None, 1)
        word = parts[0].upper()
        rest = parts[1].strip() if len(parts) > 1 else ""
        if word in NNTP_COMMANDS:
            return {
                "Type": "Command",
                "nntp.type": "Command",
                "Command": word,
                "nntp.command": word,
                "Argument": rest,
                "nntp.argument": rest,
            }
        if len(word) == 3 and word.isdigit():
            return {
                "Type": "Response",
                "nntp.type": "Response",
                "Status Code": word,
                "nntp.status_code": word,
                "Message": rest,
                "nntp.message": rest,
            }
        return None
    except Exception:
        # Best-effort decoder: non-bytes or otherwise unusable input is "not NNTP".
        return None
2644
+
2645
+
2646
def decodeRADIUS(rawPayload):
    """
    Decode RADIUS (Remote Authentication Dial-In User Service) packets from raw payload bytes.
    Extracts code, identifier, length, and basic attributes.

    The 20-byte header (code, identifier, 2-byte big-endian length, 16-byte
    authenticator) is followed by type/length/value attributes. Attributes
    are only read inside the declared packet length; bytes beyond it are
    padding and are ignored, and parsing stops at the first attribute whose
    length is below 2 or would overrun the declared length.

    Value rendering:
      User-Name                      -> decoded text
      NAS-IP-Address / Framed-IP     -> dotted quad when 4 bytes, else hex
      User-Password / CHAP-Password  -> masked as "***"
      anything else                  -> text when all bytes are printable
                                        ASCII, otherwise hex

    Returns a dict with RADIUS fields, or None if not recognisable as RADIUS.
    """
    import struct

    RADIUS_CODES = {
        1: "Access-Request",
        2: "Access-Accept",
        3: "Access-Reject",
        4: "Accounting-Request",
        5: "Accounting-Response",
        11: "Access-Challenge",
        12: "Status-Server",
        13: "Status-Client",
        255: "Reserved",
    }
    RADIUS_ATTRIBUTES = {
        1: "User-Name",
        2: "User-Password",
        3: "CHAP-Password",
        4: "NAS-IP-Address",
        5: "NAS-Port",
        6: "Service-Type",
        7: "Framed-Protocol",
        8: "Framed-IP-Address",
        18: "Reply-Message",
        24: "State",
        25: "Class",
        26: "Vendor-Specific",
        27: "Session-Timeout",
        28: "Idle-Timeout",
        30: "Called-Station-Id",
        31: "Calling-Station-Id",
        32: "NAS-Identifier",
        40: "Acct-Status-Type",
        41: "Acct-Delay-Time",
        42: "Acct-Input-Octets",
        43: "Acct-Output-Octets",
        44: "Acct-Session-Id",
        61: "NAS-Port-Type",
        77: "Connect-Info",
        79: "EAP-Message",
        80: "Message-Authenticator",
    }
    try:
        if len(rawPayload) < 20:
            return None
        code = rawPayload[0]
        identifier = rawPayload[1]
        length = struct.unpack_from(">H", rawPayload, 2)[0]
        # The declared length must cover the header and fit in the payload.
        if length < 20 or length > len(rawPayload):
            return None
        codeName = RADIUS_CODES.get(code, f"Unknown({code})")
        attributes = []
        idx = 20
        while idx + 2 <= length:
            attrType = rawPayload[idx]
            attrLen = rawPayload[idx + 1]
            # attrLen counts the 2-byte type/length prefix, so < 2 is
            # malformed; an attribute running past the declared length
            # would pull its value from padding, so stop there as well.
            if attrLen < 2 or idx + attrLen > length:
                break
            attrValue = rawPayload[idx + 2 : idx + attrLen]
            attrName = RADIUS_ATTRIBUTES.get(attrType, f"Attr-{attrType}")
            if attrType == 1:
                attrValueStr = attrValue.decode(errors="ignore")
            elif attrType in (4, 8):
                attrValueStr = (
                    ".".join(str(b) for b in attrValue)
                    if len(attrValue) == 4
                    else attrValue.hex()
                )
            elif attrType in (2, 3):
                # Never expose credential material in the output.
                attrValueStr = "***"
            else:
                attrValueStr = (
                    attrValue.decode(errors="ignore")
                    if all(32 <= b <= 126 for b in attrValue)
                    else attrValue.hex()
                )
            attributes.append({"Type": attrName, "Value": attrValueStr})
            idx += attrLen
        return {
            "Code": codeName,
            "radius.code": codeName,
            "Identifier": identifier,
            "radius.id": identifier,
            "Length": length,
            "radius.length": length,
            "Attributes": attributes,
            "radius.attrs": attributes,
        }
    except Exception:
        # Best-effort decoder: anything unparseable is "not RADIUS".
        return None
2741
+
2742
+
2743
+ def packetLoop(p, packetIndex, srcPortFilter, dstPortFilter, timeout):
2744
+ """
2745
+ Process a single scapy packet: extract TCP, UDP, or ICMP payload, write the raw
2746
+ testcase file, gather analysis data (MIME, entropy, geoip, etc.) and merge
2747
+ everything into a single JSON output file. For UDP packets on port 53 the DNS
2748
+ layer is decoded. SNMP (161/162), DHCP (67/68), NTP/SNTP (123), and SIP (5060/5061)
2749
+ packets are also decoded and included in the output. HTTP (any port whose payload
2750
+ looks like HTTP) and HTTP/2 (connection preface or binary frames) are decoded for
2751
+ both requests and responses. FTP (20/21), SMTP (25/587/465), POP3/POP (110/995),
2752
+ IMAP/IMAP4 (143/993), Telnet (23), IRC (6667-6669), MTP (1755), LDAP (389/636),
2753
+ MySQL (3306), PostgreSQL (5432), XMPP (5222/5223), SMB (139/445), MQTT (1883/8883),
2754
+ RTSP (554), TFTP (UDP 69), BGP (179), NNTP (119), and RADIUS (1812/1813/1645/1646)
2755
+ are also decoded. ICMP packets are fully supported as a separate transport type.
2756
+
2757
+ packetIndex is the 0-based position of this packet in the full capture, used as
2758
+ the filename index so files from concurrent threads do not collide.
2759
+ Returns the merged info dict, or None if the packet should be skipped.
2760
+ """
2761
+ srcMacAddr = p.src if p.haslayer("Ethernet") else "N/A"
2762
+ dstMacAddr = p.dst if p.haslayer("Ethernet") else "N/A"
2763
+ srcMacVendor = macAddrToVendor(srcMacAddr) if srcMacAddr != "N/A" else "N/A"
2764
+ dstMacVendor = macAddrToVendor(dstMacAddr) if dstMacAddr != "N/A" else "N/A"
2765
+ if not p.haslayer("IP"):
2766
+ return None
2767
+
2768
+ isTcp = p.haslayer("TCP")
2769
+ isUdp = p.haslayer("UDP")
2770
+ isIcmp = p.haslayer("ICMP")
2771
+ if not isTcp and not isUdp and not isIcmp:
2772
+ return None
2773
+
2774
+ if isTcp:
2775
+ rawPayload = p["TCP"].payload.original
2776
+ srcPort = p["TCP"].sport
2777
+ dstPort = p["TCP"].dport
2778
+ transportProtocol = "tcp"
2779
+ dstPortStr = str(dstPort)
2780
+ elif isUdp:
2781
+ rawPayload = p["UDP"].payload.original
2782
+ srcPort = p["UDP"].sport
2783
+ dstPort = p["UDP"].dport
2784
+ transportProtocol = "udp"
2785
+ dstPortStr = str(dstPort)
2786
+ else:
2787
+ # ICMP: use the full ICMP layer bytes as the payload
2788
+ rawPayload = bytes(p["ICMP"])
2789
+ srcPort = 0
2790
+ dstPort = 0
2791
+ transportProtocol = "icmp"
2792
+ dstPortStr = "icmp"
2793
+
2794
+ if (srcPortFilter is None or srcPort == srcPortFilter) and (
2795
+ dstPortFilter is None or dstPort == dstPortFilter
2796
+ ):
2797
+ if rawPayload is not None and len(rawPayload) > 0:
2798
+ streamLabelPort = dstPort
2799
+ if isTcp:
2800
+ streamKey = getTcpStreamKey(p["IP"].src, srcPort, p["IP"].dst, dstPort)
2801
+ streamLabelPort = tcpStreamInitialDstPortMap.get(streamKey, dstPort)
2802
+ writeTestcase(rawPayload, outputDir, dstPortStr, packetIndex)
2803
+ dataTypeInfo = getDatatypes(
2804
+ rawPayload,
2805
+ streamLabelPort,
2806
+ p["IP"].src,
2807
+ p["IP"].dst,
2808
+ timeout,
2809
+ transportProtocol,
2810
+ )
2811
+ timestamp = datetime.fromtimestamp(float(Decimal(p.time))).strftime(
2812
+ "%Y-%m-%d %H:%M:%S.%f"
2813
+ )
2814
+
2815
+ # Resolve geoip once per packet so we don't hit the cache (or DB) twice
2816
+ # for the same IP within a single packet.
2817
+ srcGeoInfo = getGeoipInfo(p["IP"].src, "src")
2818
+ dstGeoInfo = getGeoipInfo(p["IP"].dst, "dst")
2819
+ isLocalNetwork = (
2820
+ srcGeoInfo.get("Location") == "Localnet"
2821
+ and dstGeoInfo.get("Location") == "Localnet"
2822
+ )
2823
+
2824
+ if isTcp:
2825
+ # Build TCP flag string once
2826
+ tcpFlags = ""
2827
+ if p["TCP"].flags.S:
2828
+ tcpFlags += "SYN|"
2829
+ if p["TCP"].flags.A:
2830
+ tcpFlags += "ACK|"
2831
+ if p["TCP"].flags.F:
2832
+ tcpFlags += "FIN|"
2833
+ if p["TCP"].flags.R:
2834
+ tcpFlags += "RST|"
2835
+ if p["TCP"].flags.P:
2836
+ tcpFlags += "PSH|"
2837
+ if p["TCP"].flags.U:
2838
+ tcpFlags += "URG|"
2839
+ if p["TCP"].flags.ECE:
2840
+ tcpFlags += "ECE|"
2841
+ if p["TCP"].flags.CWR:
2842
+ tcpFlags += "CWR|"
2843
+ if tcpFlags.endswith("|"):
2844
+ tcpFlags = tcpFlags[:-1]
2845
+
2846
+ transportSection = {
2847
+ "Source port": int(srcPort),
2848
+ "tcp.src.port": int(srcPort),
2849
+ "Destination port": int(dstPort),
2850
+ "tcp.dst.port": int(dstPort),
2851
+ "TCP checksum": hex(int(p["TCP"].chksum)),
2852
+ "tcp.chksum": hex(int(p["TCP"].chksum)),
2853
+ "Urgent flag": bool(p["TCP"].urgptr),
2854
+ "tcp.urgptr": bool(p["TCP"].urgptr),
2855
+ "TCP Flag Data": {
2856
+ "Flags": tcpFlags if tcpFlags else "None",
2857
+ "tcp.flags": tcpFlags if tcpFlags else "None",
2858
+ },
2859
+ "Options": list(p["TCP"].options),
2860
+ "tcp.options": list(p["TCP"].options),
2861
+ "TCP layer length": int(p["TCP"].dataofs * 4),
2862
+ "tcp.len": int(p["TCP"].dataofs * 4),
2863
+ "Wire length": len(p["TCP"]),
2864
+ "wire.len": len(p["TCP"]),
2865
+ }
2866
+ # Decode SIP on TCP ports 5060/5061
2867
+ if dstPort in (5060, 5061) or srcPort in (5060, 5061):
2868
+ sipSection = decodeSIP(rawPayload)
2869
+ if sipSection is not None:
2870
+ transportSection["SIP"] = sipSection
2871
+ # Decode SNMP on TCP port 161/162 (less common but valid)
2872
+ if dstPort in (161, 162) or srcPort in (161, 162):
2873
+ snmpSection = decodeSNMP(p)
2874
+ if snmpSection is not None:
2875
+ transportSection["SNMP"] = snmpSection
2876
+ # Decode HTTP on any TCP port — decodeHTTP() returns None for non-HTTP payloads
2877
+ httpSection = decodeHTTP(rawPayload)
2878
+ if httpSection is not None:
2879
+ transportSection["HTTP"] = httpSection
2880
+ # Decode HTTP/2 on any TCP port (preface or binary frame detection)
2881
+ http2Section = decodeHTTP2(rawPayload)
2882
+ if http2Section is not None:
2883
+ transportSection["HTTP2"] = http2Section
2884
+ # Decode FTP on TCP ports 20/21
2885
+ if dstPort in (20, 21) or srcPort in (20, 21):
2886
+ ftpSection = decodeFTP(rawPayload)
2887
+ if ftpSection is not None:
2888
+ transportSection["FTP"] = ftpSection
2889
+ # Decode SMTP on TCP ports 25/587/465
2890
+ if dstPort in (25, 587, 465) or srcPort in (25, 587, 465):
2891
+ smtpSection = decodeSMTP(rawPayload)
2892
+ if smtpSection is not None:
2893
+ transportSection["SMTP"] = smtpSection
2894
+ # Decode POP3/POP on TCP ports 110/995
2895
+ if dstPort in (110, 995) or srcPort in (110, 995):
2896
+ pop3Section = decodePOP3(rawPayload)
2897
+ if pop3Section is not None:
2898
+ transportSection["POP3"] = pop3Section
2899
+ # Decode IMAP/IMAP4 on TCP ports 143/993
2900
+ if dstPort in (143, 993) or srcPort in (143, 993):
2901
+ imapSection = decodeIMAP(rawPayload)
2902
+ if imapSection is not None:
2903
+ transportSection["IMAP"] = imapSection
2904
+ # Decode Telnet on TCP port 23
2905
+ if dstPort == 23 or srcPort == 23:
2906
+ telnetSection = decodeTelnet(rawPayload)
2907
+ if telnetSection is not None:
2908
+ transportSection["Telnet"] = telnetSection
2909
+ # Also scan non-IAC data packets for cleartext credentials
2910
+ telnetCreds = extractTelnetCredentials(rawPayload)
2911
+ if telnetCreds:
2912
+ if "Telnet" not in transportSection:
2913
+ transportSection["Telnet"] = {}
2914
+ transportSection["Telnet"].setdefault("Credentials", {}).update(
2915
+ telnetCreds
2916
+ )
2917
+ # Decode IRC on TCP ports 6667/6668/6669
2918
+ if dstPort in (6667, 6668, 6669) or srcPort in (6667, 6668, 6669):
2919
+ ircSection = decodeIRC(rawPayload)
2920
+ if ircSection is not None:
2921
+ transportSection["IRC"] = ircSection
2922
+ # Decode MTP/MMS on TCP port 1755
2923
+ if dstPort == 1755 or srcPort == 1755:
2924
+ mtpSection = decodeMTP(rawPayload)
2925
+ if mtpSection is not None:
2926
+ transportSection["MTP"] = mtpSection
2927
+ # Decode LDAP on TCP ports 389/636
2928
+ if dstPort in (389, 636) or srcPort in (389, 636):
2929
+ ldapSection = decodeLDAP(rawPayload)
2930
+ if ldapSection is not None:
2931
+ transportSection["LDAP"] = ldapSection
2932
+ # Decode MySQL on TCP port 3306
2933
+ if dstPort == 3306 or srcPort == 3306:
2934
+ mysqlSection = decodeMySQL(rawPayload)
2935
+ if mysqlSection is not None:
2936
+ transportSection["MySQL"] = mysqlSection
2937
+ # Decode PostgreSQL on TCP port 5432
2938
+ if dstPort == 5432 or srcPort == 5432:
2939
+ pgSection = decodePostgreSQL(rawPayload)
2940
+ if pgSection is not None:
2941
+ transportSection["PostgreSQL"] = pgSection
2942
+ # Decode XMPP on TCP ports 5222/5223
2943
+ if dstPort in (5222, 5223) or srcPort in (5222, 5223):
2944
+ xmppSection = decodeXMPP(rawPayload)
2945
+ if xmppSection is not None:
2946
+ transportSection["XMPP"] = xmppSection
2947
+ # Decode SMB on TCP ports 139/445
2948
+ if dstPort in (139, 445) or srcPort in (139, 445):
2949
+ smbSection = decodeSMB(rawPayload)
2950
+ if smbSection is not None:
2951
+ transportSection["SMB"] = smbSection
2952
+ # Decode MQTT on TCP ports 1883/8883
2953
+ if dstPort in (1883, 8883) or srcPort in (1883, 8883):
2954
+ mqttSection = decodeMQTT(rawPayload)
2955
+ if mqttSection is not None:
2956
+ transportSection["MQTT"] = mqttSection
2957
+ # Decode RTSP on TCP port 554
2958
+ if dstPort == 554 or srcPort == 554:
2959
+ rtspSection = decodeRTSP(rawPayload)
2960
+ if rtspSection is not None:
2961
+ transportSection["RTSP"] = rtspSection
2962
+ # Decode BGP on TCP port 179
2963
+ if dstPort == 179 or srcPort == 179:
2964
+ bgpSection = decodeBGP(rawPayload)
2965
+ if bgpSection is not None:
2966
+ transportSection["BGP"] = bgpSection
2967
+ # Decode NNTP on TCP port 119
2968
+ if dstPort == 119 or srcPort == 119:
2969
+ nntpSection = decodeNNTP(rawPayload)
2970
+ if nntpSection is not None:
2971
+ transportSection["NNTP"] = nntpSection
2972
+ # Decode RADIUS on TCP ports 1812/1813/1645/1646 (RFC 6614 defines RADIUS over TCP)
2973
+ if dstPort in (1812, 1813, 1645, 1646) or srcPort in (
2974
+ 1812,
2975
+ 1813,
2976
+ 1645,
2977
+ 1646,
2978
+ ):
2979
+ radiusSection = decodeRADIUS(rawPayload)
2980
+ if radiusSection is not None:
2981
+ transportSection["RADIUS"] = radiusSection
2982
+ protocolKey = "TCP"
2983
+ elif isUdp:
2984
+ # Build UDP section; decode DNS if present
2985
+ dnsSection = None
2986
+ if p.haslayer("DNS"):
2987
+ dnsLayer = p["DNS"]
2988
+ queryNames = []
2989
+ answerNames = []
2990
+ answerIps = []
2991
+ try:
2992
+ qd = dnsLayer.qd
2993
+ while qd is not None and hasattr(qd, "qname"):
2994
+ queryNames.append(
2995
+ qd.qname.decode(errors="ignore").rstrip(".")
2996
+ )
2997
+ qd = qd.payload if hasattr(qd, "payload") else None
2998
+ except Exception:
2999
+ pass
3000
+ try:
3001
+ an = dnsLayer.an
3002
+ while an is not None and hasattr(an, "rrname"):
3003
+ answerNames.append(
3004
+ an.rrname.decode(errors="ignore").rstrip(".")
3005
+ )
3006
+ if hasattr(an, "rdata"):
3007
+ answerIps.append(str(an.rdata))
3008
+ an = an.payload if hasattr(an, "payload") else None
3009
+ except Exception:
3010
+ pass
3011
+ firstQname = queryNames[0] if queryNames else ""
3012
+ firstAip = answerIps[0] if answerIps else ""
3013
+ dnsSection = {
3014
+ "Transaction ID": int(dnsLayer.id),
3015
+ "dns.id": int(dnsLayer.id),
3016
+ "Is Response": bool(dnsLayer.qr),
3017
+ "dns.qr": bool(dnsLayer.qr),
3018
+ "Query Names": queryNames,
3019
+ "dns.qnames": queryNames,
3020
+ "First Query Name": firstQname,
3021
+ "dns.qname": firstQname,
3022
+ "Answer Names": answerNames,
3023
+ "dns.anames": answerNames,
3024
+ "Answer IPs": answerIps,
3025
+ "dns.aips": answerIps,
3026
+ "First Answer IP": firstAip,
3027
+ "dns.aip": firstAip,
3028
+ "Question Count": int(dnsLayer.qdcount),
3029
+ "dns.qdcount": int(dnsLayer.qdcount),
3030
+ "Answer Count": int(dnsLayer.ancount),
3031
+ "dns.ancount": int(dnsLayer.ancount),
3032
+ }
3033
+
3034
+ transportSection = {
3035
+ "Source port": int(srcPort),
3036
+ "udp.src.port": int(srcPort),
3037
+ "Destination port": int(dstPort),
3038
+ "udp.dst.port": int(dstPort),
3039
+ "UDP checksum": hex(int(p["UDP"].chksum)),
3040
+ "udp.chksum": hex(int(p["UDP"].chksum)),
3041
+ "UDP length": int(p["UDP"].len),
3042
+ "udp.len": int(p["UDP"].len),
3043
+ "Wire length": len(p["UDP"]),
3044
+ "wire.len": len(p["UDP"]),
3045
+ }
3046
+ if dnsSection is not None:
3047
+ transportSection["DNS"] = dnsSection
3048
+ # Decode SNMP on UDP ports 161/162
3049
+ if dstPort in (161, 162) or srcPort in (161, 162):
3050
+ snmpSection = decodeSNMP(p)
3051
+ if snmpSection is not None:
3052
+ transportSection["SNMP"] = snmpSection
3053
+ # Decode DHCP on UDP ports 67/68
3054
+ if dstPort in (67, 68) or srcPort in (67, 68):
3055
+ dhcpSection = decodeDHCP(p)
3056
+ if dhcpSection is not None:
3057
+ transportSection["DHCP"] = dhcpSection
3058
+ # Decode NTP on UDP port 123
3059
+ if dstPort == 123 or srcPort == 123:
3060
+ ntpSection = decodeNTP(p)
3061
+ if ntpSection is not None:
3062
+ transportSection["NTP"] = ntpSection
3063
+ # Decode SIP on UDP ports 5060/5061
3064
+ if dstPort in (5060, 5061) or srcPort in (5060, 5061):
3065
+ sipSection = decodeSIP(rawPayload)
3066
+ if sipSection is not None:
3067
+ transportSection["SIP"] = sipSection
3068
+ # Decode TFTP on UDP port 69
3069
+ if dstPort == 69 or srcPort == 69:
3070
+ tftpSection = decodeTFTP(rawPayload)
3071
+ if tftpSection is not None:
3072
+ transportSection["TFTP"] = tftpSection
3073
+ # Decode MQTT on UDP ports 1883/8883
3074
+ if dstPort in (1883, 8883) or srcPort in (1883, 8883):
3075
+ mqttSection = decodeMQTT(rawPayload)
3076
+ if mqttSection is not None:
3077
+ transportSection["MQTT"] = mqttSection
3078
+ # Decode LDAP on UDP ports 389/636
3079
+ if dstPort in (389, 636) or srcPort in (389, 636):
3080
+ ldapSection = decodeLDAP(rawPayload)
3081
+ if ldapSection is not None:
3082
+ transportSection["LDAP"] = ldapSection
3083
+ # Decode RADIUS on UDP ports 1812/1813/1645/1646
3084
+ if dstPort in (1812, 1813, 1645, 1646) or srcPort in (
3085
+ 1812,
3086
+ 1813,
3087
+ 1645,
3088
+ 1646,
3089
+ ):
3090
+ radiusSection = decodeRADIUS(rawPayload)
3091
+ if radiusSection is not None:
3092
+ transportSection["RADIUS"] = radiusSection
3093
+ protocolKey = "UDP"
3094
+ else:
3095
+ # ICMP transport section
3096
+ icmpLayer = p["ICMP"]
3097
+ icmpTypeMap = {
3098
+ 0: "Echo Reply",
3099
+ 3: "Destination Unreachable",
3100
+ 4: "Source Quench",
3101
+ 5: "Redirect",
3102
+ 8: "Echo Request",
3103
+ 9: "Router Advertisement",
3104
+ 10: "Router Solicitation",
3105
+ 11: "Time Exceeded",
3106
+ 12: "Parameter Problem",
3107
+ 13: "Timestamp",
3108
+ 14: "Timestamp Reply",
3109
+ 15: "Information Request",
3110
+ 16: "Information Reply",
3111
+ }
3112
+ icmpType = int(icmpLayer.type) if hasattr(icmpLayer, "type") else 0
3113
+ icmpCode = int(icmpLayer.code) if hasattr(icmpLayer, "code") else 0
3114
+ icmpTypeStr = icmpTypeMap.get(icmpType, f"Type {icmpType}")
3115
+ icmpId = "N/A"
3116
+ icmpSeq = "N/A"
3117
+ try:
3118
+ icmpId = int(icmpLayer.id)
3119
+ except Exception:
3120
+ pass
3121
+ try:
3122
+ icmpSeq = int(icmpLayer.seq)
3123
+ except Exception:
3124
+ pass
3125
+ icmpChksum = "N/A"
3126
+ try:
3127
+ icmpChksum = hex(int(icmpLayer.chksum))
3128
+ except Exception:
3129
+ pass
3130
+ transportSection = {
3131
+ "Type": icmpTypeStr,
3132
+ "icmp.type": icmpTypeStr,
3133
+ "Code": icmpCode,
3134
+ "icmp.code": icmpCode,
3135
+ "ID": icmpId,
3136
+ "icmp.id": icmpId,
3137
+ "Sequence": icmpSeq,
3138
+ "icmp.seq": icmpSeq,
3139
+ "ICMP Checksum": icmpChksum,
3140
+ "icmp.chksum": icmpChksum,
3141
+ "Wire length": len(p["ICMP"]),
3142
+ "wire.len": len(p["ICMP"]),
3143
+ }
3144
+ protocolKey = "ICMP"
3145
+
3146
+ packetInfo = {
3147
+ "Packet Processed": int(packetIndex),
3148
+ "Packet Timestamp": timestamp,
3149
+ "packet.timestamp": timestamp,
3150
+ "Protocol": protocolKey,
3151
+ "packet.proto": protocolKey,
3152
+ # Include Ethernet MAC data when at least one IP is local (private),
3153
+ # so that mixed private+internet traffic still exposes the local device's MAC.
3154
+ "Ethernet Frame": {
3155
+ "MAC Source": srcMacAddr,
3156
+ "ether.src.mac.addr": srcMacAddr,
3157
+ "MAC Destination": dstMacAddr,
3158
+ "ether.dst.mac.addr": dstMacAddr,
3159
+ "MAC Source Vendor": srcMacVendor,
3160
+ "ether.src.mac.vendor": srcMacVendor,
3161
+ "MAC Destination Vendor": dstMacVendor,
3162
+ "ether.dst.mac.vendor": dstMacVendor,
3163
+ }
3164
+ if (
3165
+ srcGeoInfo.get("Location") == "Localnet"
3166
+ or dstGeoInfo.get("Location") == "Localnet"
3167
+ )
3168
+ else "N/A",
3169
+ "IP": {
3170
+ "Source IP": str(p["IP"].src),
3171
+ "ip.src.addr": str(p["IP"].src),
3172
+ "Destination IP": str(p["IP"].dst),
3173
+ "ip.dst.addr": str(p["IP"].dst),
3174
+ "IP Checksum": hex(int(p["IP"].chksum)),
3175
+ "ip.chksum": hex(int(p["IP"].chksum)),
3176
+ "IP layer length": int(p["IP"].len),
3177
+ "ip.len": int(p["IP"].len),
3178
+ },
3179
+ protocolKey: transportSection,
3180
+ "Raw data": {
3181
+ "Payload": {
3182
+ "Hex Encoded": rawPayload.hex(),
3183
+ "payload.hex": rawPayload.hex(),
3184
+ "ASCII Encoded": rawPayload.decode(errors="ignore"),
3185
+ "payload.ascii": rawPayload.decode(errors="ignore"),
3186
+ },
3187
+ "Packet": bytes(p).hex(),
3188
+ "packet.hex": bytes(p).hex(),
3189
+ "Payload Length": len(rawPayload),
3190
+ "payload.len": len(rawPayload),
3191
+ },
3192
+ }
3193
+ # Use the non-local IP as the host key; fall back to src for LAN captures
3194
+ hostKey = (
3195
+ p["IP"].dst if dstGeoInfo.get("Location") != "Localnet" else p["IP"].src
3196
+ )
3197
+ mergedInfo = joinInfo(
3198
+ outputDir,
3199
+ dstPortStr,
3200
+ packetIndex,
3201
+ json.dumps(dataTypeInfo).encode(),
3202
+ json.dumps(packetInfo).encode(),
3203
+ hostKey,
3204
+ )
3205
+ return mergedInfo
3206
+
3207
+
3208
def processPacketAtIndex(packetIndex, srcPortFilter, dstPortFilter, timeout):
    """
    Run packetLoop() on the packet stored at `packetIndex` in the global `packets` list.

    Worker tasks are given only an integer index and look the packet up here, so no
    scapy packet object has to be passed to the executor.

    Returns whatever packetLoop() returns, or None (without touching the packet)
    when `stopEvent` is already set.
    """
    keepGoing = not stopEvent.is_set()
    if keepGoing:
        packet = packets[packetIndex]
        return packetLoop(packet, packetIndex, srcPortFilter, dstPortFilter, timeout)
    return None
3218
+
3219
+
3220
# Module-level list, created empty when the script starts.
# NOTE(review): nothing in the visible part of this file reads or appends to it
# (llmBrief() appends to `llmSummaries` instead) -- confirm whether it is still needed.
llmSummariesBatch = []
3221
+
3222
+
3223
def infoDistiller(batchSize, lines=None, workerFn=None, maxWorkers=4):
    """
    Split the collected packet records into batches and process each batch on a
    thread pool.

    batchSize: number of items per batch. None, 0 or a negative value falls back
        to 50 (with a warning on stderr) instead of failing inside range().
    lines: sliceable sequence of input data; defaults to the global `allPacketInfo`.
    workerFn: function that takes a list (batch) and processes it; defaults to
        llmBrief.
    maxWorkers: number of threads (default 4).

    Returns a list with one entry per successful batch, in completion order
    (not input order). A batch whose worker raises is reported on stdout and
    left out of the result.
    """
    print("Starting LLM calls...")
    # Resolve the defaults at call time so the module globals are only touched
    # when the caller did not supply its own data / worker.
    if lines is None:
        lines = allPacketInfo
    if workerFn is None:
        workerFn = llmBrief

    fallbackBatchSize = 50
    if not batchSize or batchSize < 1:
        print(
            f"Warning: invalid batch size {batchSize!r}; using {fallbackBatchSize}.",
            file=sys.stderr,
        )
        batchSize = fallbackBatchSize

    packetBatches = [
        lines[start : start + batchSize] for start in range(0, len(lines), batchSize)
    ]
    results = []
    if not packetBatches:
        # Nothing to do; avoid spinning up a pool for zero tasks.
        return results

    with ThreadPoolExecutor(max_workers=maxWorkers) as executor:
        taskFutures = [executor.submit(workerFn, batch) for batch in packetBatches]

        for future in as_completed(taskFutures):
            try:
                results.append(future.result())
            except Exception as e:
                # Best effort: one failed batch must not discard the others.
                print(f"Batch failed: {e}")

    return results
3251
+
3252
+
3253
def popDictKey(obj, keyToRemove):
    """
    Return a copy of `obj` with every dict entry whose key equals `keyToRemove`
    dropped, at any depth of nested dicts and lists.

    Dicts and lists are rebuilt, so the input is not modified. Any other value
    (including tuples and their contents) is returned unchanged.
    """
    if isinstance(obj, dict):
        pruned = {}
        for name, value in obj.items():
            if name != keyToRemove:
                pruned[name] = popDictKey(value, keyToRemove)
        return pruned

    if isinstance(obj, list):
        cleaned = []
        for element in obj:
            cleaned.append(popDictKey(element, keyToRemove))
        return cleaned

    return obj
3263
+
3264
+
3265
def llmBrief(jsonBatch):
    """
    Ask the LLM for a short summary of one batch of packet metadata.

    The "Raw data" entries are removed first (via popDictKey) to keep the prompt
    short; what remains is serialised with json.dumps and appended to a fixed
    instruction prompt sent through ollama.generate() using `llmModelName`.

    Returns the response text after appending it to the shared `llmSummaries`
    list under `llmSummariesLock`. Returns None, and appends nothing, when the
    reply is empty or carries no "response" entry.
    """
    strippedJson = popDictKey(jsonBatch, "Raw data")
    packetInfoStr = json.dumps(strippedJson)
    llmResponse = ollama.generate(
        model=llmModelName,
        prompt="Provide a concise summary of the following packets, in paragraph form, limited to three paragraphs: "
        + packetInfoStr,
    )
    # Never index a reply that has no "response" entry.
    if not llmResponse or "response" not in llmResponse:
        return None

    summaryText = llmResponse["response"]
    with llmSummariesLock:
        llmSummaries.append(summaryText)
    return summaryText
3281
+
3282
+
3283
def startThreading():
    """
    Run per-packet processing for every TCP, UDP or ICMP packet in the pre-loaded
    `packets` list on a ThreadPoolExecutor.

    The indices of the matching packets are cut into chunks (about four chunks
    per worker thread, never smaller than one packet) and each chunk is submitted
    as a single task, which keeps the number of scheduled tasks low. Every chunk
    stops early once `stopEvent` is set, and so does the loop that waits for the
    chunk results.

    Exceptions raised by a chunk are reported on stderr only when `verbose` is at
    least 1. The function has no return value.
    """
    if __name__ == "__main__":
        print(
            f"Spooling up {numWorkerThreads} worker threads to process {totalPackets} packets...",
            file=sys.stderr,
        )

    def hasTransportLayer(pkt):
        """True for packets carrying a TCP, UDP or ICMP layer."""
        return pkt.haslayer("TCP") or pkt.haslayer("UDP") or pkt.haslayer("ICMP")

    # Positions (in the full capture) of the packets worth processing
    eligibleIndices = [
        position for position, pkt in enumerate(packets) if hasTransportLayer(pkt)
    ]

    # Roughly four chunks per worker thread
    perChunk = max(1, len(eligibleIndices) // (numWorkerThreads * 4))
    chunkStarts = range(0, len(eligibleIndices), perChunk)
    packetChunks = [eligibleIndices[start : start + perChunk] for start in chunkStarts]

    def processChunk(chunk):
        """Process a chunk of packet indices and collect the truthy results."""
        collected = []
        for idx in chunk:
            if stopEvent.is_set():
                break
            outcome = processPacketAtIndex(
                idx, args.source_port, args.dest_port, args.timeout
            )
            if outcome:
                collected.append(outcome)
        return collected

    with ThreadPoolExecutor(max_workers=numWorkerThreads) as executor:
        # Remember which chunk belongs to which future for error reporting
        chunkByFuture = {}
        for chunk in packetChunks:
            chunkByFuture[executor.submit(processChunk, chunk)] = chunk

        for finished in as_completed(chunkByFuture):
            if stopEvent.is_set():
                break
            try:
                finished.result()
            except Exception as exc:
                if verbose >= 1:
                    print(
                        f"Packet {chunkByFuture[finished]} raised an exception: {exc}",
                        file=sys.stderr,
                    )
3340
+
3341
+
3342
# --- Command-line interface.
# The description is a plain (non-f) string: it contains no placeholders.
parser = argparse.ArgumentParser(
    prog="snitch.py",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=textwrap.dedent(
        """
        PacketSnitch.
        This software analyzes pcap network captures. It extracts TCP and UDP packet data,
        writes testcases, and gathers extra information such as MIME types, entropy, geoip,
        network class, banners, and more. DNS packets (UDP port 53) are decoded and included
        in the output. Optionally, it performs active reconnaissance to enrich the output
        with additional network and server information. A full capture summary is generated
        using a large language model to provide insights into the data.
        Outputs:
        - Testcase files: outputDirPath/<dest_port>/pcap.data_packet.<index>.dat
        - Testcase info: outputDirPath/<dest_port>/pcap.info_packet.<index>.json
        - all_testcases_info.json: a consolidated file with info for the entire
        capture.
        """,
    ),
    epilog="Example usage: \n python3 snitch.py traffic.pcap -o outputDirPath -s 80 -d 8080 -T 5 -a",
)
parser.add_argument("pcap_file", help="The .pcap file to parse.")
parser.add_argument(
    "-o",
    "--output",
    help="The output directory for the testcases.",
    default="testcases",
)
parser.add_argument(
    "-s",
    "--source-port",
    help="Only generate from this source port.",
    type=int,
)
parser.add_argument(
    "-d",
    "--dest-port",
    help="Only generate for this destination port.",
    type=int,
)
parser.add_argument(
    "-T",
    "--timeout",
    help="Timeout for network requests in seconds (default: 3).",
    type=int,
    default=3,
)
parser.add_argument(
    "-a",
    "--active-recon",
    help="Perform active reconnaissance to gather extra info (geoip, banners, titles).",
    action="store_true",
)
parser.add_argument(
    "-c",
    "--conf",
    help="Path to configuration YAML file (default: conf.yaml).",
)
parser.add_argument(
    "-v",
    "--verbose",
    help="Enable verbose output for debugging.",
    action="count",
    default=0,
)
parser.add_argument(
    "--nollm",
    help="Disable LLM summarisation regardless of configuration.",
    action="store_true",
)
# Parse the command line exactly once and derive `verbose` from the result;
# `verbose` is read as a global by functions defined above.
args = parser.parse_args()
verbose = args.verbose
3414
# --- Load the configuration (path from -c/--conf, otherwise conf.yaml).
try:
    config = configLoader(args.conf if args.conf else "conf.yaml")
except Exception as configErr:
    # Deliberately broad: any failure to load the file (for example when there
    # is no config file) falls back to the defaults below, which should work
    # decently. The reason is only reported in verbose mode so the default
    # output stays unchanged.
    if verbose >= 1:
        print(
            f"Warning: could not load configuration ({configErr}); using built-in defaults.",
            file=sys.stderr,
        )
    config = {
        "active_recon": True,
        "ollama": {
            "use_llm": True,
            "llm_brief": True,
            "model": "minimax-m2.5:cloud",
            "response_length": 340,
            "server_call_threads": 5,
            "batch_size": 65,
        },
        "threads": 16,
        "final_summary": True,
    }
3432
pcapFilePath = args.pcap_file
geoDbPath = scriptDir + "common/GeoLite2-City.mmdb"
macVendorsPath = scriptDir + "common/mac-vendors-export.csv"
icannCsvPath = scriptDir + "common/service-names-port-numbers.csv"

# --- Open the GeoIP database once for the lifetime of the process.
# The geoip2 Reader is documented as thread-safe for concurrent city() calls.
if os.path.exists(geoDbPath):
    geoIpReader = geoip2.database.Reader(geoDbPath)
else:
    # Keep the name bound: the shutdown code tests `geoIpReader is not None`.
    geoIpReader = None
    print("Warning: GeoIP database not found at " + geoDbPath, file=sys.stderr)

# --- Load ICANN port-description CSV into a dict for O(1) per-packet lookups.
# Without this, every call to getPortDescription() would scan the full CSV.
if os.path.exists(icannCsvPath):
    with open(icannCsvPath, newline="", encoding="utf-8") as csvFile:
        for csvRow in csv.DictReader(csvFile):
            try:
                portNum = int(csvRow.get("Port Number", ""))
            except (ValueError, TypeError):
                # Blank, non-numeric or missing port cell: row has no usable key.
                continue
            # DictReader fills the cells of short rows with None, hence `or ""`.
            protoStr = (csvRow.get("Transport Protocol") or "").strip().lower()
            portDescription = csvRow.get("Description", "No description available")
            if portNum and protoStr:
                portDescriptionMap[(portNum, protoStr)] = portDescription
else:
    print("Warning: ICANN port CSV not found at " + icannCsvPath, file=sys.stderr)

# --- Load MAC vendor CSV into a dict for O(1) per-packet lookups.
# Without this, every call to macAddrToVendor() would scan the full CSV.
if os.path.exists(macVendorsPath):
    with open(macVendorsPath, newline="", encoding="utf-8") as csvFile:
        for csvRow in csv.DictReader(csvFile):
            macPrefix = csvRow.get("Mac Prefix")
            # Skip rows whose prefix cell is missing (None on short rows).
            if macPrefix is not None and "Vendor Name" in csvRow:
                macVendorMap[macPrefix.upper()] = csvRow["Vendor Name"]
else:
    print("Warning: MAC vendor CSV not found at " + macVendorsPath, file=sys.stderr)
3469
+
3470
# --- Fail early, with a readable message, before scapy tries to open the file.
if not os.path.exists(args.pcap_file):
    print("The .pcap file does not exist.", file=sys.stderr)
    sys.exit(1)

# --- Load the whole capture into memory once; worker threads index into `packets`.
packets = scapy.rdpcap(args.pcap_file)  # type: ignore
tcpStreamInitialDstPortMap = buildTcpStreamInitialDstPortMap(packets)
allPacketCount = len(packets)
llmBatchSize = 0
# Only TCP, UDP and ICMP packets are processed, so only those are counted.
totalPackets = sum(
    1 for p in packets if p.haslayer("TCP") or p.haslayer("UDP") or p.haslayer("ICMP")
)
if totalPackets == 0:
    print("No TCP, UDP, or ICMP packets found in the capture.", file=sys.stderr)
    sys.exit(1)

# A missing or falsy "threads" entry leaves numWorkerThreads as it was.
if config.get("threads"):
    numWorkerThreads = config["threads"]

# --- Output directory: built-in default, then -o/--output, then the config entry.
# NOTE(review): a config "output_dir" overrides an explicit -o here; confirm
# that this precedence is intended.
outputDir = currentDir + "/" + "testcases"
if args.output and args.output != "testcases":
    outputDir = args.output
    print("Using output directory: " + args.output, file=sys.stderr)
if "output_dir" in config:
    outputDir = currentDir + "/" + config["output_dir"]
    print("Using output directory from config: " + outputDir, file=sys.stderr)

# --- Active reconnaissance: -a/--active-recon switches it on, otherwise the
# config decides (absent or falsy means off).
if args.active_recon:
    activeRecon = True
else:
    activeRecon = config.get("active_recon") or False

# --- LLM settings, taken from the "ollama" config section when it names a model.
if "ollama" in config and config["ollama"].get("model"):
    if config["ollama"].get("use_llm", False) and verbose >= 1:
        print(
            "LLM integration enabled. Using model: " + config["ollama"]["model"] + ".",
            file=sys.stderr,
        )
    llmModelName = config["ollama"]["model"]
    if config["ollama"].get("llm_brief"):
        print(
            "LLM brief generation enabled. Only packet metadata will be sent through the LLM.",
            file=sys.stderr,
        )
    else:
        print(
            "LLM brief generation disabled. LLM will be used for full data packets! This will take significantly more time, but will provide more detailed llmSummaries for each packet.",
            file=sys.stderr,
        )
    llmResponseLength = config["ollama"].get("response_length", 200)
    llmBatchSize = config["ollama"].get("batch_size", 65)
    useLlm = config["ollama"].get("use_llm", False)

# --nollm wins over the configuration: LLM use and the final summary are off.
if args.nollm:
    useLlm = False
    config = {
        "active_recon": True,
        "ollama": {
            "use_llm": False,
            "llm_brief": False,
        },
        "threads": 16,
        "final_summary": False,
    }

# NOTE(review): llmModelName / useLlm are only assigned above when the config
# names a model; presumably they have defaults earlier in the file -- confirm.
if llmModelName and useLlm:
    if llmModelName.endswith(":cloud"):
        if verbose >= 2:
            # Report the model that is actually configured.
            print(
                "Using cloud-based LLM model: "
                + llmModelName
                + ". Ensure you have network connectivity and API access.",
                file=sys.stderr,
            )
print(
    "Preparing to process "
    + str(totalPackets)
    + " TCP/UDP/ICMP packets with "
    + str(numWorkerThreads)
    + " threads.",
    file=sys.stderr,
)
3548
# --- Main run: reset the output directory, process the capture, then summarise
# and write the results.
try:
    if os.path.isdir(outputDir):
        shutil.rmtree(outputDir, ignore_errors=True)
        # Small delay to ensure file system has completed deletions
        time.sleep(1)
    os.makedirs(outputDir, exist_ok=True)
    try:
        threadingResult = startThreading()
    except Exception as startErr:
        # One retry only; a second failure propagates after the finally block.
        print(
            f"Warning: startThreading raised an exception ({startErr}); retrying.",
            file=sys.stderr,
        )
        threadingResult = startThreading()
finally:
    finalSummary = ""
    # Tolerate a configuration without an "ollama" section or "llm_brief" key:
    # a KeyError here would stop hosts.json from being written below.
    ollamaConfig = config.get("ollama") or {}
    llmBriefEnabled = bool(ollamaConfig.get("llm_brief"))

    if not llmBriefEnabled and useLlm:
        infoDistiller(llmBatchSize)
    else:
        # Strip raw payload bytes before sending to the LLM to keep the prompt small,
        # then restore the full allPacketInfo for the hosts.json output.
        allPacketInfoBackup = allPacketInfo.copy()
        strippedPacketInfo = popDictKey(allPacketInfo, "Raw data")
        allPacketInfo = strippedPacketInfo
        if allPacketInfo and useLlm:
            print("Generating LLM brief for batch of packets...")
            infoDistiller(50)
        allPacketInfo = allPacketInfoBackup

    # Gate on useLlm (default off) rather than on the raw config entry, so no
    # LLM call is made when LLM use is not enabled.
    if config.get("final_summary", True) and useLlm:
        joinedSummaries = (
            " ".join(llmSummaries) if llmSummaries else "No LLM summaries generated."
        )
        try:
            finalLlmResponse = ollama.generate(
                model=llmModelName,
                prompt="Provide a concise summary of the following packets, in paragraph form, limited to three paragraphs: "
                + joinedSummaries,
            )
            finalSummary = finalLlmResponse["response"]
            with open(
                outputDir + "/final_summary.txt", "w", encoding="utf-8"
            ) as summaryFile:
                summaryFile.write(finalSummary)
            print("\n" + finalSummary)
            print("\nFinal summary saved to: " + outputDir + "/final_summary.txt")
        except Exception as e:
            # Best effort: a failed summary must not prevent the output below.
            print("\nLLM Final summary generation error: " + str(e))

    # Always write hosts.json so the frontend can load data regardless of
    # whether LLM summarisation was enabled or succeeded.
    byHost(outputDir, finalSummary)

    # Close the GeoIP reader now that all packets have been processed
    if geoIpReader is not None:
        geoIpReader.close()

# Reached only when processing did not raise, so a failure is never reported
# as a successful run.
print(
    "Processing complete. Generated testcases and info files are located in: "
    + outputDir,
    file=sys.stderr,
)

sys.exit(0)