FlowAnalyzer 0.4.4__tar.gz → 0.4.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -199,9 +199,7 @@ class FlowAnalyzer:
             "-e",
             "http.request.full_uri", # 7
             "-e",
-            "http.file_data", # 8
-            "-e",
-            "tcp.segment.count", # 9
+            "tcp.segment.count", # 8
             "-E",
             "header=n",
             "-E",
@@ -9,7 +9,7 @@ from .logging_config import logger
 
 class PacketParser:
     @staticmethod
-    def parse_packet_data(row: list) -> Tuple[int, int, float, str, bytes, bytes]:
+    def parse_packet_data(row: list) -> Tuple[int, int, float, str, bytes]:
         """
         Parse a single line of Tshark output
         row definition (all bytes):
@@ -21,18 +21,17 @@ class PacketParser:
         5: frame.time_epoch
         6: exported_pdu.exported_pdu
         7: http.request.full_uri
-        8: http.file_data
-        9: tcp.segment.count
+        8: tcp.segment.count
         """
         frame_num = int(row[3])
         request_in = int(row[1]) if row[1] else frame_num
         # Decode only URI to string
         full_uri = parse.unquote(row[7].decode("utf-8", errors="replace")) if row[7] else ""
         time_epoch = float(row[5])
-        http_file_data = row[8] if len(row) > 8 else b""
 
         # Logic for Raw Packet (Header Source)
-        is_reassembled = len(row) > 9 and row[9]
+        # Previous index 9 is now 8 since we removed http.file_data
+        is_reassembled = len(row) > 8 and row[8]
 
         if is_reassembled and row[2]:
             full_request = row[2]
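
For reference, the field indices in the docstring above follow the order of the "-e" options passed to tshark (see the FlowAnalyzer hunk at the top of this diff). Below is a minimal, hedged sketch of such an invocation, using only options that appear elsewhere in this diff; the capture path and display filter are hypothetical:

    import subprocess

    # Assumption: only two of the extracted fields are shown here; the real command
    # passes one "-e" option per docstring index (0-8), in order.
    cmd = [
        "tshark", "-r", "capture.pcapng", "-Y", "http",  # hypothetical file / filter
        "-T", "fields",                                   # tab-separated field output
        "-e", "http.request.full_uri",                    # index 7
        "-e", "tcp.segment.count",                        # index 8 (was 9 before http.file_data was dropped)
        "-E", "header=n",                                 # suppress the header row
    ]
    lines = subprocess.run(cmd, capture_output=True).stdout.splitlines()
    print(len(lines), "packets matched")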
@@ -42,16 +41,18 @@ class PacketParser:
             # Fallback (e.g. Exported PDU)
             full_request = row[2] if row[2] else (row[6] if row[6] else b"")
 
-        return frame_num, request_in, time_epoch, full_uri, full_request, http_file_data
+        return frame_num, request_in, time_epoch, full_uri, full_request
 
     @staticmethod
     def split_http_headers(file_data: bytes) -> Tuple[bytes, bytes]:
         headerEnd = file_data.find(b"\r\n\r\n")
         if headerEnd != -1:
             return file_data[: headerEnd + 4], file_data[headerEnd + 4 :]
-        elif file_data.find(b"\n\n") != -1:
-            headerEnd = file_data.index(b"\n\n") + 2
-            return file_data[:headerEnd], file_data[headerEnd:]
+
+        headerEnd = file_data.find(b"\n\n")
+        if headerEnd != -1:
+            return file_data[: headerEnd + 2], file_data[headerEnd + 2 :]
+
         return b"", file_data
 
     @staticmethod
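
The rewrite above replaces the elif branch with a second independent lookup, so a header terminated by a bare "\n\n" takes the same find-and-slice path as a "\r\n\r\n" one. A minimal usage sketch of split_http_headers as defined in this hunk:

    from FlowAnalyzer.PacketParser import PacketParser

    # The returned header keeps its terminating blank line; the remainder is the body.
    header, body = PacketParser.split_http_headers(b"HTTP/1.1 200 OK\r\nContent-Length: 5\r\n\r\nhello")
    assert header.endswith(b"\r\n\r\n") and body == b"hello"

    # Bare-LF separators now go through the same pattern instead of find()/index().
    header, body = PacketParser.split_http_headers(b"X-Test: 1\n\nbody")
    assert header == b"X-Test: 1\n\n" and body == b"body"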
@@ -67,20 +68,36 @@ class PacketParser:
         while cursor < total_len:
             newline_idx = file_data.find(b"\n", cursor)
             if newline_idx == -1:
+                # If no newline found, maybe it's just remaining data (though strictly should end with 0 chunk)
+                # But for robustness we might perform a "best effort" or just stop.
+                # raising ValueError("Not chunked data") might be too aggressive if we are just "trying" to dechunk
+                # Let's assume non-chunked if strict format not found
                 raise ValueError("Not chunked data")
 
             size_line = file_data[cursor:newline_idx].strip()
+            # Handle chunk extension: ignore everything after ';'
+            if b";" in size_line:
+                size_line = size_line.split(b";", 1)[0].strip()
+
             if not size_line:
                 cursor = newline_idx + 1
                 continue
 
-            chunk_size = int(size_line, 16)
+            try:
+                chunk_size = int(size_line, 16)
+            except ValueError:
+                raise ValueError("Invalid chunk size")
+
             if chunk_size == 0:
                 break
 
             data_start = newline_idx + 1
             data_end = data_start + chunk_size
 
+            # Robustness check
+            if data_start > total_len:
+                break
+
             if data_end > total_len:
                 chunks.append(file_data[data_start:])
                 break
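
The additions in this hunk make the dechunker tolerant of chunk extensions and malformed size lines. A small sketch of the resulting behaviour on a hand-built chunked body (not taken from a real capture):

    from FlowAnalyzer.PacketParser import PacketParser

    # Size lines such as "5;ext=1" are truncated at ';' before the hex parse,
    # which is what the chunked sample in tests/test_parser.py relies on.
    chunked = b"5;ext=1\r\nHello\r\n7\r\n, World\r\n0\r\n\r\n"
    assert PacketParser.dechunk_http_response(chunked) == b"Hello, World"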
@@ -88,51 +105,38 @@ class PacketParser:
             chunks.append(file_data[data_start:data_end])
 
             cursor = data_end
+            # Skip CRLF after chunk data
             while cursor < total_len and file_data[cursor] in (13, 10):
                 cursor += 1
 
         return b"".join(chunks)
 
     @staticmethod
-    def extract_http_file_data(full_request: bytes, http_file_data: bytes) -> Tuple[bytes, bytes]:
+    def extract_http_file_data(full_request: bytes) -> Tuple[bytes, bytes]:
         """
         Extract the file data from an HTTP request or response (hybrid mode, binary-optimized)
         """
         header = b""
         file_data = b""
 
+        if not full_request:
+            return b"", b""
         try:
-            # --- 1. Extract header ---
-            if full_request:
-                raw_bytes = binascii.unhexlify(full_request)
-                h_part, _ = PacketParser.split_http_headers(raw_bytes)
-                header = h_part
-
-            # --- 2. Extract body ---
-            if http_file_data:
-                try:
-                    file_data = binascii.unhexlify(http_file_data)
-                    return header, file_data
-                except binascii.Error:
-                    logger.warning("Failed to parse http.file_data hex, falling back to the original method")
-
-            # --- 3. Fallback mode ---
-            if full_request and not file_data:
-                raw_bytes = binascii.unhexlify(full_request)
-                _, body_part = PacketParser.split_http_headers(raw_bytes)
-
-                with contextlib.suppress(Exception):
-                    body_part = PacketParser.dechunk_http_response(body_part)
-
-                with contextlib.suppress(Exception):
-                    if body_part.startswith(b"\x1f\x8b"):
-                        body_part = gzip.decompress(body_part)
-
-                file_data = body_part
+            raw_bytes = binascii.unhexlify(full_request)
+            header, body_part = PacketParser.split_http_headers(raw_bytes)
+
+            with contextlib.suppress(Exception):
+                body_part = PacketParser.dechunk_http_response(body_part)
+
+            with contextlib.suppress(Exception):
+                if body_part.startswith(b"\x1f\x8b"):
+                    body_part = gzip.decompress(body_part)
+
+            file_data = body_part
             return header, file_data
 
-        except ValueError as e:
-            logger.error(f"Hex conversion failed: {str(e)[:100]}...")
+        except binascii.Error:
+            logger.error("Hex conversion failed")
             return b"", b""
         except Exception as e:
             logger.error(f"Unknown error while parsing HTTP data: {e}")
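
After this simplification, extract_http_file_data takes only the hex-encoded raw request/response and always derives the body from it, dechunking and gunzipping opportunistically. A minimal sketch of the new call shape (the tests added later in this diff exercise the gzip and chunked paths):

    from binascii import hexlify
    from FlowAnalyzer.PacketParser import PacketParser

    raw = b"HTTP/1.1 200 OK\r\nContent-Length: 5\r\n\r\nhello"
    header, body = PacketParser.extract_http_file_data(hexlify(raw))
    assert header.startswith(b"HTTP/1.1 200 OK") and body == b"hello"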
@@ -149,12 +153,12 @@ class PacketParser:
 
         row = line.split(b"\t")
         try:
-            frame_num, request_in, time_epoch, full_uri, full_request, http_file_data = PacketParser.parse_packet_data(row)
+            frame_num, request_in, time_epoch, full_uri, full_request = PacketParser.parse_packet_data(row)
 
-            if not full_request and not http_file_data:
+            if not full_request:
                 return None
 
-            header, file_data = PacketParser.extract_http_file_data(full_request, http_file_data)
+            header, file_data = PacketParser.extract_http_file_data(full_request)
 
             # row[0] is http.response.code (bytes)
             is_response = bool(row[0])
@@ -0,0 +1,128 @@
+import os
+import time
+from collections import defaultdict
+from typing import List, Tuple
+
+import dpkt
+
+
+class PcapSplitter:
+    """
+    Encapsulates logic to split a PCAP/PCAPNG file into multiple smaller PCAP files
+    based on TCP flows, dynamically balanced for parallel processing.
+    """
+
+    def __init__(self, pcap_file: str, output_dir: str):
+        self.pcap_file = pcap_file
+        self.output_dir = output_dir
+
+    def get_stream_key(self, tcp, ip) -> Tuple:
+        """Generate a 5-tuple key for the flow."""
+        src = ip.src
+        dst = ip.dst
+        sport = tcp.sport
+        dport = tcp.dport
+        # Canonicalize bidirectional flows to the same key
+        key1 = (src, dst, sport, dport)
+        key2 = (dst, src, dport, sport)
+        return key1 if key1 < key2 else key2
+
+    def split(self, threshold_mb: int = 10, default_chunks: int = 3) -> List[str]:
+        """
+        Split the pcap file into balanced chunks based on stream volume (bytes).
+        Uses a Greedy Partition Algorithm (Longest Processing Time first).
+
+        Args:
+            threshold_mb: File size threshold in MB. If smaller, do not split.
+            default_chunks: Number of chunks to split into if threshold is exceeded.
+
+        Returns:
+            List of generated file paths (or original file if not split).
+        """
+        if not os.path.exists(self.pcap_file):
+            raise FileNotFoundError(f"PCAP file not found: {self.pcap_file}")
+
+        file_size_mb = os.path.getsize(self.pcap_file) / (1024 * 1024)
+        if file_size_mb < threshold_mb:
+            print(f"File size {file_size_mb:.2f}MB < {threshold_mb}MB. Skipping split.")
+            return [self.pcap_file]
+
+        os.makedirs(self.output_dir, exist_ok=True)
+
+        start_time = time.time()
+        # Dictionary to store packets: stream_key -> list of (ts, buf)
+        streams = defaultdict(list)
+        # Dictionary to store total size: stream_key -> total_bytes
+        stream_sizes = defaultdict(int)
+
+        # 1. Read and Group Packets
+        print(f"Reading {self.pcap_file}...")
+        with open(self.pcap_file, "rb") as f:
+            if self.pcap_file.lower().endswith(".pcapng"):
+                reader = dpkt.pcapng.Reader(f)
+            else:
+                reader = dpkt.pcap.Reader(f)
+
+            for ts, buf in reader:
+                try:
+                    eth = dpkt.ethernet.Ethernet(buf)
+                    if not isinstance(eth.data, dpkt.ip.IP):
+                        continue
+                    ip = eth.data
+                    if not isinstance(ip.data, dpkt.tcp.TCP):
+                        continue
+                    tcp = ip.data
+
+                    key = self.get_stream_key(tcp, ip)
+                    streams[key].append((ts, buf))
+                    stream_sizes[key] += len(buf)
+                except Exception:
+                    continue
+
+        total_streams = len(streams)
+        print(f"Found {total_streams} TCP streams.")
+
+        if total_streams == 0:
+            print("No TCP streams found to split.")
+            return []
+
+        # 2. Assign Streams to Buckets (Greedy LPT Algorithm)
+        num_chunks = min(default_chunks, total_streams)
+
+        # Sort streams by size (descending)
+        sorted_streams = sorted(stream_sizes.items(), key=lambda item: item[1], reverse=True)
+
+        # Buckets: list of (current_size, batch_index, list_of_keys)
+        # We perform standard list sort to find min bucket, sufficient for small N
+        buckets = [[0, i, []] for i in range(num_chunks)]
+
+        for key, size in sorted_streams:
+            # Find bucket with smallest current size
+            buckets.sort(key=lambda x: x[0])
+            smallest_bucket = buckets[0]
+
+            # Add stream to this bucket
+            smallest_bucket[0] += size
+            smallest_bucket[2].append(key)
+
+        print(f"Splitting into {num_chunks} files with volume balancing...")
+        generated_files = []
+
+        # 3. Write Batches
+        # Sort buckets by index ensures file naming order 0, 1, 2...
+        buckets.sort(key=lambda x: x[1])
+
+        for size, i, batch_keys in buckets:
+            out_file_path = os.path.join(self.output_dir, f"batch_{i}.pcap")
+            generated_files.append(out_file_path)
+
+            with open(out_file_path, "wb") as f:
+                writer = dpkt.pcap.Writer(f)
+                for key in batch_keys:
+                    for ts, buf in streams[key]:
+                        writer.writepkt(buf, ts)
+
+            print(f" - Created {os.path.basename(out_file_path)}: {len(batch_keys)} streams ({size/1024/1024:.2f} MB)")
+
+        print(f"Split completed in {time.time() - start_time:.2f}s")
+        return generated_files
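
A minimal usage sketch of the new splitter; the capture path and chunk count are hypothetical, and tests/test_split.py below drives the same API against a real file:

    from FlowAnalyzer.PcapSplitter import PcapSplitter

    # Streams are keyed by a canonical (src, dst, sport, dport) tuple and packed
    # into the currently emptiest bucket (greedy LPT), so each batch_<i>.pcap
    # ends up with a similar byte volume.
    splitter = PcapSplitter("capture.pcapng", "output")
    for path in splitter.split(threshold_mb=10, default_chunks=4):
        print(path)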
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: FlowAnalyzer
-Version: 0.4.4
+Version: 0.4.6
 Summary: FlowAnalyzer is a traffic analyzer for parsing and processing JSON data files exported by tshark
 Home-page: https://github.com/Byxs20/FlowAnalyzer
 Author: Byxs20
@@ -5,9 +5,13 @@ FlowAnalyzer/FlowAnalyzer.py
 FlowAnalyzer/Models.py
 FlowAnalyzer/PacketParser.py
 FlowAnalyzer/Path.py
+FlowAnalyzer/PcapSplitter.py
 FlowAnalyzer/__init__.py
 FlowAnalyzer/logging_config.py
 FlowAnalyzer.egg-info/PKG-INFO
 FlowAnalyzer.egg-info/SOURCES.txt
 FlowAnalyzer.egg-info/dependency_links.txt
-FlowAnalyzer.egg-info/top_level.txt
+FlowAnalyzer.egg-info/top_level.txt
+tests/test.py
+tests/test_parser.py
+tests/test_split.py
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: FlowAnalyzer
-Version: 0.4.4
+Version: 0.4.6
 Summary: FlowAnalyzer is a traffic analyzer for parsing and processing JSON data files exported by tshark
 Home-page: https://github.com/Byxs20/FlowAnalyzer
 Author: Byxs20
@@ -7,7 +7,7 @@ with open(os.path.join(os.path.dirname(__file__), "README.md"), encoding="utf-8"
 
 setup(
     name="FlowAnalyzer",
-    version="0.4.4",
+    version="0.4.6",
     description="FlowAnalyzer is a traffic analyzer for parsing and processing JSON data files exported by tshark",
     author="Byxs20",
     author_email="97766819@qq.com",
@@ -0,0 +1,48 @@
+import os
+
+from viztracer import VizTracer
+
+from FlowAnalyzer.FlowAnalyzer import FlowAnalyzer
+
+# ============================
+# Configuration
+# ============================
+PCAP_FILE = "./tests/Beyond_Pro.pcapng"  # path to your test pcap file
+DISPLAY_FILTER = "http"  # tshark display filter, change as needed
+
+
+# ============================
+# Test logic
+# ============================
+def main():
+    if not os.path.exists(PCAP_FILE):
+        print(f"[ERROR] pcap file does not exist: {PCAP_FILE}")
+        return
+
+    print("[*] Starting to parse the PCAP file...")
+    with VizTracer():
+        db_path = FlowAnalyzer.get_db_data(PCAP_FILE, DISPLAY_FILTER)
+    print(f"[*] Parsing finished, database generated: {db_path}")
+
+    print("[*] Iterating over HTTP request/response pairs:")
+    analyzer = FlowAnalyzer(db_path)
+    total = 0
+    requests_count = 0
+    responses_count = 0
+
+    for pair in analyzer.generate_http_dict_pairs():
+        total += 1
+        if pair.request:
+            requests_count += 1
+        if pair.response:
+            responses_count += 1
+
+    print(f"[*] Total records: {total}")
+    print(f"[*] Request count: {requests_count}")
+    print(f"[*] Response count: {responses_count}")
+
+    print("[*] Test completed ✅")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,52 @@
+import binascii
+import gzip
+import unittest
+from urllib import parse
+
+from FlowAnalyzer.PacketParser import PacketParser
+
+
+class TestPacketParserOptimization(unittest.TestCase):
+    def test_gzip_decompression(self):
+        # Construct a fake HTTP response with GZIP body
+        content = b"Hello, Gzip World!"
+        compressed = gzip.compress(content)
+        header = b"HTTP/1.1 200 OK\r\nContent-Encoding: gzip\r\n\r\n"
+        full_response = header + compressed
+
+        full_request_hex = binascii.hexlify(full_response)
+
+        # Test extract_http_file_data
+        extracted_header, extracted_body = PacketParser.extract_http_file_data(full_request_hex)
+
+        self.assertEqual(extracted_header, header)
+        self.assertEqual(extracted_body, content)
+
+    def test_basic_extraction(self):
+        # Case: Simple text body, no chunking
+        content = b"Simple Body"
+        header = b"HTTP/1.1 200 OK\r\n\r\n"
+        full_response = header + content
+        full_request_hex = binascii.hexlify(full_response)
+
+        extracted_header, extracted_body = PacketParser.extract_http_file_data(full_request_hex)
+        self.assertEqual(extracted_body, content)
+
+    def test_chunked_decoding(self):
+        # Case: Chunked body
+        # 5\r\nHello\r\n0\r\n\r\n
+        chunked_body = bytes.fromhex(
+ "333b577644436167386f6a6d41707950734d36456d590d0a6e616d0d0a323b5052457a347a3678686b797875775656506d645a7757700d0a653d0d0a333b30544350750d0a6868680d0a333b4271694b6e7056486c4338750d0a2532370d0a333b4562636d544855354b6a58485976725575615074414d0d0a2537430d0a313b6a694f4542774c44624b3267620d0a250d0a333b376c6447726b3663350d0a3743250d0a313b3232424d6c5838426f360d0a320d0a333b684d61354547593339740d0a3853450d0a313b41723843390d0a4c0d0a333b31693052423453360d0a4543540d0a313b444968514d3164633870560d0a250d0a333b4d73736a630d0a3230250d0a323b4a77756d74324636440d0a32370d0a323b37654844337a30646430454d55643353636c0d0a52430d0a323b797549574643783137377330476630530d0a75470d0a323b6552794878625735625175485a64575832450d0a25320d0a313b5a7145444c32690d0a370d0a333b346a637a720d0a2532300d0a333b39627a59497650544b39714655376f6a6d374d664d305976480d0a46524f0d0a313b5556477645615749584e62784f0d0a4d0d0a323b4b503748426f7a6f687530744e514a6c59634a44417156630d0a25320d0a313b3458467945466a3347366f765869694f650d0a300d0a333b696a3054754c3578595768705049390d0a4455410d0a313b595730494f0d0a4c0d0a323b4e3567485a4861504d3233346b50564b4b4f45464e390d0a25320d0a313b495254683876435468625137334a773669397a0d0a300d0a333b414635416a7265585977356e35496b67453952513252420d0a5748450d0a313b67565846465a586b547074313752574d580d0a520d0a323b614e7a5947754948774339790d0a45250d0a313b3076757a444f43446e36613162313269424c73796b616e65430d0a320d0a323b41666e524d716447540d0a30390d0a333b427a3773504a534d370d0a3736360d0a333b34544757775458484a51687a7666596238326a6c6b39440d0a2533440d0a323b48334b35646c4c0d0a39370d0a323b7362517563480d0a36360d0a333b326c6f45534c79684f32495535306865756f300d0a2532300d0a333b65415169345751456e4c4d30446d39636d537836430d0a414e440d0a333b65556a4c6b4a6635635441364c346c3731305547376c67570d0a2532300d0a333b63416f4d546771444c590d0a3937390d0a323b523778364233616167387648310d0a39250d0a333b7765764b577a52530d0a3344490d0a323b527344505845754a517563314c54434d0d0a46250d0a313b45617431624b354c4e76365465384c0d0a320d0a313b48725072376d6c7231666265446c7353454d6f4d550d0a380d0a313b634137714b43516e5671387155794d7046367a4c38665058650d0a250d0a323b4f664c343252727364356f4d6855644548336878745459720d0a32380d0a333b70336e437046720d0a4f52440d0a323b4972505332386b344f42416b414b306d6e7769724156370d0a25320d0a323b7476674d72366363670d0a384d0d0a323b317a4f77623045774256516641486a7266386858576d6946710d0a49440d0a323b3779366b7077375a304b6a46777a724e0d0a25320d0a323b5259684b71336e4853656d786b564952514b53444877346d0d0a38250d0a333b3264365a4d3643674451700d0a3238530d0a323b4d61443468473772496b59336c565a476f6d0d0a454c0d0a323b31476336376e6f46750d0a45430d0a333b746774754b0d0a5425320d0a313b6549464d676a594b6b4b4f487143654169540d0a300d0a313b327530776264486f5363737536327370757076580d0a490d0a333b716d733841370d0a464e550d0a333b78563962414232630d0a4c4c250d0a313b3733507567370d0a320d0a333b454b4e6268326a316271415635440d0a3843410d0a313b52787738716c78454f4a6d6d700d0a530d0a323b6a337954446476564d516372714f360d0a54250d0a333b4e47566976674144590d0a3238660d0a323b5954615959654b474c564b41536e78650d0a6c610d0a323b6272557a796e324179304d667a6c6f6e0d0a67250d0a323b684870416876446c706f570d0a32300d0a323b6172786778475938714c51655677503931687453344d456c530d0a41530d0a313b4a52304472594a69427972794b74646d31666950340d0a250d0a313b546835467a0d0a320d0a313b6d6c366d39774b514435745841725059750d0a300d0a313b314a484a690d0a430d0a313b6c524b5531477152466e6e454d46754e64780d0a480d0a313b61447071630d0a410d0a313b6d7a65557152486455337a756a730d0a520d0a333b474e4d375164436f4a3042696d0d0a2532390d0a323b7655686967646f3373727739456f67547a4c0d0a25320d0a323b34576a6d747a39
4c5744384d77434831567175786d7a706d350d0a43300d0a313b343831656f4c5a720d0a780d0a333b456e64787155527049734372540d0a3230250d0a313b63797356646954553537510d0a320d0a323b4e447159334557710d0a39250d0a323b776d44496a4f0d0a32300d0a333b535839487375573269476247440d0a46524f0d0a333b4d7636616e53516e586a3072714337487936536b576558570d0a4d25320d0a323b5441674444626d6437480d0a30740d0a333b784f346a79584f677251676f390d0a6573740d0a333b6b5766563469557466430d0a2e666c0d0a323b6d334b4d416565430d0a61670d0a333b6a6141744d4631576d540d0a2532300d0a313b3150596e30506e75570d0a4f0d0a333b683638724e36554a684f6a476a65450d0a5244450d0a313b4f51316c736a6e344b337049540d0a520d0a333b7257625142566452613868464b4c7352780d0a2532300d0a333b3941694b4d4454596976647476423561347642676739440d0a4259250d0a323b66626d764c5475434855505064644d796d320d0a32300d0a313b716933793571563066467a63556166643245770d0a660d0a313b524b424673594f3574694342360d0a6c0d0a323b6d494343414c0d0a61670d0a313b6c4b4236585632384b72316e33750d0a250d0a333b6d77794e6765520d0a32304c0d0a333b4849337031346574450d0a494d490d0a333b425836546a4a68306e43754c6a6b4d7436646c760d0a5425320d0a333b356e363078326d4b6f5a7063484f6b654f5a7861386774594d0d0a3030250d0a313b39753457534e655a0d0a320d0a323b77536e544967446b0d0a43310d0a323b3372446f630d0a25320d0a333b32636f744f4759700d0a3925320d0a333b35464f6d34706b636346376a5a7973304e776a750d0a4334320d0a313b686f645257386c6a776c6e5a47675a6d4e4c690d0a250d0a323b624e6d784c0d0a32430d0a333b66374e6931536d6c0d0a3125320d0a313b4d46466e573043645637340d0a390d0a313b774b4e4645454445747a4a537075325a4c626d78656a49304c0d0a250d0a333b636e595a5843373633767a5343636c6b5936790d0a3239250d0a323b343731684f545179580d0a33450d0a333b5166684944494c4a675556754937616244410d0a3125320d0a323b753442385358704742536e0d0a39250d0a323b4f374c73474b775169324b30330d0a32430d0a313b6b375639414951544d4661713879745541540d0a530d0a313b6533336944690d0a4c0d0a323b6f75386d44324343504e4870316232474f0d0a45450d0a323b6e4b6f644d0d0a50250d0a323b4b45584b6b43750d0a32380d0a333b495357635771784f39507750724244766772370d0a3125320d0a323b42596b4a4948704c62623147760d0a39250d0a333b783347786e434b776e48370d0a3243390d0a313b7837396f49670d0a370d0a323b734634364168736c7968547a667a756f32520d0a39390d0a333b5a786e37734e69534d7356386d51685244637872594953490d0a2532390d0a333b416e586f37345071447531500d0a2532390d0a323b6f7164664144327247580d0a25370d0a313b39384f494f4358644a6d66664f386c770d0a430d0a333b47565a6e593965457769584a0d0a2537430d0a323b614e376c4b6147544c77735172786342410d0a25320d0a333b6563585745797364467679474162306f61515a7966327239660d0a3726700d0a333b6e7878464c545a65527779646639310d0a6173730d0a323b4952463149314a36490d0a3d780d0a323b3567327643664e0d0a78780d0a300d0a0d0a"
+        )
+        header = b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n"
+        full_response = header + chunked_body
+        full_request_hex = binascii.hexlify(full_response)
+        extracted_header, extracted_body = PacketParser.extract_http_file_data(full_request_hex)
+        self.assertEqual(
+            parse.unquote_to_bytes(extracted_body),  # manually URL-decode
+            b"name=hhh'||(SELECT 'RCuG' FROM DUAL WHERE 9766=9766 AND 9799=IF((ORD(MID((SELECT IFNULL(CAST(flag AS CHAR),0x20) FROM test.flag ORDER BY flag LIMIT 0,1),42,1))>1),SLEEP(1),9799))||'&pass=xxx",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
@@ -0,0 +1,90 @@
+
+import os
+import shutil
+import subprocess
+
+from FlowAnalyzer.PcapSplitter import PcapSplitter
+
+#############################
+# Configuration
+#############################
+PCAP_FILE = r"./tests/Beyond_Pro.pcapng"  # change to your file
+OUT_DIR = "output"
+#############################
+
+def clean_output_dir(directory: str):
+    if os.path.exists(directory):
+        print(f"Cleaning output directory: {directory}")
+        shutil.rmtree(directory)
+    os.makedirs(directory, exist_ok=True)
+
+def count_packets(pcap_path: str, display_filter: str) -> int:
+    cmd = [
+        "tshark",
+        "-r", pcap_path,
+        "-Y", display_filter,
+        "-T", "fields",
+        "-e", "frame.number"
+    ]
+    try:
+        # Run tshark and capture output
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            check=True
+        )
+        # Count non-empty lines
+        count = sum(1 for line in result.stdout.splitlines() if line.strip())
+        return count
+    except subprocess.CalledProcessError as e:
+        print(f"Error running tshark on {pcap_path}: {e}")
+        return 0
+    except FileNotFoundError:
+        print("Error: tshark not found in PATH.")
+        return 0
+
+def main():
+    print("Beginning split test...")
+
+    # 1. Clean output directory
+    clean_output_dir(OUT_DIR)
+
+    splitter = PcapSplitter(PCAP_FILE, OUT_DIR)
+
+    # Defaults to os.cpu_count() chunks
+    result_files = splitter.split()
+
+    print(f"\nGenerated {len(result_files)} files:")
+    for f in result_files:
+        print(f)
+
+    # 2. Verify with Tshark
+    print("\nVerifying data integrity with Tshark...")
+    total_requests = 0
+    total_responses = 0
+
+    EXPECTED_REQUESTS = 12284
+    EXPECTED_RESPONSES = 12281
+
+    for pcap in result_files:
+        req_count = count_packets(pcap, "http.request")
+        resp_count = count_packets(pcap, "http.response")
+
+        print(f"  {os.path.basename(pcap)}: Requests={req_count}, Responses={resp_count}")
+        total_requests += req_count
+        total_responses += resp_count
+
+    print("-" * 40)
+    print(f"Total Requests: {total_requests} (Expected: {EXPECTED_REQUESTS})")
+    print(f"Total Responses: {total_responses} (Expected: {EXPECTED_RESPONSES})")
+
+    if total_requests == EXPECTED_REQUESTS and total_responses == EXPECTED_RESPONSES:
+        print("\nSUCCESS: Data integrity verified.")
+    else:
+        print("\nFAILURE: Data integrity mismatch!")
+        exit(1)
+
+
+if __name__ == "__main__":
+    main()