FlowAnalyzer 0.4.5.tar.gz → 0.4.7.tar.gz

@@ -56,7 +56,7 @@ class FlowAnalyzer:
         sql_pair = """
         SELECT
             req.frame_num, req.header, req.file_data, req.full_uri, req.time_epoch, -- 0-4 (Request)
-            resp.frame_num, resp.header, resp.file_data, resp.time_epoch, resp.request_in -- 5-9 (Response)
+            resp.frame_num, resp.header, resp.file_data, resp.time_epoch, resp.request_in, resp.status_code -- 5-10 (Response)
         FROM requests req
         LEFT JOIN responses resp ON req.frame_num = resp.request_in
         ORDER BY req.frame_num ASC
@@ -70,20 +70,20 @@ class FlowAnalyzer:
 
             resp = None
             if row[5] is not None:
-                resp = Response(frame_num=row[5], header=row[6] or b"", file_data=row[7] or b"", time_epoch=row[8], _request_in=row[9])
+                resp = Response(frame_num=row[5], header=row[6] or b"", file_data=row[7] or b"", time_epoch=row[8], _request_in=row[9], status_code=row[10] or 0)
 
             yield HttpPair(request=req, response=resp)
 
         # === Step 2: orphan-response query ===
         sql_orphan = """
-            SELECT frame_num, header, file_data, time_epoch, request_in
+            SELECT frame_num, header, file_data, time_epoch, request_in, status_code
             FROM responses
             WHERE request_in NOT IN (SELECT frame_num FROM requests)
         """
         cursor.execute(sql_orphan)
 
         for row in cursor:
-            resp = Response(frame_num=row[0], header=row[1] or b"", file_data=row[2] or b"", time_epoch=row[3], _request_in=row[4])
+            resp = Response(frame_num=row[0], header=row[1] or b"", file_data=row[2] or b"", time_epoch=row[3], _request_in=row[4], status_code=row[5] or 0)
             yield HttpPair(request=None, response=resp)
 
         # =========================================================================
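
The pair query now returns eleven columns (indexes 0-10) and the orphan query six, so `status_code` rides along at row index 10 and 5 respectively. A self-contained sketch of the two-step lookup against the 0.4.7 schema (an in-memory database with fabricated rows, not the package's own cache file):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
cur = conn.cursor()
# Schema copied from the CREATE TABLE hunk below
cur.execute("CREATE TABLE requests (frame_num INTEGER PRIMARY KEY, header BLOB, file_data BLOB, full_uri TEXT, time_epoch REAL)")
cur.execute("CREATE TABLE responses (frame_num INTEGER PRIMARY KEY, header BLOB, file_data BLOB, time_epoch REAL, request_in INTEGER, status_code INTEGER)")
cur.execute("INSERT INTO requests VALUES (1, x'', x'', 'http://example/a', 1.0)")
cur.execute("INSERT INTO responses VALUES (2, x'', x'', 1.1, 1, 200)")  # paired with request frame 1
cur.execute("INSERT INTO responses VALUES (9, x'', x'', 2.0, 8, 404)")  # request frame 8 never captured

# Step 1: request/response pairs (the LEFT JOIN keeps unanswered requests too)
for row in cur.execute("SELECT req.frame_num, resp.frame_num, resp.status_code FROM requests req LEFT JOIN responses resp ON req.frame_num = resp.request_in"):
    print("pair:", row)    # pair: (1, 2, 200)

# Step 2: orphan responses whose request is missing from the capture
for row in cur.execute("SELECT frame_num, status_code FROM responses WHERE request_in NOT IN (SELECT frame_num FROM requests)"):
    print("orphan:", row)  # orphan: (9, 404)
```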
@@ -161,7 +161,7 @@ class FlowAnalyzer:
         cursor.execute("PRAGMA journal_mode = MEMORY")
 
         cursor.execute("CREATE TABLE requests (frame_num INTEGER PRIMARY KEY, header BLOB, file_data BLOB, full_uri TEXT, time_epoch REAL)")
-        cursor.execute("CREATE TABLE responses (frame_num INTEGER PRIMARY KEY, header BLOB, file_data BLOB, time_epoch REAL, request_in INTEGER)")
+        cursor.execute("CREATE TABLE responses (frame_num INTEGER PRIMARY KEY, header BLOB, file_data BLOB, time_epoch REAL, request_in INTEGER, status_code INTEGER)")
 
         cursor.execute("""
             CREATE TABLE meta_info (
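
For a cache database produced by 0.4.5, the schema delta is a single added column. Whether the package migrates or simply rebuilds old databases is not visible in this diff; the sketch below, with a placeholder path, only illustrates the delta:

```python
import sqlite3

with sqlite3.connect("flow_cache.db") as conn:  # hypothetical cache path
    cols = [row[1] for row in conn.execute("PRAGMA table_info(responses)")]
    if "status_code" not in cols:
        conn.execute("ALTER TABLE responses ADD COLUMN status_code INTEGER")
```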
@@ -174,50 +174,29 @@ class FlowAnalyzer:
             """)
             conn.commit()
 
+        lua_script_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tshark.lua")
+
+        # Pass filter via environment variable
+        env = os.environ.copy()
+        env["flowanalyzer_filter"] = display_filter
+
         command = [
             tshark_path,
             "-r",
             pcap_path,
-            "-Y",
-            f"({display_filter})",
-            "-T",
-            "fields",
-            "-e",
-            "http.response.code",  # 0
-            "-e",
-            "http.request_in",  # 1
-            "-e",
-            "tcp.reassembled.data",  # 2
-            "-e",
-            "frame.number",  # 3
-            "-e",
-            "tcp.payload",  # 4
-            "-e",
-            "frame.time_epoch",  # 5
-            "-e",
-            "exported_pdu.exported_pdu",  # 6
-            "-e",
-            "http.request.full_uri",  # 7
-            "-e",
-            "tcp.segment.count",  # 8
-            "-E",
-            "header=n",
-            "-E",
-            "separator=/t",
-            "-E",
-            "quote=n",
-            "-E",
-            "occurrence=f",
+            "-q",
+            "-X",
+            f"lua_script:{lua_script_path}",
         ]
 
-        logger.debug(f"Running tshark: {command}")
+        logger.debug(f"Running tshark: {' '.join(command)}")
         BATCH_SIZE = 2000
         MAX_PENDING_BATCHES = 20  # Cap how many parsed batches may be pending in memory (backpressure)
 
         # Process the data in parallel with a ThreadPoolExecutor
         max_workers = min(32, (os.cpu_count() or 1) + 4)
 
-        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=os.path.dirname(os.path.abspath(pcap_path)))
+        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=os.path.dirname(os.path.abspath(pcap_path)), env=env, encoding="utf-8", errors="replace")
        try:
             with sqlite3.connect(db_path) as conn:
                 cursor = conn.cursor()
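
The extraction pipeline thus changes from `-T fields` with nine `-e` columns to a quiet run (`-q`) that loads the bundled Lua tap via `-X lua_script:`; the display filter travels through the `flowanalyzer_filter` environment variable because `-Y` no longer applies. A standalone sketch of the same invocation, with placeholder paths:

```python
import os
import subprocess

tshark_path = "tshark"                           # assumed to be on PATH
pcap_path = "capture.pcap"                       # placeholder capture file
lua_script_path = os.path.abspath("tshark.lua")  # the script added in this release

env = os.environ.copy()
env["flowanalyzer_filter"] = "http"              # read by the Lua tap via os.getenv

command = [tshark_path, "-r", pcap_path, "-q", "-X", f"lua_script:{lua_script_path}"]

# Text mode with errors="replace" is safe here: the tap prints tab-separated
# ASCII columns (bytes are hex-encoded), never raw binary. The package itself
# pipes stderr; DEVNULL keeps this sketch deadlock-free.
with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL,
                      env=env, encoding="utf-8", errors="replace") as proc:
    for line in proc.stdout:
        line = line.strip()
        if line:
            print(line.split("\t")[:3])          # type, frame number, epoch time
```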
@@ -236,14 +215,14 @@ class FlowAnalyzer:
 
                     for item in results:
                         if item["type"] == "response":
-                            db_resp_rows.append((item["frame_num"], item["header"], item["file_data"], item["time_epoch"], item["request_in"]))
+                            db_resp_rows.append((item["frame_num"], item["header"], item["file_data"], item["time_epoch"], item["request_in"], item.get("status_code", 0)))
                         else:
                             db_req_rows.append((item["frame_num"], item["header"], item["file_data"], item["full_uri"], item["time_epoch"]))
 
                     if db_req_rows:
                         cursor.executemany("INSERT OR REPLACE INTO requests VALUES (?,?,?,?,?)", db_req_rows)
                     if db_resp_rows:
-                        cursor.executemany("INSERT OR REPLACE INTO responses VALUES (?,?,?,?,?)", db_resp_rows)
+                        cursor.executemany("INSERT OR REPLACE INTO responses VALUES (?,?,?,?,?,?)", db_resp_rows)
 
                 def submit_batch():
                     """Submit the current batch to the thread pool"""
@@ -259,6 +238,10 @@ class FlowAnalyzer:
                 # --- Main Pipeline Loop ---
                 if process.stdout:
                     for line in process.stdout:
+                        # Strip newline
+                        line = line.strip()
+                        if not line:
+                            continue
                         current_batch.append(line)
 
                         if len(current_batch) >= BATCH_SIZE:
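
`BATCH_SIZE` and `MAX_PENDING_BATCHES` bound how much raw tshark output can sit in flight; the loop itself is only partly visible in this diff, so the following is one plausible shape of that backpressure pattern, not the package's verbatim code:

```python
from concurrent.futures import ThreadPoolExecutor

BATCH_SIZE = 2000
MAX_PENDING_BATCHES = 20

def parse_batch(lines):
    # Stand-in for PacketParser.process_batch
    return [line.split("\t") for line in lines]

fake_output = (f"req\t{i}\t0.0\t\t\thttp://example/{i}" for i in range(10_000))

with ThreadPoolExecutor(max_workers=4) as pool:
    pending, batch = [], []
    for line in fake_output:
        batch.append(line)
        if len(batch) >= BATCH_SIZE:
            pending.append(pool.submit(parse_batch, batch))
            batch = []
        if len(pending) >= MAX_PENDING_BATCHES:
            pending.pop(0).result()  # block until the oldest batch drains
    if batch:
        pending.append(pool.submit(parse_batch, batch))
    done = sum(len(f.result()) for f in pending)

print(done)  # rows parsed from the batches still pending at the end
```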
@@ -8,17 +8,18 @@ class Request:
     frame_num: int
     header: bytes
     file_data: bytes
-    full_uri: str
     time_epoch: float
+    full_uri: str
 
 
 @dataclass
 class Response:
-    __slots__ = ("frame_num", "header", "file_data", "time_epoch", "_request_in")
+    __slots__ = ("frame_num", "header", "file_data", "time_epoch", "status_code", "_request_in")
     frame_num: int
     header: bytes
     file_data: bytes
     time_epoch: float
+    status_code: int
     _request_in: Optional[int]
 
 
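
Because `__slots__` is declared by hand rather than via `@dataclass(slots=True)`, the pattern stays compatible with the Python 3.8/3.9 classifiers in setup.py; it works only while no field has a default value, since a default would collide with the slot descriptor. A minimal reconstruction of the 0.4.7 container:

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class Response:
    __slots__ = ("frame_num", "header", "file_data", "time_epoch", "status_code", "_request_in")
    frame_num: int
    header: bytes
    file_data: bytes
    time_epoch: float
    status_code: int
    _request_in: Optional[int]

resp = Response(frame_num=5, header=b"HTTP/1.1 200 OK\r\n\r\n", file_data=b"",
                time_epoch=1700000000.0, status_code=200, _request_in=3)
print(resp.status_code)           # 200
print(hasattr(resp, "__dict__"))  # False: instances carry no per-object dict
```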
@@ -2,55 +2,90 @@ import binascii
 import contextlib
 import gzip
 from typing import List, Optional, Tuple
-from urllib import parse
 
 from .logging_config import logger
 
 
 class PacketParser:
     @staticmethod
-    def parse_packet_data(row: list) -> Tuple[int, int, float, str, bytes]:
+    def process_batch(lines: List[str]) -> List[dict]:
         """
-        Parse one line of tshark output
-        row definition (all bytes):
-        0: http.response.code
-        1: http.request_in
-        2: tcp.reassembled.data
-        3: frame.number
-        4: tcp.payload
-        5: frame.time_epoch
-        6: exported_pdu.exported_pdu
-        7: http.request.full_uri
-        8: tcp.segment.count
+        Process lines in batch
         """
-        frame_num = int(row[3])
-        request_in = int(row[1]) if row[1] else frame_num
-        # Decode only URI to string
-        full_uri = parse.unquote(row[7].decode("utf-8", errors="replace")) if row[7] else ""
-        time_epoch = float(row[5])
-
-        # Logic for Raw Packet (Header Source)
-        # Previous index 9 is now 8 since we removed http.file_data
-        is_reassembled = len(row) > 8 and row[8]
-
-        if is_reassembled and row[2]:
-            full_request = row[2]
-        elif row[4]:
-            full_request = row[4]
-        else:
-            # Fallback (e.g. Exported PDU)
-            full_request = row[2] if row[2] else (row[6] if row[6] else b"")
-
-        return frame_num, request_in, time_epoch, full_uri, full_request
+        results = []
+        for line in lines:
+            res = PacketParser.process_row(line)
+            if res:
+                results.append(res)
+        return results
+
+    @staticmethod
+    def process_row(line: str) -> Optional[dict]:
+        """
+        Parse one line emitted by the tshark Lua script
+        Columns:
+        0: type ("req" / "rep" / "data")
+        1: frame.number
+        2: time_epoch
+        3: header_hex
+        4: file_data_hex (Body)
+        5: uri_or_code
+        6: request_in
+        """
+        try:
+            parts = line.split("\t")
+            if len(parts) < 6:
+                return None
+
+            p_type = parts[0]
+            frame_num = int(parts[1])
+            time_epoch = float(parts[2])
+
+            # Hex string -> Bytes
+            # parts[3] might be empty string
+            header = binascii.unhexlify(parts[3]) if parts[3] else b""
+            file_data = binascii.unhexlify(parts[4]) if parts[4] else b""
+
+            uri_or_code = parts[5]
+            request_in_str = parts[6] if len(parts) > 6 else ""
+
+            if p_type == "req":
+                return {"type": "request", "frame_num": frame_num, "header": header, "file_data": file_data, "time_epoch": time_epoch, "full_uri": uri_or_code, "request_in": None}
+            elif p_type == "rep":
+                request_in = int(request_in_str) if request_in_str else 0
+                try:
+                    status_code = int(uri_or_code)
+                except (ValueError, TypeError):
+                    status_code = 0
+
+                return {
+                    "type": "response",
+                    "frame_num": frame_num,
+                    "header": header,
+                    "file_data": file_data,
+                    "time_epoch": time_epoch,
+                    "request_in": request_in,
+                    "status_code": status_code,
+                    "full_uri": "",
+                }
+            else:
+                # 'data' or unknown, ignore for now based on current logic
+                return None
+
+        except Exception as e:
+            logger.debug(f"Packet parse error: {e} | Line: {line[:100]}...")
+            return None
 
     @staticmethod
     def split_http_headers(file_data: bytes) -> Tuple[bytes, bytes]:
         headerEnd = file_data.find(b"\r\n\r\n")
         if headerEnd != -1:
             return file_data[: headerEnd + 4], file_data[headerEnd + 4 :]
-        elif file_data.find(b"\n\n") != -1:
-            headerEnd = file_data.index(b"\n\n") + 2
-            return file_data[:headerEnd], file_data[headerEnd:]
+
+        headerEnd = file_data.find(b"\n\n")
+        if headerEnd != -1:
+            return file_data[: headerEnd + 2], file_data[headerEnd + 2 :]
+
         return b"", file_data
 
     @staticmethod
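
A quick round-trip through the new line format, assuming FlowAnalyzer 0.4.7 is importable; the seven tab-separated columns match the docstring above, with header and body hex-encoded:

```python
import binascii

from FlowAnalyzer.PacketParser import PacketParser

header_hex = binascii.hexlify(b"HTTP/1.1 200 OK\r\n\r\n").decode()
body_hex = binascii.hexlify(b"hi").decode()
line = "\t".join(["rep", "12", "1700000000.5", header_hex, body_hex, "200", "7"])

row = PacketParser.process_row(line)
print(row["type"], row["status_code"], row["request_in"])  # response 200 7
```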
@@ -73,6 +108,10 @@ class PacketParser:
                 raise ValueError("Not chunked data")
 
             size_line = file_data[cursor:newline_idx].strip()
+            # Handle chunk extension: ignore everything after ';'
+            if b";" in size_line:
+                size_line = size_line.split(b";", 1)[0].strip()
+
             if not size_line:
                 cursor = newline_idx + 1
                 continue
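
Per RFC 7230 §4.1.1 a chunk-size line may carry `;name=value` extensions, which would previously break the hexadecimal size parse; the test vector added below exercises this heavily. The fix in isolation:

```python
# The first size line of the new chunked test vector
size_line = b"3;WvDCag8ojmApyPsM6EmY"
if b";" in size_line:
    size_line = size_line.split(b";", 1)[0].strip()
print(int(size_line, 16))  # 3
```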
@@ -135,49 +174,3 @@ class PacketParser:
         except Exception as e:
             logger.error(f"Unknown error while parsing HTTP data: {e}")
             return b"", b""
-
-    @staticmethod
-    def process_row(line: bytes) -> Optional[dict]:
-        """
-        Process a single line; return a structured result for the main thread to write
-        """
-        line = line.rstrip(b"\r\n")
-        if not line:
-            return None
-
-        row = line.split(b"\t")
-        try:
-            frame_num, request_in, time_epoch, full_uri, full_request = PacketParser.parse_packet_data(row)
-
-            if not full_request:
-                return None
-
-            header, file_data = PacketParser.extract_http_file_data(full_request)
-
-            # row[0] is http.response.code (bytes)
-            is_response = bool(row[0])
-
-            return {
-                "type": "response" if is_response else "request",
-                "frame_num": frame_num,
-                "header": header,
-                "file_data": file_data,
-                "time_epoch": time_epoch,
-                "request_in": request_in,  # Only useful for Response
-                "full_uri": full_uri,  # Only useful for Request
-            }
-
-        except Exception:
-            return None
-
-    @staticmethod
-    def process_batch(lines: List[bytes]) -> List[dict]:
-        """
-        Process lines in batch to reduce function-call overhead
-        """
-        results = []
-        for line in lines:
-            res = PacketParser.process_row(line)
-            if res:
-                results.append(res)
-        return results
@@ -0,0 +1,196 @@
+-- =========================================================================
+-- 1. Field definitions
+-- =========================================================================
+local f_resp_code = Field.new("http.response.code")
+local f_full_uri = Field.new("http.request.full_uri")
+local f_frame_num = Field.new("frame.number")
+local f_time_epoch = Field.new("frame.time_epoch")
+local f_reassembled = Field.new("tcp.reassembled.data")
+local f_payload = Field.new("tcp.payload")
+local f_file_data = Field.new("http.file_data")
+local f_seg_count = Field.new("tcp.segment.count")
+local f_retrans = Field.new("tcp.analysis.retransmission")
+local f_request_in = Field.new("http.request_in")
+-- [New] Field that replaces the header source
+local f_exported_pdu = Field.new("exported_pdu.exported_pdu")
+
+-- =========================================================================
+-- 2. Fetch the display filter
+-- =========================================================================
+local user_filter = os.getenv("flowanalyzer_filter")
+if not user_filter or user_filter == "" then
+    user_filter = "http"
+end
+
+-- =========================================================================
+-- 3. Initialize the listener
+-- =========================================================================
+local tap = Listener.new("frame", user_filter)
+
+-- =========================================================================
+-- 4. Helper functions
+-- =========================================================================
+
+local function val_to_str(val)
+    if val == nil then
+        return ""
+    end
+    return tostring(val)
+end
+
+-- Find where the header ends
+local function find_header_split_pos(hex_str)
+    if not hex_str then
+        return nil
+    end
+
+    -- 1. Look for 0D0A0D0A (CRLF CRLF)
+    local start_idx = 1
+    while true do
+        local s, e = string.find(hex_str, "0D0A0D0A", start_idx, true)
+        if not s then
+            break
+        end
+        if s % 2 == 1 then
+            return s
+        end -- ensure byte alignment
+        start_idx = s + 1
+    end
+
+    -- 2. Look for 0A0A (LF LF)
+    start_idx = 1
+    while true do
+        local s, e = string.find(hex_str, "0A0A", start_idx, true)
+        if not s then
+            break
+        end
+        if s % 2 == 1 then
+            return s
+        end
+        start_idx = s + 1
+    end
+    return nil
+end
+
+-- [Core performance optimization] Extract the header hex smartly.
+-- Even with no size cap on the body, scanning only the first 2 KB for the
+-- header is enough, since headers never get that long.
+local function extract_header_smart(field_info)
+    if not field_info then
+        return ""
+    end
+
+    local range = field_info.range
+    local total_len = range:len()
+
+    -- Preview the first 2 KB
+    local cap_len = 2048
+    if total_len < cap_len then
+        cap_len = total_len
+    end
+
+    -- [Key] Convert to hex, forcing upper case
+    local preview_hex = string.upper(range(0, cap_len):bytes():tohex())
+
+    -- Locate the separator
+    local pos = find_header_split_pos(preview_hex)
+
+    if pos then
+        return string.sub(preview_hex, 1, pos - 1)
+    else
+        return preview_hex
+    end
+end
+
+-- Fetch the complete hex directly
+local function get_full_hex(field_info)
+    if not field_info then
+        return ""
+    end
+    -- Force upper case to keep the format consistent
+    return string.upper(field_info.range:bytes():tohex())
+end
+
+-- =========================================================================
+-- 5. Main processing logic
+-- =========================================================================
+function tap.packet(pinfo, tvb)
+    -- Filter out TCP retransmissions
+    if f_retrans() then
+        return
+    end
+
+    local frame_num = f_frame_num()
+    if not frame_num then
+        return
+    end
+
+    -- === 1. Determine type (req/rep) and info (URI/Code) ===
+    local col_type = "data"
+    local col_uri_or_code = ""
+
+    local code = f_resp_code()
+    local uri = f_full_uri()
+
+    if code then
+        col_type = "rep"
+        col_uri_or_code = tostring(code)
+    elseif uri then
+        col_type = "req"
+        col_uri_or_code = tostring(uri)
+    end
+
+    -- === 2. Basic info ===
+    local col_frame = tostring(frame_num)
+    local col_time = val_to_str(f_time_epoch())
+
+    -- === 3. Header hex ===
+    -- Priority: Exported PDU > TCP reassembled > TCP payload
+    local col_header_hex = ""
+
+    local exp_pdu = f_exported_pdu()
+
+    if exp_pdu then
+        col_header_hex = extract_header_smart(exp_pdu)
+    else
+        local seq_count = f_seg_count()
+        local reass = nil
+        if seq_count then
+            reass = f_reassembled()
+        end
+
+        if reass then
+            col_header_hex = extract_header_smart(reass)
+        else
+            local pay = f_payload()
+            if pay then
+                col_header_hex = extract_header_smart(pay)
+            end
+        end
+    end
+
+    -- === 4. File data (body hex) ===
+    -- [Changed] Size check removed; convert every body unconditionally
+    local col_file_data = ""
+    local fd = f_file_data()
+
+    if fd then
+        col_file_data = get_full_hex(fd)
+    end
+
+    -- === 5. Request In (responses only) ===
+    local col_req_in = ""
+    local req_in = f_request_in()
+    if req_in then
+        col_req_in = tostring(req_in)
+    end
+
+    -- === Output (tab-separated) ===
+    print(table.concat({col_type, -- 1. req / rep
+        col_frame,                -- 2. Frame Number
+        col_time,                 -- 3. Time Epoch
+        col_header_hex,           -- 4. Header Bytes (Hex)
+        col_file_data,            -- 5. File Data (Hex) [complete data, never skipped]
+        col_uri_or_code,          -- 6. URI / Code
+        col_req_in                -- 7. Request In
+    }, "\t"))
+end
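
One detail worth pausing on: `find_header_split_pos` searches for CRLF/LF pairs inside a hex string, where every byte occupies two characters, so a genuine match must start on a byte boundary, which in Lua's 1-based indexing means an odd position. The same check expressed in Python terms (0-based, so even):

```python
aligned = "41420D0A0D0A43"  # hex of b"AB\r\n\r\nC"
i = aligned.find("0D0A0D0A")
print(i, i % 2 == 0)        # 4 True -> Lua 1-based position 5 is odd: accept

misaligned = "20D0A0D0A0"   # hex of b" \xd0\xa0\xd0\xa0", which contains no CRLF bytes
j = misaligned.find("0D0A0D0A")
print(j, j % 2 == 0)        # 1 False -> Lua 1-based position 2 is even: keep scanning
```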
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: FlowAnalyzer
-Version: 0.4.5
+Version: 0.4.7
 Summary: FlowAnalyzer is a traffic analyzer for parsing and processing JSON data files exported by tshark
 Home-page: https://github.com/Byxs20/FlowAnalyzer
 Author: Byxs20
@@ -8,6 +8,7 @@ FlowAnalyzer/Path.py
 FlowAnalyzer/PcapSplitter.py
 FlowAnalyzer/__init__.py
 FlowAnalyzer/logging_config.py
+FlowAnalyzer/tshark.lua
 FlowAnalyzer.egg-info/PKG-INFO
 FlowAnalyzer.egg-info/SOURCES.txt
 FlowAnalyzer.egg-info/dependency_links.txt
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: FlowAnalyzer
-Version: 0.4.5
+Version: 0.4.7
 Summary: FlowAnalyzer is a traffic analyzer for parsing and processing JSON data files exported by tshark
 Home-page: https://github.com/Byxs20/FlowAnalyzer
 Author: Byxs20
@@ -7,16 +7,16 @@ with open(os.path.join(os.path.dirname(__file__), "README.md"), encoding="utf-8"
 
 setup(
     name="FlowAnalyzer",
-    version="0.4.5",
+    version="0.4.7",
     description="FlowAnalyzer is a traffic analyzer for parsing and processing JSON data files exported by tshark",
     author="Byxs20",
     author_email="97766819@qq.com",
     packages=find_packages(exclude=["tests", "*.egg-info"]),
     package_data={
-        '': ['LICENSE', 'README.md', 'setup.py'],
+        "": ["LICENSE", "README.md", "setup.py"],
+        "FlowAnalyzer": ["*.lua"],
     },
-    install_requires=[
-    ],
+    install_requires=[],
     classifiers=[
         "Development Status :: 3 - Alpha",
         "Intended Audience :: Developers",
@@ -27,7 +27,6 @@ setup(
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
     ],
-
     long_description=long_description,
     long_description_content_type="text/markdown",
     url="https://github.com/Byxs20/FlowAnalyzer",
@@ -1,7 +1,5 @@
 import os
 
-from viztracer import VizTracer
-
 from FlowAnalyzer.FlowAnalyzer import FlowAnalyzer
 
 # ============================
@@ -20,8 +18,7 @@ def main():
         return
 
     print("[*] Parsing PCAP file...")
-    with VizTracer():
-        db_path = FlowAnalyzer.get_db_data(PCAP_FILE, DISPLAY_FILTER)
+    db_path = FlowAnalyzer.get_db_data(PCAP_FILE, DISPLAY_FILTER)
     print(f"[*] Parsing finished, database created: {db_path}")
 
     print("[*] Iterating over HTTP request-response pairs:")
@@ -0,0 +1,52 @@
+import binascii
+import gzip
+import unittest
+from urllib import parse
+
+from FlowAnalyzer.PacketParser import PacketParser
+
+
+class TestPacketParserOptimization(unittest.TestCase):
+    def test_gzip_decompression(self):
+        # Construct a fake HTTP response with GZIP body
+        content = b"Hello, Gzip World!"
+        compressed = gzip.compress(content)
+        header = b"HTTP/1.1 200 OK\r\nContent-Encoding: gzip\r\n\r\n"
+        full_response = header + compressed
+
+        full_request_hex = binascii.hexlify(full_response)
+
+        # Test extract_http_file_data
+        extracted_header, extracted_body = PacketParser.extract_http_file_data(full_request_hex)
+
+        self.assertEqual(extracted_header, header)
+        self.assertEqual(extracted_body, content)
+
+    def test_basic_extraction(self):
+        # Case: Simple text body, no chunking
+        content = b"Simple Body"
+        header = b"HTTP/1.1 200 OK\r\n\r\n"
+        full_response = header + content
+        full_request_hex = binascii.hexlify(full_response)
+
+        extracted_header, extracted_body = PacketParser.extract_http_file_data(full_request_hex)
+        self.assertEqual(extracted_body, content)
+
+    def test_chunked_decoding(self):
+        # Case: chunked body whose size lines carry chunk extensions, e.g. "3;WvDCag8ojmApyPsM6EmY"
+        chunked_body = bytes.fromhex(
+ "333b577644436167386f6a6d41707950734d36456d590d0a6e616d0d0a323b5052457a347a3678686b797875775656506d645a7757700d0a653d0d0a333b30544350750d0a6868680d0a333b4271694b6e7056486c4338750d0a2532370d0a333b4562636d544855354b6a58485976725575615074414d0d0a2537430d0a313b6a694f4542774c44624b3267620d0a250d0a333b376c6447726b3663350d0a3743250d0a313b3232424d6c5838426f360d0a320d0a333b684d61354547593339740d0a3853450d0a313b41723843390d0a4c0d0a333b31693052423453360d0a4543540d0a313b444968514d3164633870560d0a250d0a333b4d73736a630d0a3230250d0a323b4a77756d74324636440d0a32370d0a323b37654844337a30646430454d55643353636c0d0a52430d0a323b797549574643783137377330476630530d0a75470d0a323b6552794878625735625175485a64575832450d0a25320d0a313b5a7145444c32690d0a370d0a333b346a637a720d0a2532300d0a333b39627a59497650544b39714655376f6a6d374d664d305976480d0a46524f0d0a313b5556477645615749584e62784f0d0a4d0d0a323b4b503748426f7a6f687530744e514a6c59634a44417156630d0a25320d0a313b3458467945466a3347366f765869694f650d0a300d0a333b696a3054754c3578595768705049390d0a4455410d0a313b595730494f0d0a4c0d0a323b4e3567485a4861504d3233346b50564b4b4f45464e390d0a25320d0a313b495254683876435468625137334a773669397a0d0a300d0a333b414635416a7265585977356e35496b67453952513252420d0a5748450d0a313b67565846465a586b547074313752574d580d0a520d0a323b614e7a5947754948774339790d0a45250d0a313b3076757a444f43446e36613162313269424c73796b616e65430d0a320d0a323b41666e524d716447540d0a30390d0a333b427a3773504a534d370d0a3736360d0a333b34544757775458484a51687a7666596238326a6c6b39440d0a2533440d0a323b48334b35646c4c0d0a39370d0a323b7362517563480d0a36360d0a333b326c6f45534c79684f32495535306865756f300d0a2532300d0a333b65415169345751456e4c4d30446d39636d537836430d0a414e440d0a333b65556a4c6b4a6635635441364c346c3731305547376c67570d0a2532300d0a333b63416f4d546771444c590d0a3937390d0a323b523778364233616167387648310d0a39250d0a333b7765764b577a52530d0a3344490d0a323b527344505845754a517563314c54434d0d0a46250d0a313b45617431624b354c4e76365465384c0d0a320d0a313b48725072376d6c7231666265446c7353454d6f4d550d0a380d0a313b634137714b43516e5671387155794d7046367a4c38665058650d0a250d0a323b4f664c343252727364356f4d6855644548336878745459720d0a32380d0a333b70336e437046720d0a4f52440d0a323b4972505332386b344f42416b414b306d6e7769724156370d0a25320d0a323b7476674d72366363670d0a384d0d0a323b317a4f77623045774256516641486a7266386858576d6946710d0a49440d0a323b3779366b7077375a304b6a46777a724e0d0a25320d0a323b5259684b71336e4853656d786b564952514b53444877346d0d0a38250d0a333b3264365a4d3643674451700d0a3238530d0a323b4d61443468473772496b59336c565a476f6d0d0a454c0d0a323b31476336376e6f46750d0a45430d0a333b746774754b0d0a5425320d0a313b6549464d676a594b6b4b4f487143654169540d0a300d0a313b327530776264486f5363737536327370757076580d0a490d0a333b716d733841370d0a464e550d0a333b78563962414232630d0a4c4c250d0a313b3733507567370d0a320d0a333b454b4e6268326a316271415635440d0a3843410d0a313b52787738716c78454f4a6d6d700d0a530d0a323b6a337954446476564d516372714f360d0a54250d0a333b4e47566976674144590d0a3238660d0a323b5954615959654b474c564b41536e78650d0a6c610d0a323b6272557a796e324179304d667a6c6f6e0d0a67250d0a323b684870416876446c706f570d0a32300d0a323b6172786778475938714c51655677503931687453344d456c530d0a41530d0a313b4a52304472594a69427972794b74646d31666950340d0a250d0a313b546835467a0d0a320d0a313b6d6c366d39774b514435745841725059750d0a300d0a313b314a484a690d0a430d0a313b6c524b5531477152466e6e454d46754e64780d0a480d0a313b61447071630d0a410d0a313b6d7a65557152486455337a756a730d0a520d0a333b474e4d375164436f4a3042696d0d0a2532390d0a323b7655686967646f3373727739456f67547a4c0d0a25320d0a323b34576a6d747a39
4c5744384d77434831567175786d7a706d350d0a43300d0a313b343831656f4c5a720d0a780d0a333b456e64787155527049734372540d0a3230250d0a313b63797356646954553537510d0a320d0a323b4e447159334557710d0a39250d0a323b776d44496a4f0d0a32300d0a333b535839487375573269476247440d0a46524f0d0a333b4d7636616e53516e586a3072714337487936536b576558570d0a4d25320d0a323b5441674444626d6437480d0a30740d0a333b784f346a79584f677251676f390d0a6573740d0a333b6b5766563469557466430d0a2e666c0d0a323b6d334b4d416565430d0a61670d0a333b6a6141744d4631576d540d0a2532300d0a313b3150596e30506e75570d0a4f0d0a333b683638724e36554a684f6a476a65450d0a5244450d0a313b4f51316c736a6e344b337049540d0a520d0a333b7257625142566452613868464b4c7352780d0a2532300d0a333b3941694b4d4454596976647476423561347642676739440d0a4259250d0a323b66626d764c5475434855505064644d796d320d0a32300d0a313b716933793571563066467a63556166643245770d0a660d0a313b524b424673594f3574694342360d0a6c0d0a323b6d494343414c0d0a61670d0a313b6c4b4236585632384b72316e33750d0a250d0a333b6d77794e6765520d0a32304c0d0a333b4849337031346574450d0a494d490d0a333b425836546a4a68306e43754c6a6b4d7436646c760d0a5425320d0a333b356e363078326d4b6f5a7063484f6b654f5a7861386774594d0d0a3030250d0a313b39753457534e655a0d0a320d0a323b77536e544967446b0d0a43310d0a323b3372446f630d0a25320d0a333b32636f744f4759700d0a3925320d0a333b35464f6d34706b636346376a5a7973304e776a750d0a4334320d0a313b686f645257386c6a776c6e5a47675a6d4e4c690d0a250d0a323b624e6d784c0d0a32430d0a333b66374e6931536d6c0d0a3125320d0a313b4d46466e573043645637340d0a390d0a313b774b4e4645454445747a4a537075325a4c626d78656a49304c0d0a250d0a333b636e595a5843373633767a5343636c6b5936790d0a3239250d0a323b343731684f545179580d0a33450d0a333b5166684944494c4a675556754937616244410d0a3125320d0a323b753442385358704742536e0d0a39250d0a323b4f374c73474b775169324b30330d0a32430d0a313b6b375639414951544d4661713879745541540d0a530d0a313b6533336944690d0a4c0d0a323b6f75386d44324343504e4870316232474f0d0a45450d0a323b6e4b6f644d0d0a50250d0a323b4b45584b6b43750d0a32380d0a333b495357635771784f39507750724244766772370d0a3125320d0a323b42596b4a4948704c62623147760d0a39250d0a333b783347786e434b776e48370d0a3243390d0a313b7837396f49670d0a370d0a323b734634364168736c7968547a667a756f32520d0a39390d0a333b5a786e37734e69534d7356386d51685244637872594953490d0a2532390d0a333b416e586f37345071447531500d0a2532390d0a323b6f7164664144327247580d0a25370d0a313b39384f494f4358644a6d66664f386c770d0a430d0a333b47565a6e593965457769584a0d0a2537430d0a323b614e376c4b6147544c77735172786342410d0a25320d0a333b6563585745797364467679474162306f61515a7966327239660d0a3726700d0a333b6e7878464c545a65527779646639310d0a6173730d0a323b4952463149314a36490d0a3d780d0a323b3567327643664e0d0a78780d0a300d0a0d0a"
+        )
+        header = b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n"
+        full_response = header + chunked_body
+        full_request_hex = binascii.hexlify(full_response)
+        extracted_header, extracted_body = PacketParser.extract_http_file_data(full_request_hex)
+        self.assertEqual(
+            parse.unquote_to_bytes(extracted_body),  # manually URL-decode the reassembled body
+            b"name=hhh'||(SELECT 'RCuG' FROM DUAL WHERE 9766=9766 AND 9799=IF((ORD(MID((SELECT IFNULL(CAST(flag AS CHAR),0x20) FROM test.flag ORDER BY flag LIMIT 0,1),42,1))>1),SLEEP(1),9799))||'&pass=xxx",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
@@ -1,47 +0,0 @@
-import binascii
-import gzip
-import unittest
-
-from FlowAnalyzer.PacketParser import PacketParser
-
-
-class TestPacketParserOptimization(unittest.TestCase):
-    def test_gzip_decompression(self):
-        # Construct a fake HTTP response with GZIP body
-        content = b"Hello, Gzip World!"
-        compressed = gzip.compress(content)
-        header = b"HTTP/1.1 200 OK\r\nContent-Encoding: gzip\r\n\r\n"
-        full_response = header + compressed
-
-        full_request_hex = binascii.hexlify(full_response)
-
-        # Test extract_http_file_data
-        extracted_header, extracted_body = PacketParser.extract_http_file_data(full_request_hex)
-
-        self.assertEqual(extracted_header, header)
-        self.assertEqual(extracted_body, content)
-
-    def test_basic_extraction(self):
-        # Case: Simple text body, no chunking
-        content = b"Simple Body"
-        header = b"HTTP/1.1 200 OK\r\n\r\n"
-        full_response = header + content
-        full_request_hex = binascii.hexlify(full_response)
-
-        extracted_header, extracted_body = PacketParser.extract_http_file_data(full_request_hex)
-        self.assertEqual(extracted_body, content)
-
-    def test_chunked_decoding(self):
-        # Case: Chunked body
-        # 5\r\nHello\r\n0\r\n\r\n
-        chunked_body = b"5\r\nHello\r\n0\r\n\r\n"
-        header = b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n"
-        full_response = header + chunked_body
-        full_request_hex = binascii.hexlify(full_response)
-
-        extracted_header, extracted_body = PacketParser.extract_http_file_data(full_request_hex)
-        self.assertEqual(extracted_body, b"Hello")
-
-
-if __name__ == "__main__":
-    unittest.main()