FlowAnalyzer 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,6 @@ import gzip
3
3
  import hashlib
4
4
  import json
5
5
  import os
6
- import shutil
7
6
  import subprocess
8
7
  from typing import Dict, Iterable, NamedTuple, Optional, Tuple
9
8
  from urllib import parse
@@ -36,15 +35,15 @@ class HttpPair(NamedTuple):
36
35
  class FlowAnalyzer:
37
36
  """FlowAnalyzer是一个流量分析器,用于解析和处理tshark导出的JSON数据文件"""
38
37
 
39
- def __init__(self, jsonPath: str):
38
+ def __init__(self, json_path: str):
40
39
  """初始化FlowAnalyzer对象
41
40
 
42
41
  Parameters
43
42
  ----------
44
- jsonPath : str
43
+ json_path : str
45
44
  tshark导出的JSON文件路径
46
45
  """
47
- self.jsonPath = jsonPath
46
+ self.json_path = json_path
48
47
  self.check_json_file()
49
48
 
50
49
  def check_json_file(self):
@@ -58,11 +57,11 @@ class FlowAnalyzer:
58
57
  ValueError
59
58
  当JSON文件内容为空时抛出异常
60
59
  """
61
- if not os.path.exists(self.jsonPath):
62
- raise FileNotFoundError("您的tshark导出的JSON文件没有找到!JSON路径:%s" % self.jsonPath)
60
+ if not os.path.exists(self.json_path):
61
+ raise FileNotFoundError("您的tshark导出的JSON文件没有找到!JSON路径:%s" % self.json_path)
63
62
 
64
- if os.path.getsize(self.jsonPath) == 0:
65
- raise ValueError("您的tshark导出的JSON文件内容为空!JSON路径:%s" % self.jsonPath)
63
+ if os.path.getsize(self.json_path) == 0:
64
+ raise ValueError("您的tshark导出的JSON文件内容为空!JSON路径:%s" % self.json_path)
66
65
 
67
66
  def parse_packet(self, packet: dict) -> Tuple[int, int, float, str, str]:
68
67
  """解析Json中的关键信息字段
@@ -92,7 +91,6 @@ class FlowAnalyzer:
92
91
  return frame_num, request_in, time_epoch, full_uri, full_request
93
92
 
94
93
  def parse_http_json(self) -> Tuple[Dict[int, Request], Dict[int, Response]]:
95
- # sourcery skip: use-named-expression
96
94
  """解析JSON数据文件中的HTTP请求和响应信息
97
95
 
98
96
  Returns
@@ -100,7 +98,7 @@ class FlowAnalyzer:
100
98
  tuple
101
99
  包含请求字典和响应列表的元组
102
100
  """
103
- with open(self.jsonPath, "r", encoding="utf-8") as f:
101
+ with open(self.json_path, "r", encoding="utf-8") as f:
104
102
  data = json.load(f)
105
103
 
106
104
  requests, responses = {}, {}
@@ -149,17 +147,16 @@ class FlowAnalyzer:
149
147
  yield HttpPair(request=None, response=resp)
150
148
 
151
149
  @staticmethod
152
- def get_hash(filePath: str, display_filter: str) -> str:
153
- with open(filePath, "rb") as f:
150
+ def get_hash(file_path: str, display_filter: str) -> str:
151
+ with open(file_path, "rb") as f:
154
152
  return hashlib.md5(f.read() + display_filter.encode()).hexdigest()
155
153
 
156
154
  @staticmethod
157
- def extract_json_file(fileName: str, display_filter: str, tshark_workDir: str, tshark_path: str) -> None:
158
- # sourcery skip: replace-interpolation-with-fstring, use-fstring-for-formatting
155
+ def extract_json_file(file_name: str, display_filter: str, tshark_path: str, tshark_work_dir: str, json_work_path: str) -> None:
159
156
  command = [
160
157
  tshark_path,
161
- "-r", fileName,
162
- "-Y", f"(tcp.reassembled_in) or ({display_filter})",
158
+ "-r", file_name,
159
+ "-Y", f"({display_filter})",
163
160
  "-T", "json",
164
161
  "-e", "http.response.code",
165
162
  "-e", "http.request_in",
@@ -169,33 +166,42 @@ class FlowAnalyzer:
169
166
  "-e", "frame.time_epoch",
170
167
  "-e", "exported_pdu.exported_pdu",
171
168
  "-e", "http.request.full_uri",
172
- ">", "output.json",
173
169
  ]
174
-
175
- _, stderr = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=tshark_workDir).communicate()
176
- if stderr != b"" and b"WARNING" not in stderr:
177
- print(f"[Waring/Error]: {stderr}")
170
+ logger.debug(f"导出Json命令: {command}")
171
+
172
+ with open(json_work_path, "wb") as output_file:
173
+ process = subprocess.Popen(
174
+ command,
175
+ stdout=output_file,
176
+ stderr=subprocess.PIPE,
177
+ cwd=tshark_work_dir
178
+ )
179
+ _, stderr = process.communicate()
180
+ logger.debug(f"导出Json文件路径: {json_work_path}")
181
+
182
+ if stderr and b"WARNING" not in stderr:
183
+ try:
184
+ print(f"[Warning/Error]: {stderr.decode('utf-8')}")
185
+ except Exception:
186
+ print(f"[Warning/Error]: {stderr.decode('gbk')}")
178
187
 
179
188
  @staticmethod
180
- def move_and_addMD5Sum(tshark_jsonPath: str, jsonWordPath: str, MD5Sum: str) -> None:
181
- if tshark_jsonPath != jsonWordPath:
182
- shutil.move(tshark_jsonPath, jsonWordPath)
183
-
184
- with open(jsonWordPath, "r", encoding="utf-8") as f:
189
+ def add_md5sum(json_work_path: str, md5_sum: str) -> None:
190
+ with open(json_work_path, "r", encoding="utf-8") as f:
185
191
  data = json.load(f)
186
- data[0]["MD5Sum"] = MD5Sum
192
+ data[0]["MD5Sum"] = md5_sum
187
193
 
188
- with open(jsonWordPath, "w", encoding="utf-8") as f:
194
+ with open(json_work_path, "w", encoding="utf-8") as f:
189
195
  json.dump(data, f, indent=2)
190
196
 
191
197
  @staticmethod
192
- def get_json_data(filePath: str, display_filter: str, tshark_path: Optional[str] = None) -> str:
198
+ def get_json_data(file_path: str, display_filter: str, tshark_path: Optional[str] = None) -> str:
193
199
  # sourcery skip: replace-interpolation-with-fstring
194
200
  """获取JSON数据并保存至文件,保存目录是当前工作目录,也就是您运行脚本所在目录
195
201
 
196
202
  Parameters
197
203
  ----------
198
- filePath : str
204
+ file_path : str
199
205
  待处理的数据文件路径
200
206
  display_filter : str
201
207
  WireShark的显示过滤器
@@ -205,36 +211,39 @@ class FlowAnalyzer:
205
211
  str
206
212
  保存JSON数据的文件路径
207
213
  """
208
- if not os.path.exists(filePath):
209
- raise FileNotFoundError("您的填写的流量包没有找到!流量包路径:%s" % filePath)
214
+ if not os.path.exists(file_path):
215
+ raise FileNotFoundError("您的填写的流量包没有找到!流量包路径:%s" % file_path)
210
216
 
211
- MD5Sum = FlowAnalyzer.get_hash(filePath, display_filter)
212
- workDir = os.getcwd()
213
- tshark_workDir = os.path.dirname(os.path.abspath(filePath))
214
- tshark_jsonPath = os.path.join(tshark_workDir, "output.json")
215
- jsonWordPath = os.path.join(workDir, "output.json")
216
- fileName = os.path.basename(filePath)
217
+ md5_sum = FlowAnalyzer.get_hash(file_path, display_filter)
218
+ logger.debug(f"md5校验值: {md5_sum}")
219
+
220
+ work_dir = os.getcwd()
221
+ tshark_command_work_dir = os.path.dirname(os.path.abspath(file_path))
222
+ json_work_path = os.path.join(work_dir, "output.json")
223
+ file_name = os.path.basename(file_path)
217
224
 
218
- if os.path.exists(jsonWordPath):
225
+ if os.path.exists(json_work_path):
219
226
  try:
220
- with open(jsonWordPath, "r", encoding="utf-8") as f:
227
+ with open(json_work_path, "r", encoding="utf-8") as f:
221
228
  data = json.load(f)
222
- if data[0].get("MD5Sum") == MD5Sum:
223
- logger.debug("匹配HASH校验无误,自动返回Json文件路径!")
224
- return jsonWordPath
229
+ if data[0].get("MD5Sum") == md5_sum:
230
+ logger.debug("匹配md5校验无误,自动返回Json文件路径!")
231
+ return json_work_path
225
232
  except Exception:
226
233
  logger.debug("默认的Json文件无法被正常解析, 正在重新生成Json文件中")
227
234
 
228
235
  tshark_path = FlowAnalyzer.get_tshark_path(tshark_path)
229
- FlowAnalyzer.extract_json_file(fileName, display_filter, tshark_workDir, tshark_path)
230
- FlowAnalyzer.move_and_addMD5Sum(tshark_jsonPath, jsonWordPath, MD5Sum)
231
- return jsonWordPath
236
+ FlowAnalyzer.extract_json_file(file_name, display_filter, tshark_path, tshark_command_work_dir, json_work_path)
237
+ FlowAnalyzer.add_md5sum(json_work_path, md5_sum)
238
+ return json_work_path
232
239
 
233
240
  @staticmethod
234
241
  def get_tshark_path(tshark_path: Optional[str]) -> str:
235
242
  default_tshark_path = get_default_tshark_path()
236
243
  if not os.path.exists(default_tshark_path):
237
244
  logger.debug("没有检测到tshark存在, 请查看并检查tshark_path")
245
+ else:
246
+ logger.debug("检测到默认tshark存在!")
238
247
 
239
248
  if tshark_path is None:
240
249
  logger.debug("您没有传入tshark_path, 请传入tshark_path")
@@ -253,8 +262,7 @@ class FlowAnalyzer:
253
262
  exit(-1)
254
263
  return use_tshark_path
255
264
 
256
- def Split_HTTP_headers(self, file_data: bytes) -> Tuple[bytes, bytes]:
257
- # sourcery skip: use-named-expression
265
+ def split_http_headers(self, file_data: bytes) -> Tuple[bytes, bytes]:
258
266
  headerEnd = file_data.find(b"\r\n\r\n")
259
267
  if headerEnd != -1:
260
268
  headerEnd += 4
@@ -266,7 +274,7 @@ class FlowAnalyzer:
266
274
  print("[Warning] 没有找到headers和response的划分位置!")
267
275
  return b"", file_data
268
276
 
269
- def Dechunck_HTTP_response(self, file_data: bytes) -> bytes:
277
+ def dechunck_http_response(self, file_data: bytes) -> bytes:
270
278
  """解码分块TCP数据
271
279
 
272
280
  Parameters
@@ -306,10 +314,10 @@ class FlowAnalyzer:
306
314
  tuple
307
315
  包含header和file_data的元组
308
316
  """
309
- header, file_data = self.Split_HTTP_headers(bytes.fromhex(full_request))
317
+ header, file_data = self.split_http_headers(bytes.fromhex(full_request))
310
318
 
311
319
  with contextlib.suppress(Exception):
312
- file_data = self.Dechunck_HTTP_response(file_data)
320
+ file_data = self.dechunck_http_response(file_data)
313
321
 
314
322
  with contextlib.suppress(Exception):
315
323
  if file_data.startswith(b"\x1F\x8B"):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: FlowAnalyzer
3
- Version: 0.3.7
3
+ Version: 0.3.9
4
4
  Summary: FlowAnalyzer是一个流量分析器,用于解析和处理tshark导出的JSON数据文件
5
5
  Home-page: https://github.com/Byxs20/FlowAnalyzer
6
6
  Author: Byxs20
@@ -52,7 +52,7 @@ tshark_path = r"C:\Program Files\Wireshark\tshark.exe"
52
52
  ```
53
53
  $ git clone https://github.com/Byxs20/FlowAnalyzer.git
54
54
  $ cd ./FlowAnalyzer/
55
- $ python -m tests.demo
55
+ $ python tests\demo.py
56
56
  ```
57
57
 
58
58
  运行结果:
@@ -0,0 +1,9 @@
1
+ FlowAnalyzer/FlowAnalyzer.py,sha256=ErHea4wQEeGmCgAmWr4xmEuKSSYfXE0kFe7It0xD6Is,12203
2
+ FlowAnalyzer/Path.py,sha256=E5VvucTftp8VTQUffFzFWHotQEYtZL-j7IQPOaleiug,130
3
+ FlowAnalyzer/__init__.py,sha256=vfiHONPTrvjUU3MwhjFOEo3sWfzlhkA6gOLn_4UJ7sg,70
4
+ FlowAnalyzer/logging_config.py,sha256=-RntNJhrBiW7ToXIP1WJjZ4Yf9jmZQ1PTX_er3tDxhw,730
5
+ FlowAnalyzer-0.3.9.dist-info/LICENSE,sha256=ybAV0ECduYBZCpjkHyNALVWRRmT_eM0BDgqUszhwEFU,1080
6
+ FlowAnalyzer-0.3.9.dist-info/METADATA,sha256=OcwMs0sqeUmUv1Y-9NWDaGFswMupCLf-FuJYr68DQX8,1956
7
+ FlowAnalyzer-0.3.9.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
8
+ FlowAnalyzer-0.3.9.dist-info/top_level.txt,sha256=2MtvAF6dEe_eHipw_6G5pFLb2uOCbGnlH0bC4iBtm5A,13
9
+ FlowAnalyzer-0.3.9.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- FlowAnalyzer/FlowAnalyzer.py,sha256=E4hp7anNMlELKF_EOZcO2QCVxRjwrQv3pQcSs3lPw5o,12031
2
- FlowAnalyzer/Path.py,sha256=E5VvucTftp8VTQUffFzFWHotQEYtZL-j7IQPOaleiug,130
3
- FlowAnalyzer/__init__.py,sha256=vfiHONPTrvjUU3MwhjFOEo3sWfzlhkA6gOLn_4UJ7sg,70
4
- FlowAnalyzer/logging_config.py,sha256=-RntNJhrBiW7ToXIP1WJjZ4Yf9jmZQ1PTX_er3tDxhw,730
5
- FlowAnalyzer-0.3.7.dist-info/LICENSE,sha256=ybAV0ECduYBZCpjkHyNALVWRRmT_eM0BDgqUszhwEFU,1080
6
- FlowAnalyzer-0.3.7.dist-info/METADATA,sha256=6vdbTYk2wCH58J6dvnclz5odouEw4EPIossdV75PHu4,1956
7
- FlowAnalyzer-0.3.7.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
8
- FlowAnalyzer-0.3.7.dist-info/top_level.txt,sha256=2MtvAF6dEe_eHipw_6G5pFLb2uOCbGnlH0bC4iBtm5A,13
9
- FlowAnalyzer-0.3.7.dist-info/RECORD,,