FlowAnalyzer 0.2.7__tar.gz → 0.2.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,35 +1,42 @@
1
- import os
2
- import json
1
+ import contextlib
3
2
  import gzip
3
+ import hashlib
4
+ import json
5
+ import logging
6
+ import os
4
7
  import shutil
5
- import contextlib
6
8
  import subprocess
9
+ from typing import Dict, Iterable, NamedTuple, Optional, Tuple
7
10
  from urllib import parse
8
- from typing import Tuple, Dict, Iterable, NamedTuple
9
11
 
12
+ from .logging_config import configure_logger
13
+
14
+ logger = configure_logger("FlowAnalyzer", logging.INFO)
10
15
 
11
16
  class Request(NamedTuple):
12
- frame_num: int
17
+ frame_num: Optional[int]
13
18
  header: bytes
14
19
  file_data: bytes
15
- time_epoch: bytes
16
- full_uri: str
20
+ full_uri: Optional[str]
21
+ time_epoch: Optional[float]
22
+
17
23
 
18
24
  class Response(NamedTuple):
19
- frame_num: int
25
+ frame_num: Optional[int]
20
26
  header: bytes
21
- request_in: int
22
27
  file_data: bytes
23
- time_epoch: bytes
28
+ request_in: Optional[int]
29
+ time_epoch: Optional[float]
30
+
24
31
 
25
32
  class HttpPair(NamedTuple):
26
- request: Request
27
- response: Response
33
+ request: Optional[Request]
34
+ response: Optional[Response]
28
35
 
29
36
 
30
37
  class FlowAnalyzer:
31
38
  """FlowAnalyzer是一个流量分析器,用于解析和处理tshark导出的JSON数据文件"""
32
-
39
+
33
40
  def __init__(self, jsonPath: str):
34
41
  """初始化FlowAnalyzer对象
35
42
 
@@ -42,7 +49,7 @@ class FlowAnalyzer:
42
49
  self.check_json_file()
43
50
 
44
51
  def check_json_file(self):
45
- # sourcery skip: remove-redundant-fstring, replace-interpolation-with-fstring
52
+ # sourcery skip: replace-interpolation-with-fstring
46
53
  """检查JSON文件是否存在并非空
47
54
 
48
55
  Raises
@@ -53,7 +60,7 @@ class FlowAnalyzer:
53
60
  当JSON文件内容为空时抛出异常
54
61
  """
55
62
  if not os.path.exists(self.jsonPath):
56
- raise FileNotFoundError(f"您的tshark导出的JSON文件没有找到!JSON路径:%s" % self.jsonPath)
63
+ raise FileNotFoundError("您的tshark导出的JSON文件没有找到!JSON路径:%s" % self.jsonPath)
57
64
 
58
65
  if os.path.getsize(self.jsonPath) == 0:
59
66
  raise ValueError("您的tshark导出的JSON文件内容为空!JSON路径:%s" % self.jsonPath)
@@ -67,24 +74,42 @@ class FlowAnalyzer:
67
74
  tuple
68
75
  包含请求字典和响应列表的元组
69
76
  """
70
- with open(self.jsonPath, "r", encoding='utf-8') as f:
77
+ with open(self.jsonPath, "r", encoding="utf-8") as f:
71
78
  data = json.load(f)
72
-
79
+
73
80
  requests, responses = {}, {}
74
81
  for packet in data:
75
82
  packet = packet["_source"]["layers"]
76
83
  time_epoch = float(packet["frame.time_epoch"][0]) if packet.get("frame.time_epoch") else None
77
- full_request = packet["tcp.reassembled.data"][0] if packet.get("tcp.reassembled.data") else packet["tcp.payload"][0]
84
+
85
+ if packet.get("tcp.reassembled.data"):
86
+ full_request = packet["tcp.reassembled.data"][0]
87
+ elif packet.get("tcp.payload"):
88
+ full_request = packet["tcp.payload"][0]
89
+ else:
90
+ # exported_pdu.exported_pdu
91
+ full_request = packet["exported_pdu.exported_pdu"][0]
92
+
78
93
  frame_num = int(packet["frame.number"][0]) if packet.get("frame.number") else None
79
94
  request_in = int(packet["http.request_in"][0]) if packet.get("http.request_in") else frame_num
80
- full_uri = parse.unquote(packet["http.request.full_uri"][0]) if packet.get("http.request.full_uri") else None
81
-
95
+ full_uri = (
96
+ parse.unquote(packet["http.request.full_uri"][0]) if packet.get("http.request.full_uri") else None
97
+ )
98
+
82
99
  header, file_data = self.extract_http_file_data(full_request)
83
-
100
+
84
101
  if packet.get("http.response_number"):
85
- responses[frame_num] = Response(frame_num=frame_num, request_in=request_in, header=header, file_data=file_data, time_epoch=time_epoch)
102
+ responses[frame_num] = Response(
103
+ frame_num=frame_num,
104
+ request_in=request_in,
105
+ header=header,
106
+ file_data=file_data,
107
+ time_epoch=time_epoch,
108
+ )
86
109
  else:
87
- requests[frame_num] = Request(frame_num=frame_num, header=header, file_data=file_data, time_epoch=time_epoch, full_uri=full_uri)
110
+ requests[frame_num] = Request(
111
+ frame_num=frame_num, header=header, file_data=file_data, time_epoch=time_epoch, full_uri=full_uri
112
+ )
88
113
  return requests, responses
89
114
 
90
115
  def generate_http_dict_pairs(self) -> Iterable[HttpPair]: # sourcery skip: use-named-expression
@@ -95,7 +120,7 @@ class FlowAnalyzer:
95
120
  包含请求和响应信息的字典迭代器
96
121
  """
97
122
  requests, responses = self.parse_http_json()
98
- response_map = {r.request_in : r for r in responses.values()}
123
+ response_map = {r.request_in: r for r in responses.values()}
99
124
  yielded_resps = []
100
125
  for req_id, req in requests.items():
101
126
  resp = response_map.get(req_id)
@@ -111,8 +136,36 @@ class FlowAnalyzer:
111
136
  resp = resp._replace(request_in=None)
112
137
  yield HttpPair(request=None, response=resp)
113
138
 
139
@staticmethod
def get_hash(filePath: str, display_filter: str) -> str:
    """Return the MD5 hex digest of the capture file followed by the display filter.

    The digest covers the raw bytes of ``filePath`` with the encoded
    ``display_filter`` appended, so it changes when either the capture or the
    filter changes. It is used as a cache key, not for security.

    Parameters
    ----------
    filePath : str
        Path of the capture file to hash.
    display_filter : str
        tshark display filter that is mixed into the digest.

    Returns
    -------
    str
        Hexadecimal MD5 digest.
    """
    digest = hashlib.md5()
    with open(filePath, "rb") as f:
        # Feed the file in 1 MiB chunks: the result is identical to hashing
        # ``f.read() + display_filter.encode()`` but never holds the whole
        # capture (plus a concatenated copy) in memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    digest.update(display_filter.encode())
    return digest.hexdigest()
143
+
144
@staticmethod
def extract_json_file(fileName: str, display_filter: str, tshark_workDir: str) -> None:
    """Run tshark on a capture and write its JSON export to ``output.json``.

    The export is written to ``output.json`` inside ``tshark_workDir`` (or the
    current working directory when ``tshark_workDir`` is empty).

    Parameters
    ----------
    fileName : str
        Capture file name, resolved relative to ``tshark_workDir``.
    display_filter : str
        tshark display filter; packets matching ``tcp.reassembled_in`` are
        always included in addition to it.
    tshark_workDir : str
        Directory tshark is run in and where ``output.json`` is created.

    Raises
    ------
    FileNotFoundError
        If the ``tshark`` executable cannot be found on PATH.
    """
    # Equivalent command line:
    # tshark -r <file> -Y "(tcp.reassembled_in) or (<filter>)" -T json -e ... > output.json
    fields = (
        "http.request_number",
        "http.response_number",
        "http.request_in",
        "tcp.reassembled.data",
        "frame.number",
        "tcp.payload",
        "frame.time_epoch",
        "exported_pdu.exported_pdu",
        "http.request.full_uri",
    )
    command = [
        "tshark",
        "-r",
        fileName,
        "-Y",
        f"(tcp.reassembled_in) or ({display_filter})",
        "-T",
        "json",
    ]
    for field in fields:
        command += ["-e", field]

    # An argument list (no shell) keeps spaces, quotes and shell metacharacters
    # in the file name or filter from breaking or altering the command.
    output_path = os.path.join(tshark_workDir, "output.json")
    with open(output_path, "wb") as output_file:
        completed = subprocess.run(
            command,
            stdout=output_file,
            stderr=subprocess.PIPE,
            # An empty directory name means "current directory"; cwd="" would fail.
            cwd=tshark_workDir or None,
        )

    stderr = completed.stderr
    if stderr != b"" and b"WARNING" not in stderr:
        print(f"[Waring/Error]: {stderr}")
165
+
114
166
  @staticmethod
115
167
  def get_json_data(filePath: str, display_filter: str) -> str:
168
+ # sourcery skip: replace-interpolation-with-fstring
116
169
  """获取JSON数据并保存至文件,保存目录是当前工作目录,也就是您运行脚本所在目录
117
170
 
118
171
  Parameters
@@ -127,28 +180,34 @@ class FlowAnalyzer:
127
180
  str
128
181
  保存JSON数据的文件路径
129
182
  """
130
- # sourcery skip: replace-interpolation-with-fstring, use-fstring-for-formatting
131
183
  if not os.path.exists(filePath):
132
184
  raise FileNotFoundError("您的填写的流量包没有找到!流量包路径:%s" % filePath)
133
185
 
134
- oriDir = os.getcwd()
135
- fileDir = os.path.dirname(filePath)
136
- jsonPath = os.path.join(fileDir, "output.json")
137
-
138
- os.chdir(fileDir)
186
+ MD5Sum = FlowAnalyzer.get_hash(filePath, display_filter)
187
+ workDir = os.getcwd()
188
+ tshark_workDir = os.path.dirname(filePath)
189
+ tshark_jsonPath = os.path.join(tshark_workDir, "output.json")
190
+ jsonWordPath = os.path.join(workDir, "output.json")
139
191
  fileName = os.path.basename(filePath)
140
- command = 'tshark -r {} -Y "{}" -T json -e http.request_number -e http.response_number -e http.request_in -e tcp.reassembled.data -e frame.number -e tcp.payload -e frame.time_epoch -e http.request.full_uri > output.json'.format(
141
- fileName, display_filter)
142
192
 
143
- _, stderr = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
144
- if stderr != b"" and b"WARNING" not in stderr:
145
- print(f"[Waring/Error]: {stderr}")
193
+ if os.path.exists(jsonWordPath):
194
+ with open(jsonWordPath, "r", encoding="utf-8") as f:
195
+ data = json.load(f)
196
+ if data[0].get('MD5Sum') == MD5Sum:
197
+ logger.debug("匹配HASH校验无误,自动返回Json文件路径!")
198
+ return jsonWordPath
199
+ FlowAnalyzer.extract_json_file(fileName, display_filter, tshark_workDir)
146
200
 
147
- os.chdir(oriDir)
148
- dst_JsonPath = os.path.join(oriDir, "output.json")
149
- if jsonPath != dst_JsonPath:
150
- shutil.move(jsonPath, dst_JsonPath)
151
- return dst_JsonPath
201
+ if tshark_jsonPath != jsonWordPath:
202
+ shutil.move(tshark_jsonPath, jsonWordPath)
203
+
204
+ with open(jsonWordPath, "r", encoding="utf-8") as f:
205
+ data = json.load(f)
206
+ data[0]['MD5Sum'] = MD5Sum
207
+
208
+ with open(jsonWordPath, "w", encoding="utf-8") as f:
209
+ json.dump(data, f, indent=2)
210
+ return jsonWordPath
152
211
 
153
212
  def Split_HTTP_headers(self, file_data: bytes) -> Tuple[bytes, bytes]:
154
213
  # sourcery skip: use-named-expression
@@ -178,39 +237,37 @@ class FlowAnalyzer:
178
237
  """
179
238
  chunks = []
180
239
  chunkSizeEnd = file_data.find(b"\n") + 1
181
- lineEndings = b'\r\n' if bytes([file_data[chunkSizeEnd-2]]) == b'\r' else b'\n'
240
+ lineEndings = b"\r\n" if bytes([file_data[chunkSizeEnd - 2]]) == b"\r" else b"\n"
182
241
  lineEndingsLength = len(lineEndings)
183
242
  while True:
184
243
  chunkSize = int(file_data[:chunkSizeEnd], 16)
185
244
  if not chunkSize:
186
245
  break
187
-
188
- chunks.append(file_data[chunkSizeEnd:chunkSize + chunkSizeEnd])
189
- file_data = file_data[chunkSizeEnd + chunkSize + lineEndingsLength:]
246
+
247
+ chunks.append(file_data[chunkSizeEnd : chunkSize + chunkSizeEnd])
248
+ file_data = file_data[chunkSizeEnd + chunkSize + lineEndingsLength :]
190
249
  chunkSizeEnd = file_data.find(lineEndings) + lineEndingsLength
191
- return b''.join(chunks)
250
+ return b"".join(chunks)
192
251
 
193
- def extract_http_file_data(self, full_request: bytes) -> Tuple[bytes, bytes]:
194
- # sourcery skip: merge-else-if-into-elif, swap-if-else-branches
252
+ def extract_http_file_data(self, full_request: str) -> Tuple[bytes, bytes]:
195
253
  """提取HTTP请求或响应中的文件数据
196
-
254
+
197
255
  Parameters
198
256
  ----------
199
257
  full_request : bytes
200
258
  HTTP请求或响应的原始字节流
201
-
259
+
202
260
  Returns
203
261
  -------
204
262
  tuple
205
263
  包含header和file_data的元组
206
264
  """
207
- full_request = bytes.fromhex(full_request)
208
- header, file_data = self.Split_HTTP_headers(full_request)
265
+ header, file_data = self.Split_HTTP_headers(bytes.fromhex(full_request))
209
266
 
210
267
  with contextlib.suppress(Exception):
211
268
  file_data = self.Dechunck_HTTP_response(file_data)
212
-
269
+
213
270
  with contextlib.suppress(Exception):
214
271
  if file_data.startswith(b"\x1F\x8B"):
215
272
  file_data = gzip.decompress(file_data)
216
- return header, file_data
273
+ return header, file_data
@@ -0,0 +1,20 @@
1
+ import logging
2
+
3
+
4
def configure_logger(logger_name, level=logging.DEBUG) -> logging.Logger:
    """Return the named logger, set to ``level`` and printing to the console.

    ``logging.getLogger`` returns the same object for the same name, so the
    console handler is attached only once; calling this function repeatedly
    updates the level without causing duplicated log lines.

    Parameters
    ----------
    logger_name : str
        Name of the logger to fetch or create.
    level : int
        Logging level applied to the logger (default ``logging.DEBUG``).

    Returns
    -------
    logging.Logger
        The configured logger.
    """
    logger = logging.getLogger(logger_name)
    logger.setLevel(level)

    # Exact type check: subclasses such as FileHandler do not count as an
    # already-installed console handler.
    has_console = any(type(handler) is logging.StreamHandler for handler in logger.handlers)
    if not has_console:
        # Console handler with a timestamp / level / message layout.
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(
            logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
        )
        logger.addHandler(console_handler)
    return logger
17
+
18
+ if __name__ == '__main__':
19
+ logger = configure_logger("FlowAnalyzer")
20
+ logger.info("This is a test!")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: FlowAnalyzer
3
- Version: 0.2.7
3
+ Version: 0.2.9
4
4
  Summary: FlowAnalyzer是一个流量分析器,用于解析和处理tshark导出的JSON数据文件
5
5
  Home-page: https://github.com/Byxs20/FlowAnalyzer
6
6
  Author: Byxs20
@@ -22,6 +22,8 @@ Description: # FlowAnalyzer
22
22
 
23
23
  # Usage
24
24
 
25
+ 请务必添加 `tshark.exe` 到环境变量,否则找不到会出错!
26
+
25
27
  ```
26
28
  $ git clone https://github.com/Byxs20/FlowAnalyzer.git
27
29
  $ cd ./FlowAnalyzer/
@@ -3,6 +3,7 @@ README.md
3
3
  setup.py
4
4
  FlowAnalyzer/FlowAnalyzer.py
5
5
  FlowAnalyzer/__init__.py
6
+ FlowAnalyzer/logging_config.py
6
7
  FlowAnalyzer.egg-info/PKG-INFO
7
8
  FlowAnalyzer.egg-info/SOURCES.txt
8
9
  FlowAnalyzer.egg-info/dependency_links.txt
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: FlowAnalyzer
3
- Version: 0.2.7
3
+ Version: 0.2.9
4
4
  Summary: FlowAnalyzer是一个流量分析器,用于解析和处理tshark导出的JSON数据文件
5
5
  Home-page: https://github.com/Byxs20/FlowAnalyzer
6
6
  Author: Byxs20
@@ -22,6 +22,8 @@ Description: # FlowAnalyzer
22
22
 
23
23
  # Usage
24
24
 
25
+ 请务必添加 `tshark.exe` 到环境变量,否则找不到会出错!
26
+
25
27
  ```
26
28
  $ git clone https://github.com/Byxs20/FlowAnalyzer.git
27
29
  $ cd ./FlowAnalyzer/
@@ -14,6 +14,8 @@ pip3 install FlowAnalyzer -i https://pypi.org/simple
14
14
 
15
15
  # Usage
16
16
 
17
+ 请务必添加 `tshark.exe` 到环境变量,否则找不到会出错!
18
+
17
19
  ```
18
20
  $ git clone https://github.com/Byxs20/FlowAnalyzer.git
19
21
  $ cd ./FlowAnalyzer/
@@ -1,12 +1,13 @@
1
1
  import os
2
- from setuptools import setup, find_packages
2
+
3
+ from setuptools import find_packages, setup
3
4
 
4
5
  with open(os.path.join(os.path.dirname(__file__), "README.md"), encoding="utf-8") as f:
5
6
  long_description = f.read()
6
7
 
7
8
  setup(
8
9
  name="FlowAnalyzer",
9
- version="0.2.7",
10
+ version="0.2.9",
10
11
  description="FlowAnalyzer是一个流量分析器,用于解析和处理tshark导出的JSON数据文件",
11
12
  author="Byxs20",
12
13
  author_email="97766819@qq.com",
File without changes
File without changes