FlowAnalyzer 0.3.7.tar.gz → 0.3.9.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flowanalyzer-0.3.7 → flowanalyzer-0.3.9}/FlowAnalyzer/FlowAnalyzer.py +59 -51
- {flowanalyzer-0.3.7 → flowanalyzer-0.3.9}/FlowAnalyzer.egg-info/PKG-INFO +2 -2
- {flowanalyzer-0.3.7 → flowanalyzer-0.3.9}/PKG-INFO +2 -2
- {flowanalyzer-0.3.7 → flowanalyzer-0.3.9}/README.md +1 -1
- {flowanalyzer-0.3.7 → flowanalyzer-0.3.9}/setup.py +1 -1
- {flowanalyzer-0.3.7 → flowanalyzer-0.3.9}/FlowAnalyzer/Path.py +0 -0
- {flowanalyzer-0.3.7 → flowanalyzer-0.3.9}/FlowAnalyzer/__init__.py +0 -0
- {flowanalyzer-0.3.7 → flowanalyzer-0.3.9}/FlowAnalyzer/logging_config.py +0 -0
- {flowanalyzer-0.3.7 → flowanalyzer-0.3.9}/FlowAnalyzer.egg-info/SOURCES.txt +0 -0
- {flowanalyzer-0.3.7 → flowanalyzer-0.3.9}/FlowAnalyzer.egg-info/dependency_links.txt +0 -0
- {flowanalyzer-0.3.7 → flowanalyzer-0.3.9}/FlowAnalyzer.egg-info/top_level.txt +0 -0
- {flowanalyzer-0.3.7 → flowanalyzer-0.3.9}/LICENSE +0 -0
- {flowanalyzer-0.3.7 → flowanalyzer-0.3.9}/setup.cfg +0 -0
FlowAnalyzer/FlowAnalyzer.py

@@ -3,7 +3,6 @@ import gzip
 import hashlib
 import json
 import os
-import shutil
 import subprocess
 from typing import Dict, Iterable, NamedTuple, Optional, Tuple
 from urllib import parse
@@ -36,15 +35,15 @@ class HttpPair(NamedTuple):
 class FlowAnalyzer:
     """FlowAnalyzer是一个流量分析器,用于解析和处理tshark导出的JSON数据文件"""
 
-    def __init__(self,
+    def __init__(self, json_path: str):
         """初始化FlowAnalyzer对象
 
         Parameters
         ----------
-
+        json_path : str
             tshark导出的JSON文件路径
         """
-        self.
+        self.json_path = json_path
         self.check_json_file()
 
     def check_json_file(self):
@@ -58,11 +57,11 @@ class FlowAnalyzer:
         ValueError
             当JSON文件内容为空时抛出异常
         """
-        if not os.path.exists(self.
-            raise FileNotFoundError("您的tshark导出的JSON文件没有找到!JSON路径:%s" % self.
+        if not os.path.exists(self.json_path):
+            raise FileNotFoundError("您的tshark导出的JSON文件没有找到!JSON路径:%s" % self.json_path)
 
-        if os.path.getsize(self.
-            raise ValueError("您的tshark导出的JSON文件内容为空!JSON路径:%s" % self.
+        if os.path.getsize(self.json_path) == 0:
+            raise ValueError("您的tshark导出的JSON文件内容为空!JSON路径:%s" % self.json_path)
 
     def parse_packet(self, packet: dict) -> Tuple[int, int, float, str, str]:
         """解析Json中的关键信息字段
@@ -92,7 +91,6 @@ class FlowAnalyzer:
         return frame_num, request_in, time_epoch, full_uri, full_request
 
     def parse_http_json(self) -> Tuple[Dict[int, Request], Dict[int, Response]]:
-        # sourcery skip: use-named-expression
         """解析JSON数据文件中的HTTP请求和响应信息
 
         Returns
@@ -100,7 +98,7 @@
         tuple
             包含请求字典和响应列表的元组
         """
-        with open(self.
+        with open(self.json_path, "r", encoding="utf-8") as f:
             data = json.load(f)
 
         requests, responses = {}, {}
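For context, `parse_http_json` loads the file produced by tshark's JSON export (`-T json` with `-e` field selectors, as shown in the `extract_json_file` hunks further down). In that export each packet is a JSON object whose extracted fields sit under `_source.layers`, with every `-e` field mapped to a list of string values. A rough sketch of reading fields back out; the file name is a placeholder and the exact key layout should be checked against your tshark version:

```python
import json

# Placeholder path to a "tshark -T json -e <field>" export.
with open("output.json", "r", encoding="utf-8") as f:
    packets = json.load(f)

for packet in packets:
    # Each "-e" field appears under _source.layers as a list of strings.
    layers = packet.get("_source", {}).get("layers", {})
    time_epoch = layers.get("frame.time_epoch", ["0"])[0]
    full_uri = layers.get("http.request.full_uri", [""])[0]
    print(time_epoch, full_uri)
```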
@@ -149,17 +147,16 @@
                 yield HttpPair(request=None, response=resp)
 
     @staticmethod
-    def get_hash(
-        with open(
+    def get_hash(file_path: str, display_filter: str) -> str:
+        with open(file_path, "rb") as f:
             return hashlib.md5(f.read() + display_filter.encode()).hexdigest()
 
     @staticmethod
-    def extract_json_file(
-        # sourcery skip: replace-interpolation-with-fstring, use-fstring-for-formatting
+    def extract_json_file(file_name: str, display_filter: str, tshark_path: str, tshark_work_dir: str, json_work_path: str) -> None:
         command = [
             tshark_path,
-            "-r",
-            "-Y", f"(
+            "-r", file_name,
+            "-Y", f"({display_filter})",
             "-T", "json",
             "-e", "http.response.code",
             "-e", "http.request_in",
@@ -169,33 +166,42 @@
             "-e", "frame.time_epoch",
             "-e", "exported_pdu.exported_pdu",
             "-e", "http.request.full_uri",
-            ">", "output.json",
         ]
-
-
-
-
+        logger.debug(f"导出Json命令: {command}")
+
+        with open(json_work_path, "wb") as output_file:
+            process = subprocess.Popen(
+                command,
+                stdout=output_file,
+                stderr=subprocess.PIPE,
+                cwd=tshark_work_dir
+            )
+            _, stderr = process.communicate()
+        logger.debug(f"导出Json文件路径: {json_work_path}")
+
+        if stderr and b"WARNING" not in stderr:
+            try:
+                print(f"[Warning/Error]: {stderr.decode('utf-8')}")
+            except Exception:
+                print(f"[Warning/Error]: {stderr.decode('gbk')}")
 
     @staticmethod
-    def
-
-        shutil.move(tshark_jsonPath, jsonWordPath)
-
-        with open(jsonWordPath, "r", encoding="utf-8") as f:
+    def add_md5sum(json_work_path: str, md5_sum: str) -> None:
+        with open(json_work_path, "r", encoding="utf-8") as f:
             data = json.load(f)
-            data[0]["MD5Sum"] =
+            data[0]["MD5Sum"] = md5_sum
 
-        with open(
+        with open(json_work_path, "w", encoding="utf-8") as f:
             json.dump(data, f, indent=2)
 
     @staticmethod
-    def get_json_data(
+    def get_json_data(file_path: str, display_filter: str, tshark_path: Optional[str] = None) -> str:
         # sourcery skip: replace-interpolation-with-fstring
         """获取JSON数据并保存至文件,保存目录是当前工作目录,也就是您运行脚本所在目录
 
         Parameters
         ----------
-
+        file_path : str
             待处理的数据文件路径
         display_filter : str
             WireShark的显示过滤器
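The largest change in `extract_json_file` is how the tshark output reaches disk: 0.3.7 appended `">", "output.json"` to the argument list (shell-style redirection that plain `subprocess` never performs), while 0.3.9 opens the target file itself and passes the handle as the child's `stdout`, capturing `stderr` separately. A minimal sketch of that pattern, with the command and paths as placeholders:

```python
import subprocess

def run_to_file(command: list, output_path: str, work_dir: str) -> bytes:
    # Send the child's stdout straight into a file instead of relying on a
    # shell ">" redirection; keep stderr for diagnostics.
    with open(output_path, "wb") as output_file:
        process = subprocess.Popen(
            command,
            stdout=output_file,
            stderr=subprocess.PIPE,
            cwd=work_dir,
        )
        _, stderr = process.communicate()
    return stderr

# Hypothetical usage:
# stderr = run_to_file(["tshark", "-r", "capture.pcapng", "-T", "json"], "output.json", ".")
```

Writing through a real file handle also removes the need for the old `shutil.move(tshark_jsonPath, jsonWordPath)` step, which is consistent with the `shutil` import being dropped at the top of the module.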
@@ -205,36 +211,39 @@
         str
             保存JSON数据的文件路径
         """
-        if not os.path.exists(
-            raise FileNotFoundError("您的填写的流量包没有找到!流量包路径:%s" %
+        if not os.path.exists(file_path):
+            raise FileNotFoundError("您的填写的流量包没有找到!流量包路径:%s" % file_path)
 
-
-
-
-
-
-
+        md5_sum = FlowAnalyzer.get_hash(file_path, display_filter)
+        logger.debug(f"md5校验值: {md5_sum}")
+
+        work_dir = os.getcwd()
+        tshark_command_work_dir = os.path.dirname(os.path.abspath(file_path))
+        json_work_path = os.path.join(work_dir, "output.json")
+        file_name = os.path.basename(file_path)
 
-        if os.path.exists(
+        if os.path.exists(json_work_path):
             try:
-                with open(
+                with open(json_work_path, "r", encoding="utf-8") as f:
                     data = json.load(f)
-                if data[0].get("MD5Sum") ==
-                    logger.debug("匹配
-                    return
+                if data[0].get("MD5Sum") == md5_sum:
+                    logger.debug("匹配md5校验无误,自动返回Json文件路径!")
+                    return json_work_path
             except Exception:
                 logger.debug("默认的Json文件无法被正常解析, 正在重新生成Json文件中")
 
         tshark_path = FlowAnalyzer.get_tshark_path(tshark_path)
-        FlowAnalyzer.extract_json_file(
-        FlowAnalyzer.
-        return
+        FlowAnalyzer.extract_json_file(file_name, display_filter, tshark_path, tshark_command_work_dir, json_work_path)
+        FlowAnalyzer.add_md5sum(json_work_path, md5_sum)
+        return json_work_path
 
     @staticmethod
     def get_tshark_path(tshark_path: Optional[str]) -> str:
         default_tshark_path = get_default_tshark_path()
         if not os.path.exists(default_tshark_path):
             logger.debug("没有检测到tshark存在, 请查看并检查tshark_path")
+        else:
+            logger.debug("检测到默认tshark存在!")
 
         if tshark_path is None:
             logger.debug("您没有传入tshark_path, 请传入tshark_path")
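The reworked `get_json_data` avoids rerunning tshark when nothing has changed: `get_hash` builds an MD5 over the raw capture bytes plus the display filter, `add_md5sum` stamps that digest into the exported JSON, and a later call reuses `output.json` only while the stored `MD5Sum` still matches. A condensed sketch of that check, using the names from the diff; `cached_json_is_valid` is a helper invented here for illustration, not part of the released API:

```python
import hashlib
import json
import os

def get_hash(file_path: str, display_filter: str) -> str:
    # Cache key: MD5 over capture bytes plus filter text, so a different
    # pcap or a different filter invalidates the cached export.
    with open(file_path, "rb") as f:
        return hashlib.md5(f.read() + display_filter.encode()).hexdigest()

def cached_json_is_valid(json_work_path: str, md5_sum: str) -> bool:
    # Reuse a previous output.json only if it exists, parses, and carries
    # the same MD5Sum recorded by add_md5sum.
    if not os.path.exists(json_work_path):
        return False
    try:
        with open(json_work_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        return data[0].get("MD5Sum") == md5_sum
    except Exception:
        return False
```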
@@ -253,8 +262,7 @@ class FlowAnalyzer:
             exit(-1)
         return use_tshark_path
 
-    def
-        # sourcery skip: use-named-expression
+    def split_http_headers(self, file_data: bytes) -> Tuple[bytes, bytes]:
         headerEnd = file_data.find(b"\r\n\r\n")
         if headerEnd != -1:
             headerEnd += 4
@@ -266,7 +274,7 @@ class FlowAnalyzer:
             print("[Warning] 没有找到headers和response的划分位置!")
             return b"", file_data
 
-    def
+    def dechunck_http_response(self, file_data: bytes) -> bytes:
         """解码分块TCP数据
 
         Parameters
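The diff only shows `dechunck_http_response` gaining an explicit `bytes -> bytes` signature, not its body. For reference, an HTTP body sent with `Transfer-Encoding: chunked` is a sequence of hex-length lines, each followed by that many bytes and a CRLF, terminated by a zero-length chunk; a generic decoder looks roughly like the sketch below (not necessarily FlowAnalyzer's internal implementation):

```python
def dechunk(body: bytes) -> bytes:
    # Decode an HTTP "Transfer-Encoding: chunked" body: <hex size>\r\n,
    # <size> bytes of data, \r\n, repeated until a 0-size chunk.
    out, pos = b"", 0
    while True:
        line_end = body.find(b"\r\n", pos)
        if line_end == -1:
            break
        size = int(body[pos:line_end].split(b";")[0], 16)  # ignore chunk extensions
        if size == 0:
            break
        out += body[line_end + 2:line_end + 2 + size]
        pos = line_end + 2 + size + 2  # skip chunk data plus trailing CRLF
    return out

assert dechunk(b"4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n") == b"Wikipedia"
```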
@@ -306,10 +314,10 @@ class FlowAnalyzer:
         tuple
             包含header和file_data的元组
         """
-        header, file_data = self.
+        header, file_data = self.split_http_headers(bytes.fromhex(full_request))
 
         with contextlib.suppress(Exception):
-            file_data = self.
+            file_data = self.dechunck_http_response(file_data)
 
         with contextlib.suppress(Exception):
             if file_data.startswith(b"\x1F\x8B"):
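Putting the renamed pieces together, the 0.3.9 call flow is `get_json_data` (export or reuse the cached JSON) → `FlowAnalyzer(json_path)` → `parse_http_json`. A hedged end-to-end sketch based only on the signatures visible in this diff; the pcap path is a placeholder, the tshark path mirrors the README's Windows example, and the class is assumed to be re-exported at the package's top level:

```python
from FlowAnalyzer import FlowAnalyzer

# Placeholder capture; the tshark path matches the README example.
pcap_path = "capture.pcapng"
tshark_path = r"C:\Program Files\Wireshark\tshark.exe"

# Export (or reuse a cached) output.json, then parse requests/responses.
json_path = FlowAnalyzer.get_json_data(pcap_path, "http", tshark_path=tshark_path)
analyzer = FlowAnalyzer(json_path)
requests, responses = analyzer.parse_http_json()
print(f"{len(requests)} requests, {len(responses)} responses")
```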
FlowAnalyzer.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: FlowAnalyzer
-Version: 0.3.
+Version: 0.3.9
 Summary: FlowAnalyzer是一个流量分析器,用于解析和处理tshark导出的JSON数据文件
 Home-page: https://github.com/Byxs20/FlowAnalyzer
 Author: Byxs20
@@ -52,7 +52,7 @@ tshark_path = r"C:\Program Files\Wireshark\tshark.exe"
 ```
 $ git clone https://github.com/Byxs20/FlowAnalyzer.git
 $ cd ./FlowAnalyzer/
-$ python
+$ python tests\demo.py
 ```
 
 运行结果:
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: FlowAnalyzer
-Version: 0.3.
+Version: 0.3.9
 Summary: FlowAnalyzer是一个流量分析器,用于解析和处理tshark导出的JSON数据文件
 Home-page: https://github.com/Byxs20/FlowAnalyzer
 Author: Byxs20
@@ -52,7 +52,7 @@ tshark_path = r"C:\Program Files\Wireshark\tshark.exe"
 ```
 $ git clone https://github.com/Byxs20/FlowAnalyzer.git
 $ cd ./FlowAnalyzer/
-$ python
+$ python tests\demo.py
 ```
 
 运行结果:
setup.py

@@ -7,7 +7,7 @@ with open(os.path.join(os.path.dirname(__file__), "README.md"), encoding="utf-8"
 
 setup(
     name="FlowAnalyzer",
-    version="0.3.
+    version="0.3.9",
     description="FlowAnalyzer是一个流量分析器,用于解析和处理tshark导出的JSON数据文件",
     author="Byxs20",
     author_email="97766819@qq.com",
The remaining eight files (FlowAnalyzer/Path.py, FlowAnalyzer/__init__.py, FlowAnalyzer/logging_config.py, FlowAnalyzer.egg-info/SOURCES.txt, FlowAnalyzer.egg-info/dependency_links.txt, FlowAnalyzer.egg-info/top_level.txt, LICENSE, setup.cfg) are unchanged between 0.3.7 and 0.3.9.