FlowAnalyzer 0.2.7__tar.gz → 0.2.9__tar.gz
This diff compares publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
- {FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/FlowAnalyzer/FlowAnalyzer.py +110 -53
- FlowAnalyzer-0.2.9/FlowAnalyzer/logging_config.py +20 -0
- {FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/FlowAnalyzer.egg-info/PKG-INFO +3 -1
- {FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/FlowAnalyzer.egg-info/SOURCES.txt +1 -0
- {FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/PKG-INFO +3 -1
- {FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/README.md +2 -0
- {FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/setup.py +3 -2
- {FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/FlowAnalyzer/__init__.py +0 -0
- {FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/FlowAnalyzer.egg-info/dependency_links.txt +0 -0
- {FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/FlowAnalyzer.egg-info/top_level.txt +0 -0
- {FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/LICENSE +0 -0
- {FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/setup.cfg +0 -0
{FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/FlowAnalyzer/FlowAnalyzer.py

```diff
@@ -1,35 +1,42 @@
-import os
-import json
+import contextlib
 import gzip
+import hashlib
+import json
+import logging
+import os
 import shutil
-import contextlib
 import subprocess
+from typing import Dict, Iterable, NamedTuple, Optional, Tuple
 from urllib import parse
-from typing import Tuple, Dict, Iterable, NamedTuple
 
+from .logging_config import configure_logger
+
+logger = configure_logger("FlowAnalyzer", logging.INFO)
 
 class Request(NamedTuple):
-    frame_num: int
+    frame_num: Optional[int]
     header: bytes
     file_data: bytes
-
-
+    full_uri: Optional[str]
+    time_epoch: Optional[float]
+
 
 class Response(NamedTuple):
-    frame_num: int
+    frame_num: Optional[int]
     header: bytes
-    request_in: int
     file_data: bytes
-
+    request_in: Optional[int]
+    time_epoch: Optional[float]
+
 
 class HttpPair(NamedTuple):
-    request: Request
-    response: Response
+    request: Optional[Request]
+    response: Optional[Response]
 
 
 class FlowAnalyzer:
     """FlowAnalyzer是一个流量分析器,用于解析和处理tshark导出的JSON数据文件"""
-
+
     def __init__(self, jsonPath: str):
         """初始化FlowAnalyzer对象
 
@@ -42,7 +49,7 @@ class FlowAnalyzer:
         self.check_json_file()
 
     def check_json_file(self):
-        # sourcery skip:
+        # sourcery skip: replace-interpolation-with-fstring
         """检查JSON文件是否存在并非空
 
         Raises
@@ -53,7 +60,7 @@
             当JSON文件内容为空时抛出异常
         """
         if not os.path.exists(self.jsonPath):
-            raise FileNotFoundError(
+            raise FileNotFoundError("您的tshark导出的JSON文件没有找到!JSON路径:%s" % self.jsonPath)
 
         if os.path.getsize(self.jsonPath) == 0:
             raise ValueError("您的tshark导出的JSON文件内容为空!JSON路径:%s" % self.jsonPath)
@@ -67,24 +74,42 @@
         tuple
             包含请求字典和响应列表的元组
         """
-        with open(self.jsonPath, "r", encoding=
+        with open(self.jsonPath, "r", encoding="utf-8") as f:
             data = json.load(f)
-
+
         requests, responses = {}, {}
         for packet in data:
            packet = packet["_source"]["layers"]
            time_epoch = float(packet["frame.time_epoch"][0]) if packet.get("frame.time_epoch") else None
-
+
+            if packet.get("tcp.reassembled.data"):
+                full_request = packet["tcp.reassembled.data"][0]
+            elif packet.get("tcp.payload"):
+                full_request = packet["tcp.payload"][0]
+            else:
+                # exported_pdu.exported_pdu
+                full_request = packet["exported_pdu.exported_pdu"][0]
+
             frame_num = int(packet["frame.number"][0]) if packet.get("frame.number") else None
             request_in = int(packet["http.request_in"][0]) if packet.get("http.request_in") else frame_num
-            full_uri =
-
+            full_uri = (
+                parse.unquote(packet["http.request.full_uri"][0]) if packet.get("http.request.full_uri") else None
+            )
+
             header, file_data = self.extract_http_file_data(full_request)
-
+
             if packet.get("http.response_number"):
-                responses[frame_num] = Response(
+                responses[frame_num] = Response(
+                    frame_num=frame_num,
+                    request_in=request_in,
+                    header=header,
+                    file_data=file_data,
+                    time_epoch=time_epoch,
+                )
             else:
-                requests[frame_num] = Request(
+                requests[frame_num] = Request(
+                    frame_num=frame_num, header=header, file_data=file_data, time_epoch=time_epoch, full_uri=full_uri
+                )
         return requests, responses
 
     def generate_http_dict_pairs(self) -> Iterable[HttpPair]: # sourcery skip: use-named-expression
@@ -95,7 +120,7 @@
             包含请求和响应信息的字典迭代器
         """
         requests, responses = self.parse_http_json()
-        response_map = {r.request_in
+        response_map = {r.request_in: r for r in responses.values()}
         yielded_resps = []
         for req_id, req in requests.items():
             resp = response_map.get(req_id)
@@ -111,8 +136,36 @@
                 resp = resp._replace(request_in=None)
                 yield HttpPair(request=None, response=resp)
 
+    @staticmethod
+    def get_hash(filePath: str, display_filter: str) -> str:
+        with open(filePath, "rb") as f:
+            return hashlib.md5(f.read() + display_filter.encode()).hexdigest()
+
+    @staticmethod
+    def extract_json_file(fileName: str, display_filter: str, tshark_workDir: str) -> None:
+        # sourcery skip: replace-interpolation-with-fstring, use-fstring-for-formatting
+        # tshark -r {} -Y "{}" -T json -e http.request_number -e http.response_number -e http.request_in -e tcp.reassembled.data -e frame.number -e tcp.payload -e frame.time_epoch -e http.request.full_uri > output.json
+        command = (
+            'tshark -r {} -Y "(tcp.reassembled_in) or ({})" -T json '
+            '-e http.request_number '
+            '-e http.response_number '
+            '-e http.request_in '
+            '-e tcp.reassembled.data '
+            '-e frame.number '
+            '-e tcp.payload '
+            '-e frame.time_epoch '
+            '-e exported_pdu.exported_pdu '
+            '-e http.request.full_uri '
+            '> output.json'.format(
+                fileName, display_filter
+            ))
+        _, stderr = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=tshark_workDir).communicate()
+        if stderr != b"" and b"WARNING" not in stderr:
+            print(f"[Waring/Error]: {stderr}")
+
     @staticmethod
     def get_json_data(filePath: str, display_filter: str) -> str:
+        # sourcery skip: replace-interpolation-with-fstring
         """获取JSON数据并保存至文件,保存目录是当前工作目录,也就是您运行脚本所在目录
 
         Parameters
@@ -127,28 +180,34 @@
         str
             保存JSON数据的文件路径
         """
-        # sourcery skip: replace-interpolation-with-fstring, use-fstring-for-formatting
         if not os.path.exists(filePath):
             raise FileNotFoundError("您的填写的流量包没有找到!流量包路径:%s" % filePath)
 
-
-
-
-
-        os.
+        MD5Sum = FlowAnalyzer.get_hash(filePath, display_filter)
+        workDir = os.getcwd()
+        tshark_workDir = os.path.dirname(filePath)
+        tshark_jsonPath = os.path.join(tshark_workDir, "output.json")
+        jsonWordPath = os.path.join(workDir, "output.json")
         fileName = os.path.basename(filePath)
-        command = 'tshark -r {} -Y "{}" -T json -e http.request_number -e http.response_number -e http.request_in -e tcp.reassembled.data -e frame.number -e tcp.payload -e frame.time_epoch -e http.request.full_uri > output.json'.format(
-            fileName, display_filter)
 
-
-
-
+        if os.path.exists(jsonWordPath):
+            with open(jsonWordPath, "r", encoding="utf-8") as f:
+                data = json.load(f)
+            if data[0].get('MD5Sum') == MD5Sum:
+                logger.debug("匹配HASH校验无误,自动返回Json文件路径!")
+                return jsonWordPath
+        FlowAnalyzer.extract_json_file(fileName, display_filter, tshark_workDir)
 
-
-
-
-
-
+        if tshark_jsonPath != jsonWordPath:
+            shutil.move(tshark_jsonPath, jsonWordPath)
+
+        with open(jsonWordPath, "r", encoding="utf-8") as f:
+            data = json.load(f)
+        data[0]['MD5Sum'] = MD5Sum
+
+        with open(jsonWordPath, "w", encoding="utf-8") as f:
+            json.dump(data, f, indent=2)
+        return jsonWordPath
 
     def Split_HTTP_headers(self, file_data: bytes) -> Tuple[bytes, bytes]:
         # sourcery skip: use-named-expression
@@ -178,39 +237,37 @@
         """
         chunks = []
         chunkSizeEnd = file_data.find(b"\n") + 1
-        lineEndings = b
+        lineEndings = b"\r\n" if bytes([file_data[chunkSizeEnd - 2]]) == b"\r" else b"\n"
         lineEndingsLength = len(lineEndings)
         while True:
             chunkSize = int(file_data[:chunkSizeEnd], 16)
             if not chunkSize:
                 break
-
-            chunks.append(file_data[chunkSizeEnd:chunkSize + chunkSizeEnd])
-            file_data = file_data[chunkSizeEnd + chunkSize + lineEndingsLength:]
+
+            chunks.append(file_data[chunkSizeEnd : chunkSize + chunkSizeEnd])
+            file_data = file_data[chunkSizeEnd + chunkSize + lineEndingsLength :]
             chunkSizeEnd = file_data.find(lineEndings) + lineEndingsLength
-        return b
+        return b"".join(chunks)
 
-    def extract_http_file_data(self, full_request:
-        # sourcery skip: merge-else-if-into-elif, swap-if-else-branches
+    def extract_http_file_data(self, full_request: str) -> Tuple[bytes, bytes]:
         """提取HTTP请求或响应中的文件数据
-
+
         Parameters
         ----------
         full_request : bytes
             HTTP请求或响应的原始字节流
-
+
         Returns
         -------
         tuple
             包含header和file_data的元组
         """
-
-        header, file_data = self.Split_HTTP_headers(full_request)
+        header, file_data = self.Split_HTTP_headers(bytes.fromhex(full_request))
 
         with contextlib.suppress(Exception):
             file_data = self.Dechunck_HTTP_response(file_data)
-
+
         with contextlib.suppress(Exception):
             if file_data.startswith(b"\x1F\x8B"):
                 file_data = gzip.decompress(file_data)
-        return header, file_data
+        return header, file_data
```
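Taken together, the `FlowAnalyzer.py` changes split the tshark invocation into `extract_json_file`, cache `output.json` behind an MD5 of the capture bytes plus the display filter (`get_hash`), and relax every NamedTuple field to Optional. A minimal usage sketch of the reworked 0.2.9 API, assuming a hypothetical capture `flows.pcapng` in the current directory and tshark on PATH:

```python
# Sketch only: "flows.pcapng" and the "http" display filter are hypothetical inputs.
from FlowAnalyzer.FlowAnalyzer import FlowAnalyzer

# Runs tshark, or reuses the cached output.json when the stored MD5Sum still matches
json_path = FlowAnalyzer.get_json_data("flows.pcapng", display_filter="http")

for request, response in FlowAnalyzer(json_path).generate_http_dict_pairs():
    # In 0.2.9 either side of an HttpPair may be None
    if request is not None:
        print(request.frame_num, request.full_uri)
    if response is not None:
        print(len(response.file_data))
```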
FlowAnalyzer-0.2.9/FlowAnalyzer/logging_config.py (new file)

```diff
@@ -0,0 +1,20 @@
+import logging
+
+
+def configure_logger(logger_name, level=logging.DEBUG) -> logging.Logger:
+    # 创建一个 logger 对象
+    logger = logging.getLogger(logger_name)
+    logger.setLevel(level)
+
+    # 创建一个处理器,将日志输出到控制台
+    console_handler = logging.StreamHandler()
+    logger.addHandler(console_handler)
+
+    # 创建一个格式化器,定义日志的输出格式
+    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+    console_handler.setFormatter(formatter)
+    return logger
+
+if __name__ == '__main__':
+    logger = configure_logger("FlowAnalyzer")
+    logger.info("This is a test!")
```
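The new `logging_config.py` wires a `StreamHandler` with an `%(asctime)s - %(levelname)s - %(message)s` format onto a named logger. A quick sketch of the resulting console output (timestamp illustrative):

```python
from FlowAnalyzer.logging_config import configure_logger

logger = configure_logger("FlowAnalyzer")
logger.info("This is a test!")
# Illustrative output:
# 2023-08-01 12:00:00,000 - INFO - This is a test!
```

Note that `logging.getLogger` returns the same named logger on every call while `addHandler` always appends, so calling `configure_logger` more than once attaches duplicate handlers and each record prints multiple times.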
{FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/FlowAnalyzer.egg-info/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: FlowAnalyzer
-Version: 0.2.7
+Version: 0.2.9
 Summary: FlowAnalyzer是一个流量分析器,用于解析和处理tshark导出的JSON数据文件
 Home-page: https://github.com/Byxs20/FlowAnalyzer
 Author: Byxs20
@@ -22,6 +22,8 @@ Description: # FlowAnalyzer
 
 # Usage
 
+请务必添加 `tshark.exe` 到环境变量,否则找不到会出错!
+
 ```
 $ git clone https://github.com/Byxs20/FlowAnalyzer.git
 $ cd ./FlowAnalyzer/
````
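The README addition above says tshark.exe must be added to the environment variables (i.e. be on PATH), or the lookup fails. A defensive pre-check along these lines, not part of the package, fails fast before any capture is parsed:

```python
import shutil

# Abort early if tshark is not resolvable on PATH, as the README requires.
if shutil.which("tshark") is None:
    raise RuntimeError("tshark not found on PATH; install Wireshark or add tshark to PATH")
```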
{FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: FlowAnalyzer
-Version: 0.2.7
+Version: 0.2.9
 Summary: FlowAnalyzer是一个流量分析器,用于解析和处理tshark导出的JSON数据文件
 Home-page: https://github.com/Byxs20/FlowAnalyzer
 Author: Byxs20
@@ -22,6 +22,8 @@ Description: # FlowAnalyzer
 
 # Usage
 
+请务必添加 `tshark.exe` 到环境变量,否则找不到会出错!
+
 ```
 $ git clone https://github.com/Byxs20/FlowAnalyzer.git
 $ cd ./FlowAnalyzer/
````
{FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/setup.py

```diff
@@ -1,12 +1,13 @@
 import os
-
+
+from setuptools import find_packages, setup
 
 with open(os.path.join(os.path.dirname(__file__), "README.md"), encoding="utf-8") as f:
     long_description = f.read()
 
 setup(
     name="FlowAnalyzer",
-    version="0.2.7",
+    version="0.2.9",
     description="FlowAnalyzer是一个流量分析器,用于解析和处理tshark导出的JSON数据文件",
     author="Byxs20",
     author_email="97766819@qq.com",
```