FlowAnalyzer 0.4.2__tar.gz → 0.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flowanalyzer-0.4.2 → flowanalyzer-0.4.3}/FlowAnalyzer/FlowAnalyzer.py +75 -34
- {flowanalyzer-0.4.2 → flowanalyzer-0.4.3}/FlowAnalyzer.egg-info/PKG-INFO +2 -10
- {flowanalyzer-0.4.2 → flowanalyzer-0.4.3}/PKG-INFO +2 -10
- {flowanalyzer-0.4.2 → flowanalyzer-0.4.3}/setup.py +1 -1
- {flowanalyzer-0.4.2 → flowanalyzer-0.4.3}/FlowAnalyzer/Path.py +0 -0
- {flowanalyzer-0.4.2 → flowanalyzer-0.4.3}/FlowAnalyzer/__init__.py +0 -0
- {flowanalyzer-0.4.2 → flowanalyzer-0.4.3}/FlowAnalyzer/logging_config.py +0 -0
- {flowanalyzer-0.4.2 → flowanalyzer-0.4.3}/FlowAnalyzer.egg-info/SOURCES.txt +0 -0
- {flowanalyzer-0.4.2 → flowanalyzer-0.4.3}/FlowAnalyzer.egg-info/dependency_links.txt +0 -0
- {flowanalyzer-0.4.2 → flowanalyzer-0.4.3}/FlowAnalyzer.egg-info/top_level.txt +0 -0
- {flowanalyzer-0.4.2 → flowanalyzer-0.4.3}/LICENSE +0 -0
- {flowanalyzer-0.4.2 → flowanalyzer-0.4.3}/README.md +0 -0
- {flowanalyzer-0.4.2 → flowanalyzer-0.4.3}/setup.cfg +0 -0
{flowanalyzer-0.4.2 → flowanalyzer-0.4.3}/FlowAnalyzer/FlowAnalyzer.py

@@ -1,4 +1,5 @@
 import contextlib
+import csv
 import gzip
 import os
 import sqlite3
@@ -7,8 +8,6 @@ from dataclasses import dataclass
 from typing import Iterable, NamedTuple, Optional, Tuple
 from urllib import parse

-import ijson
-
 from .logging_config import logger
 from .Path import get_default_tshark_path

@@ -205,6 +204,14 @@ class FlowAnalyzer:
     @staticmethod
     def _stream_tshark_to_db(pcap_path: str, display_filter: str, tshark_path: str, db_path: str):
         """Stream-parse tshark output into the DB, recording metadata alongside"""
+        # Raise the CSV field size limit so oversized packets do not error out.
+        # Set it to a large value; on 32-bit systems stay below 2 GB (Python ints
+        # are unbounded, so err on the generous side to be safe).
+        # On Windows, sys.maxsize is usually large enough.
+        try:
+            csv.field_size_limit(500 * 1024 * 1024)  # 500 MB
+        except Exception:
+            # If that fails, fall back to the 32-bit maximum.
+            csv.field_size_limit(int(2**31 - 1))

         if os.path.exists(db_path):
             os.remove(db_path)
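Context for this hunk: Python's csv module caps a single field at 131,072 characters by default and raises an error on anything larger, which a reassembled TCP payload can easily exceed. A minimal standalone sketch of the same raise-then-fall-back pattern (the 500 MB figure mirrors the diff; the helper name is ours, not part of the package):

import csv

def raise_csv_field_limit(preferred: int = 500 * 1024 * 1024) -> int:
    # Try the generous limit first; some platforms reject values that
    # do not fit in a C long and raise OverflowError.
    try:
        csv.field_size_limit(preferred)
        return preferred
    except OverflowError:
        fallback = 2**31 - 1  # 32-bit maximum, as in the diff's fallback
        csv.field_size_limit(fallback)
        return fallback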
@@ -217,9 +224,6 @@ class FlowAnalyzer:
         cursor.execute("CREATE TABLE requests (frame_num INTEGER PRIMARY KEY, header BLOB, file_data BLOB, full_uri TEXT, time_epoch REAL)")
         cursor.execute("CREATE TABLE responses (frame_num INTEGER PRIMARY KEY, header BLOB, file_data BLOB, time_epoch REAL, request_in INTEGER)")

-        # === Core optimization: add an index to greatly speed up SQL JOIN pairing ===
-        cursor.execute("CREATE INDEX idx_resp_req_in ON responses(request_in)")
-
         cursor.execute("""
             CREATE TABLE meta_info (
                 id INTEGER PRIMARY KEY,
@@ -231,6 +235,7 @@ class FlowAnalyzer:
         """)
         conn.commit()

+        # Switch the command to -T fields mode
         command = [
             tshark_path,
             "-r",
@@ -238,55 +243,75 @@ class FlowAnalyzer:
             "-Y",
             f"({display_filter})",
             "-T",
-            "json",
+            "fields",
+            # Fields to emit
             "-e",
-            "http.response.code",
+            "http.response.code",  # 0
             "-e",
-            "http.request_in",
+            "http.request_in",  # 1
             "-e",
-            "tcp.reassembled.data",
+            "tcp.reassembled.data",  # 2
             "-e",
-            "frame.number",
+            "frame.number",  # 3
             "-e",
-            "tcp.payload",
+            "tcp.payload",  # 4
             "-e",
-            "frame.time_epoch",
+            "frame.time_epoch",  # 5
             "-e",
-            "exported_pdu.exported_pdu",
+            "exported_pdu.exported_pdu",  # 6
             "-e",
-            "http.request.full_uri",
+            "http.request.full_uri",  # 7
+            # Output formatting
+            "-E",
+            "header=n",  # no header row
+            "-E",
+            "separator=|",  # split on | (safer than a comma)
+            "-E",
+            "quote=d",  # wrap values in double quotes
+            "-E",
+            "occurrence=f",  # take only the first value of each field
         ]

         logger.debug(f"Running tshark: {command}")

-
+        # Read stdout in text mode with utf-8 encoding
+        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=os.path.dirname(os.path.abspath(pcap_path)), encoding="utf-8", errors="replace")

         db_req_rows = []
         db_resp_rows = []
         BATCH_SIZE = 5000

         try:
-
-
+            # Parse the stdout stream with csv.reader
+            reader = csv.reader(process.stdout, delimiter="|", quotechar='"')  # type: ignore
             with sqlite3.connect(db_path) as conn:
                 cursor = conn.cursor()

-                for
-
-
+                for row in reader:
+                    # row is a list in the -e order above:
+                    # [code, req_in, reassembled, frame, payload, epoch, pdu, uri]
+                    if not row:
                         continue

                     try:
-
+                        # Parse the row
+                        frame_num, request_in, time_epoch, full_uri, full_request = FlowAnalyzer.parse_packet_data(row)
+
                         if not full_request:
                             continue
+
                         header, file_data = FlowAnalyzer.extract_http_file_data(full_request)

-
+                        # Request or response? http.response.code (index 0)
+                        # is non-empty only for responses.
+                        if row[0]:
+                            # Response
                             db_resp_rows.append((frame_num, header, file_data, time_epoch, request_in))
                         else:
+                            # Request
                             db_req_rows.append((frame_num, header, file_data, full_uri, time_epoch))

+                        # Batch insert
                         if len(db_req_rows) >= BATCH_SIZE:
                             cursor.executemany("INSERT OR REPLACE INTO requests VALUES (?,?,?,?,?)", db_req_rows)
                             db_req_rows.clear()
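Taken together, this hunk replaces ijson-over-JSON parsing with tshark's fields output streamed through csv.reader. A self-contained sketch of the same style of invocation, assuming tshark is on PATH; the capture name and the "(http)" filter are placeholders, and only two of the eight fields are requested for brevity:

import csv
import subprocess

command = [
    "tshark", "-r", "capture.pcapng",
    "-Y", "(http)",
    "-T", "fields",
    "-e", "frame.number",
    "-e", "http.request.full_uri",
    "-E", "header=n",
    "-E", "separator=|",
    "-E", "quote=d",
    "-E", "occurrence=f",
]
proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, encoding="utf-8", errors="replace")
for row in csv.reader(proc.stdout, delimiter="|", quotechar='"'):
    if row:
        print(row)  # e.g. ['42', 'http://example.com/index.html']
proc.wait()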
@@ -294,14 +319,19 @@ class FlowAnalyzer:
                             cursor.executemany("INSERT OR REPLACE INTO responses VALUES (?,?,?,?,?)", db_resp_rows)
                             db_resp_rows.clear()

-                    except Exception:
+                    except Exception as e:
+                        # The occasional row may fail to parse; just skip it
                         pass

+            # Insert any remaining rows
             if db_req_rows:
                 cursor.executemany("INSERT OR REPLACE INTO requests VALUES (?,?,?,?,?)", db_req_rows)
             if db_resp_rows:
                 cursor.executemany("INSERT OR REPLACE INTO responses VALUES (?,?,?,?,?)", db_resp_rows)

+            # --- Optimization: create the index only after the bulk insert; it is faster ---
+            cursor.execute("CREATE INDEX idx_resp_req_in ON responses(request_in)")
+
             pcap_mtime = os.path.getmtime(pcap_path)
             pcap_size = os.path.getsize(pcap_path)
             cursor.execute("INSERT INTO meta_info (filter, pcap_path, pcap_mtime, pcap_size) VALUES (?, ?, ?, ?)", (display_filter, pcap_path, pcap_mtime, pcap_size))
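Moving CREATE INDEX below the bulk load follows the usual SQLite advice: populating a table first and building the index once is cheaper than updating the index on every INSERT. A toy demonstration of the pattern (in-memory database and synthetic rows, for illustration only):

import sqlite3

conn = sqlite3.connect(":memory:")
cur = conn.cursor()
cur.execute("CREATE TABLE responses (frame_num INTEGER PRIMARY KEY, request_in INTEGER)")

# Bulk-load with no secondary index in place...
cur.executemany("INSERT INTO responses VALUES (?, ?)", ((i, i - 1) for i in range(1, 100001)))

# ...then build the index in a single pass over the finished table.
cur.execute("CREATE INDEX idx_resp_req_in ON responses(request_in)")
conn.commit()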
@@ -319,18 +349,29 @@ class FlowAnalyzer:
     # --- Helper static methods ---

     @staticmethod
-    def parse_packet_data(
-
-
-
-
-
-
-
-
+    def parse_packet_data(row: list) -> Tuple[int, int, float, str, str]:
+        # row layout:
+        # 0: http.response.code
+        # 1: http.request_in
+        # 2: tcp.reassembled.data
+        # 3: frame.number
+        # 4: tcp.payload
+        # 5: frame.time_epoch
+        # 6: exported_pdu.exported_pdu
+        # 7: http.request.full_uri
+
+        frame_num = int(row[3])
+        request_in = int(row[1]) if row[1] else frame_num
+        full_uri = parse.unquote(row[7]) if row[7] else ""
+        time_epoch = float(row[5])
+
+        if row[2]:
+            full_request = row[2]
+        elif row[4]:
+            full_request = row[4]
         else:
-            full_request =
+            full_request = row[6] if row[6] else ""
+
         return frame_num, request_in, time_epoch, full_uri, full_request

     @staticmethod
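For illustration, here is how the rewritten parse_packet_data behaves on a hypothetical response row; the values and the import path are assumptions for the sketch, not taken from the diff:

# Assumes the class is importable from the package module.
from FlowAnalyzer.FlowAnalyzer import FlowAnalyzer

# [code, req_in, reassembled, frame, payload, epoch, pdu, uri]
row = ["200", "10", "", "12", "485454502f312e31", "1700000000.5", "", ""]
frame_num, request_in, time_epoch, full_uri, full_request = FlowAnalyzer.parse_packet_data(row)

assert (frame_num, request_in) == (12, 10)
assert full_uri == ""          # no http.request.full_uri on a response
assert full_request == row[4]  # tcp.reassembled.data was empty, so tcp.payload wins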
{flowanalyzer-0.4.2 → flowanalyzer-0.4.3}/FlowAnalyzer.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.1
 Name: FlowAnalyzer
-Version: 0.4.2
+Version: 0.4.3
 Summary: FlowAnalyzer is a traffic analyzer for parsing and processing JSON data files exported by tshark
 Home-page: https://github.com/Byxs20/FlowAnalyzer
 Author: Byxs20
@@ -15,14 +15,6 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Dynamic: author
-Dynamic: author-email
-Dynamic: classifier
-Dynamic: description
-Dynamic: description-content-type
-Dynamic: home-page
-Dynamic: license-file
-Dynamic: summary

 # FlowAnalyzer

{flowanalyzer-0.4.2 → flowanalyzer-0.4.3}/PKG-INFO

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.1
 Name: FlowAnalyzer
-Version: 0.4.2
+Version: 0.4.3
 Summary: FlowAnalyzer is a traffic analyzer for parsing and processing JSON data files exported by tshark
 Home-page: https://github.com/Byxs20/FlowAnalyzer
 Author: Byxs20
@@ -15,14 +15,6 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Dynamic: author
-Dynamic: author-email
-Dynamic: classifier
-Dynamic: description
-Dynamic: description-content-type
-Dynamic: home-page
-Dynamic: license-file
-Dynamic: summary

 # FlowAnalyzer

{flowanalyzer-0.4.2 → flowanalyzer-0.4.3}/setup.py

@@ -7,7 +7,7 @@ with open(os.path.join(os.path.dirname(__file__), "README.md"), encoding="utf-8"

 setup(
     name="FlowAnalyzer",
-    version="0.4.2",
+    version="0.4.3",
     description="FlowAnalyzer is a traffic analyzer for parsing and processing JSON data files exported by tshark",
     author="Byxs20",
     author_email="97766819@qq.com",