FlowAnalyzer 0.4.1__tar.gz → 0.4.2__tar.gz
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- {flowanalyzer-0.4.1 → flowanalyzer-0.4.2}/FlowAnalyzer/FlowAnalyzer.py +49 -52
- {flowanalyzer-0.4.1 → flowanalyzer-0.4.2}/FlowAnalyzer.egg-info/PKG-INFO +1 -1
- {flowanalyzer-0.4.1 → flowanalyzer-0.4.2}/PKG-INFO +1 -1
- {flowanalyzer-0.4.1 → flowanalyzer-0.4.2}/setup.py +1 -1
- {flowanalyzer-0.4.1 → flowanalyzer-0.4.2}/FlowAnalyzer/Path.py +0 -0
- {flowanalyzer-0.4.1 → flowanalyzer-0.4.2}/FlowAnalyzer/__init__.py +0 -0
- {flowanalyzer-0.4.1 → flowanalyzer-0.4.2}/FlowAnalyzer/logging_config.py +0 -0
- {flowanalyzer-0.4.1 → flowanalyzer-0.4.2}/FlowAnalyzer.egg-info/SOURCES.txt +0 -0
- {flowanalyzer-0.4.1 → flowanalyzer-0.4.2}/FlowAnalyzer.egg-info/dependency_links.txt +0 -0
- {flowanalyzer-0.4.1 → flowanalyzer-0.4.2}/FlowAnalyzer.egg-info/top_level.txt +0 -0
- {flowanalyzer-0.4.1 → flowanalyzer-0.4.2}/LICENSE +0 -0
- {flowanalyzer-0.4.1 → flowanalyzer-0.4.2}/README.md +0 -0
- {flowanalyzer-0.4.1 → flowanalyzer-0.4.2}/setup.cfg +0 -0
|
@@ -4,7 +4,7 @@ import os
|
|
|
4
4
|
import sqlite3
|
|
5
5
|
import subprocess
|
|
6
6
|
from dataclasses import dataclass
|
|
7
|
-
from typing import
|
|
7
|
+
from typing import Iterable, NamedTuple, Optional, Tuple
|
|
8
8
|
from urllib import parse
|
|
9
9
|
|
|
10
10
|
import ijson
|
|
@@ -69,57 +69,54 @@ class FlowAnalyzer:
|
|
|
69
69
|
if not os.path.exists(self.db_path):
|
|
70
70
|
raise FileNotFoundError(f"未找到数据文件或缓存数据库: {self.db_path},请先调用 get_json_data 生成。")
|
|
71
71
|
|
|
72
|
-
def
|
|
73
|
-
"""
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
with sqlite3.connect(self.db_path) as conn:
|
|
77
|
-
cursor = conn.cursor()
|
|
72
|
+
def generate_http_dict_pairs(self) -> Iterable[HttpPair]:
|
|
73
|
+
"""生成HTTP请求和响应信息的字典对 (SQL JOIN 高性能版)"""
|
|
74
|
+
if not os.path.exists(self.db_path):
|
|
75
|
+
return
|
|
78
76
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
77
|
+
with sqlite3.connect(self.db_path) as conn:
|
|
78
|
+
cursor = conn.cursor()
|
|
79
|
+
# 开启查询优化
|
|
80
|
+
cursor.execute("PRAGMA query_only = 1;")
|
|
81
|
+
|
|
82
|
+
# === 第一步:配对查询 ===
|
|
83
|
+
# 利用 SQLite 的 LEFT JOIN 直接匹配请求和响应
|
|
84
|
+
# 避免将所有数据加载到 Python 内存中
|
|
85
|
+
sql_pair = """
|
|
86
|
+
SELECT
|
|
87
|
+
req.frame_num, req.header, req.file_data, req.full_uri, req.time_epoch, -- 0-4 (Request)
|
|
88
|
+
resp.frame_num, resp.header, resp.file_data, resp.time_epoch, resp.request_in -- 5-9 (Response)
|
|
89
|
+
FROM requests req
|
|
90
|
+
LEFT JOIN responses resp ON req.frame_num = resp.request_in
|
|
91
|
+
ORDER BY req.frame_num ASC
|
|
92
|
+
"""
|
|
93
|
+
|
|
94
|
+
cursor.execute(sql_pair)
|
|
95
|
+
|
|
96
|
+
# 流式遍历结果,内存占用极低
|
|
97
|
+
for row in cursor:
|
|
98
|
+
# 构建 Request 对象
|
|
99
|
+
# 注意处理 NULL 情况,虽然 requests 表理论上不为空,但防万一用 or b''
|
|
100
|
+
req = Request(frame_num=row[0], header=row[1] or b"", file_data=row[2] or b"", full_uri=row[3] or "", time_epoch=row[4])
|
|
101
|
+
|
|
102
|
+
resp = None
|
|
103
|
+
# 如果 row[5] (Response frame_num) 不为空,说明匹配到了响应
|
|
104
|
+
if row[5] is not None:
|
|
105
|
+
resp = Response(frame_num=row[5], header=row[6] or b"", file_data=row[7] or b"", time_epoch=row[8], _request_in=row[9])
|
|
106
106
|
|
|
107
|
-
def generate_http_dict_pairs(self) -> Iterable[HttpPair]:
|
|
108
|
-
"""生成HTTP请求和响应信息的字典对"""
|
|
109
|
-
requests, responses = self._load_from_db()
|
|
110
|
-
response_map = {r._request_in: r for r in responses.values()}
|
|
111
|
-
yielded_resps = set()
|
|
112
|
-
|
|
113
|
-
for req_id, req in requests.items():
|
|
114
|
-
resp = response_map.get(req_id)
|
|
115
|
-
if resp:
|
|
116
|
-
yielded_resps.add(resp.frame_num)
|
|
117
107
|
yield HttpPair(request=req, response=resp)
|
|
118
|
-
else:
|
|
119
|
-
yield HttpPair(request=req, response=None)
|
|
120
108
|
|
|
121
|
-
|
|
122
|
-
|
|
109
|
+
# === 第二步:孤儿响应查询 ===
|
|
110
|
+
# 找出那些有 request_in 但找不到对应 Request 的响应包
|
|
111
|
+
sql_orphan = """
|
|
112
|
+
SELECT frame_num, header, file_data, time_epoch, request_in
|
|
113
|
+
FROM responses
|
|
114
|
+
WHERE request_in NOT IN (SELECT frame_num FROM requests)
|
|
115
|
+
"""
|
|
116
|
+
cursor.execute(sql_orphan)
|
|
117
|
+
|
|
118
|
+
for row in cursor:
|
|
119
|
+
resp = Response(frame_num=row[0], header=row[1] or b"", file_data=row[2] or b"", time_epoch=row[3], _request_in=row[4])
|
|
123
120
|
yield HttpPair(request=None, response=resp)
|
|
124
121
|
|
|
125
122
|
# =========================================================================
|
|
@@ -220,6 +217,9 @@ class FlowAnalyzer:
|
|
|
220
217
|
cursor.execute("CREATE TABLE requests (frame_num INTEGER PRIMARY KEY, header BLOB, file_data BLOB, full_uri TEXT, time_epoch REAL)")
|
|
221
218
|
cursor.execute("CREATE TABLE responses (frame_num INTEGER PRIMARY KEY, header BLOB, file_data BLOB, time_epoch REAL, request_in INTEGER)")
|
|
222
219
|
|
|
220
|
+
# === 核心优化:增加索引,极大加速 SQL JOIN 配对 ===
|
|
221
|
+
cursor.execute("CREATE INDEX idx_resp_req_in ON responses(request_in)")
|
|
222
|
+
|
|
223
223
|
cursor.execute("""
|
|
224
224
|
CREATE TABLE meta_info (
|
|
225
225
|
id INTEGER PRIMARY KEY,
|
|
@@ -345,10 +345,7 @@ class FlowAnalyzer:
|
|
|
345
345
|
|
|
346
346
|
@staticmethod
|
|
347
347
|
def dechunck_http_response(file_data: bytes) -> bytes:
|
|
348
|
-
"""解码分块TCP数据
|
|
349
|
-
注意:如果数据不是 Chunked 格式,此函数必须抛出异常,
|
|
350
|
-
以便外层逻辑回退到使用原始数据。
|
|
351
|
-
"""
|
|
348
|
+
"""解码分块TCP数据"""
|
|
352
349
|
if not file_data:
|
|
353
350
|
return b""
|
|
354
351
|
|
|
@@ -7,7 +7,7 @@ with open(os.path.join(os.path.dirname(__file__), "README.md"), encoding="utf-8"
|
|
|
7
7
|
|
|
8
8
|
setup(
|
|
9
9
|
name="FlowAnalyzer",
|
|
10
|
-
version="0.4.1",
|
|
10
|
+
version="0.4.2",
|
|
11
11
|
description="FlowAnalyzer是一个流量分析器,用于解析和处理tshark导出的JSON数据文件",
|
|
12
12
|
author="Byxs20",
|
|
13
13
|
author_email="97766819@qq.com",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|