PyPI - FlowAnalyzer - Version diff - 0.2.7.tar.gz → 0.2.9.tar.gz - Mend

FlowAnalyzer 0.2.7.tar.gz → 0.2.9.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

{FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/FlowAnalyzer/FlowAnalyzer.py RENAMED Viewed

@@ -1,35 +1,42 @@
-import os
-import json
+import contextlib
 import gzip
+import hashlib
+import json
+import logging
+import os
 import shutil
-import contextlib
 import subprocess
+from typing import Dict, Iterable, NamedTuple, Optional, Tuple
 from urllib import parse
-from typing import Tuple, Dict, Iterable, NamedTuple
+from .logging_config import configure_logger
+logger = configure_logger("FlowAnalyzer", logging.INFO)
 class Request(NamedTuple):
-    frame_num: int
+    frame_num: Optional[int]
     header: bytes
     file_data: bytes
-    time_epoch: bytes
-    full_uri: str
+    full_uri: Optional[str]
+    time_epoch: Optional[float]
 class Response(NamedTuple):
-    frame_num: int
+    frame_num: Optional[int]
     header: bytes
-    request_in: int
     file_data: bytes
-    time_epoch: bytes
+    request_in: Optional[int]
+    time_epoch: Optional[float]
 class HttpPair(NamedTuple):
-    request: Request
-    response: Response
+    request: Optional[Request]
+    response: Optional[Response]
 class FlowAnalyzer:
     """FlowAnalyzer是一个流量分析器，用于解析和处理tshark导出的JSON数据文件"""
     def __init__(self, jsonPath: str):
         """初始化FlowAnalyzer对象
@@ -42,7 +49,7 @@ class FlowAnalyzer:
         self.check_json_file()
     def check_json_file(self):
-        # sourcery skip: remove-redundant-fstring, replace-interpolation-with-fstring
+        # sourcery skip: replace-interpolation-with-fstring
         """检查JSON文件是否存在并非空
         Raises
@@ -53,7 +60,7 @@ class FlowAnalyzer:
             当JSON文件内容为空时抛出异常
         """
         if not os.path.exists(self.jsonPath):
-            raise FileNotFoundError(f"您的tshark导出的JSON文件没有找到！JSON路径：%s" % self.jsonPath)
+            raise FileNotFoundError("您的tshark导出的JSON文件没有找到！JSON路径：%s" % self.jsonPath)
         if os.path.getsize(self.jsonPath) == 0:
             raise ValueError("您的tshark导出的JSON文件内容为空！JSON路径：%s" % self.jsonPath)
@@ -67,24 +74,42 @@ class FlowAnalyzer:
         tuple
             包含请求字典和响应列表的元组
         """
-        with open(self.jsonPath, "r", encoding='utf-8') as f:
+        with open(self.jsonPath, "r", encoding="utf-8") as f:
             data = json.load(f)
         requests, responses = {}, {}
         for packet in data:
             packet = packet["_source"]["layers"]
             time_epoch = float(packet["frame.time_epoch"][0]) if packet.get("frame.time_epoch") else None
-            full_request = packet["tcp.reassembled.data"][0] if packet.get("tcp.reassembled.data") else packet["tcp.payload"][0]
+            if packet.get("tcp.reassembled.data"):
+                full_request = packet["tcp.reassembled.data"][0]
+            elif packet.get("tcp.payload"):
+                full_request = packet["tcp.payload"][0]
+            else:
+                # exported_pdu.exported_pdu
+                full_request = packet["exported_pdu.exported_pdu"][0]
             frame_num = int(packet["frame.number"][0]) if packet.get("frame.number") else None
             request_in = int(packet["http.request_in"][0]) if packet.get("http.request_in") else frame_num
-            full_uri = parse.unquote(packet["http.request.full_uri"][0]) if packet.get("http.request.full_uri") else None
+            full_uri = (
+                parse.unquote(packet["http.request.full_uri"][0]) if packet.get("http.request.full_uri") else None
+            )
             header, file_data = self.extract_http_file_data(full_request)
             if packet.get("http.response_number"):
-                responses[frame_num] = Response(frame_num=frame_num, request_in=request_in, header=header, file_data=file_data, time_epoch=time_epoch)
+                responses[frame_num] = Response(
+                    frame_num=frame_num,
+                    request_in=request_in,
+                    header=header,
+                    file_data=file_data,
+                    time_epoch=time_epoch,
+                )
             else:
-                requests[frame_num] = Request(frame_num=frame_num, header=header, file_data=file_data, time_epoch=time_epoch, full_uri=full_uri)
+                requests[frame_num] = Request(
+                    frame_num=frame_num, header=header, file_data=file_data, time_epoch=time_epoch, full_uri=full_uri
+                )
         return requests, responses
     def generate_http_dict_pairs(self) -> Iterable[HttpPair]:  # sourcery skip: use-named-expression
@@ -95,7 +120,7 @@ class FlowAnalyzer:
             包含请求和响应信息的字典迭代器
         """
         requests, responses = self.parse_http_json()
-        response_map = {r.request_in : r for r in responses.values()}
+        response_map = {r.request_in: r for r in responses.values()}
         yielded_resps = []
         for req_id, req in requests.items():
             resp = response_map.get(req_id)
@@ -111,8 +136,36 @@ class FlowAnalyzer:
                 resp = resp._replace(request_in=None)
                 yield HttpPair(request=None, response=resp)
+    @staticmethod
+    def get_hash(filePath: str, display_filter: str) -> str:
+        with open(filePath, "rb") as f:
+            return hashlib.md5(f.read() + display_filter.encode()).hexdigest()
+    @staticmethod
+    def extract_json_file(fileName: str, display_filter: str, tshark_workDir: str) -> None:
+        # sourcery skip: replace-interpolation-with-fstring, use-fstring-for-formatting
+        # tshark -r {} -Y "{}" -T json -e http.request_number -e http.response_number -e http.request_in -e tcp.reassembled.data -e frame.number -e tcp.payload -e frame.time_epoch -e http.request.full_uri > output.json
+        command = (
+            'tshark -r {} -Y "(tcp.reassembled_in) or ({})" -T json '
+            '-e http.request_number '
+            '-e http.response_number '
+            '-e http.request_in '
+            '-e tcp.reassembled.data '
+            '-e frame.number '
+            '-e tcp.payload '
+            '-e frame.time_epoch '
+            '-e exported_pdu.exported_pdu '
+            '-e http.request.full_uri '
+            '> output.json'.format(
+                fileName, display_filter
+        ))
+        _, stderr = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=tshark_workDir).communicate()
+        if stderr != b"" and b"WARNING" not in stderr:
+            print(f"[Waring/Error]: {stderr}")
     @staticmethod
     def get_json_data(filePath: str, display_filter: str) -> str:
+        # sourcery skip: replace-interpolation-with-fstring
         """获取JSON数据并保存至文件，保存目录是当前工作目录，也就是您运行脚本所在目录
         Parameters
@@ -127,28 +180,34 @@ class FlowAnalyzer:
         str
             保存JSON数据的文件路径
         """
-        # sourcery skip: replace-interpolation-with-fstring, use-fstring-for-formatting
         if not os.path.exists(filePath):
             raise FileNotFoundError("您的填写的流量包没有找到！流量包路径：%s" % filePath)
-        oriDir = os.getcwd()
-        fileDir = os.path.dirname(filePath)
-        jsonPath = os.path.join(fileDir, "output.json")
-        os.chdir(fileDir)
+        MD5Sum = FlowAnalyzer.get_hash(filePath, display_filter)
+        workDir = os.getcwd()
+        tshark_workDir = os.path.dirname(filePath)
+        tshark_jsonPath = os.path.join(tshark_workDir, "output.json")
+        jsonWordPath = os.path.join(workDir, "output.json")
         fileName = os.path.basename(filePath)
-        command = 'tshark -r {} -Y "{}" -T json -e http.request_number -e http.response_number -e http.request_in -e tcp.reassembled.data -e frame.number -e tcp.payload -e frame.time_epoch -e http.request.full_uri > output.json'.format(
-            fileName, display_filter)
-        _, stderr = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
-        if stderr != b"" and b"WARNING" not in stderr:
-            print(f"[Waring/Error]: {stderr}")
+        if os.path.exists(jsonWordPath):
+            with open(jsonWordPath, "r", encoding="utf-8") as f:
+                data = json.load(f)
+            if data[0].get('MD5Sum') == MD5Sum:
+                logger.debug("匹配HASH校验无误，自动返回Json文件路径!")
+                return jsonWordPath
+        FlowAnalyzer.extract_json_file(fileName, display_filter, tshark_workDir)
-        os.chdir(oriDir)
-        dst_JsonPath = os.path.join(oriDir, "output.json")
-        if jsonPath != dst_JsonPath:
-            shutil.move(jsonPath, dst_JsonPath)
-        return dst_JsonPath
+        if tshark_jsonPath != jsonWordPath:
+            shutil.move(tshark_jsonPath, jsonWordPath)
+        with open(jsonWordPath, "r", encoding="utf-8") as f:
+            data = json.load(f)
+        data[0]['MD5Sum'] = MD5Sum
+        with open(jsonWordPath, "w", encoding="utf-8") as f:
+            json.dump(data, f, indent=2)
+        return jsonWordPath
     def Split_HTTP_headers(self, file_data: bytes) -> Tuple[bytes, bytes]:
         # sourcery skip: use-named-expression
@@ -178,39 +237,37 @@ class FlowAnalyzer:
         """
         chunks = []
         chunkSizeEnd = file_data.find(b"\n") + 1
-        lineEndings = b'\r\n' if bytes([file_data[chunkSizeEnd-2]]) == b'\r' else b'\n'
+        lineEndings = b"\r\n" if bytes([file_data[chunkSizeEnd - 2]]) == b"\r" else b"\n"
         lineEndingsLength = len(lineEndings)
         while True:
             chunkSize = int(file_data[:chunkSizeEnd], 16)
             if not chunkSize:
                 break
-            chunks.append(file_data[chunkSizeEnd:chunkSize + chunkSizeEnd])
-            file_data = file_data[chunkSizeEnd + chunkSize + lineEndingsLength:]
+            chunks.append(file_data[chunkSizeEnd : chunkSize + chunkSizeEnd])
+            file_data = file_data[chunkSizeEnd + chunkSize + lineEndingsLength :]
             chunkSizeEnd = file_data.find(lineEndings) + lineEndingsLength
-        return b''.join(chunks)
+        return b"".join(chunks)
-    def extract_http_file_data(self, full_request: bytes) -> Tuple[bytes, bytes]:
-        # sourcery skip: merge-else-if-into-elif, swap-if-else-branches
+    def extract_http_file_data(self, full_request: str) -> Tuple[bytes, bytes]:
         """提取HTTP请求或响应中的文件数据
         Parameters
         ----------
         full_request : bytes
             HTTP请求或响应的原始字节流
         Returns
         -------
         tuple
             包含header和file_data的元组
         """
-        full_request = bytes.fromhex(full_request)
-        header, file_data = self.Split_HTTP_headers(full_request)
+        header, file_data = self.Split_HTTP_headers(bytes.fromhex(full_request))
         with contextlib.suppress(Exception):
             file_data = self.Dechunck_HTTP_response(file_data)
         with contextlib.suppress(Exception):
             if file_data.startswith(b"\x1F\x8B"):
                 file_data = gzip.decompress(file_data)
-        return header, file_data
+        return header, file_data

FlowAnalyzer-0.2.9/FlowAnalyzer/logging_config.py ADDED Viewed

@@ -0,0 +1,20 @@
+import logging
+def configure_logger(logger_name, level=logging.DEBUG) -> logging.Logger:
+    # 创建一个 logger 对象
+    logger = logging.getLogger(logger_name)
+    logger.setLevel(level)
+    # 创建一个处理器，将日志输出到控制台
+    console_handler = logging.StreamHandler()
+    logger.addHandler(console_handler)
+    # 创建一个格式化器，定义日志的输出格式
+    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+    console_handler.setFormatter(formatter)
+    return logger
+if __name__ == '__main__':
+    logger = configure_logger("FlowAnalyzer")
+    logger.info("This is a test!")

{FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/FlowAnalyzer.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: FlowAnalyzer
-Version: 0.2.7
+Version: 0.2.9
 Summary: FlowAnalyzer是一个流量分析器，用于解析和处理tshark导出的JSON数据文件
 Home-page: https://github.com/Byxs20/FlowAnalyzer
 Author: Byxs20
@@ -22,6 +22,8 @@ Description: # FlowAnalyzer
         # Usage
+        请务必添加 `tshark.exe` 到环境变量，否则找不到会出错！
         ```
         $ git clone https://github.com/Byxs20/FlowAnalyzer.git
         $ cd ./FlowAnalyzer/

{FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/FlowAnalyzer.egg-info/SOURCES.txt RENAMED Viewed

@@ -3,6 +3,7 @@ README.md
 setup.py
 FlowAnalyzer/FlowAnalyzer.py
 FlowAnalyzer/__init__.py
+FlowAnalyzer/logging_config.py
 FlowAnalyzer.egg-info/PKG-INFO
 FlowAnalyzer.egg-info/SOURCES.txt
 FlowAnalyzer.egg-info/dependency_links.txt

{FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: FlowAnalyzer
-Version: 0.2.7
+Version: 0.2.9
 Summary: FlowAnalyzer是一个流量分析器，用于解析和处理tshark导出的JSON数据文件
 Home-page: https://github.com/Byxs20/FlowAnalyzer
 Author: Byxs20
@@ -22,6 +22,8 @@ Description: # FlowAnalyzer
         # Usage
+        请务必添加 `tshark.exe` 到环境变量，否则找不到会出错！
         ```
         $ git clone https://github.com/Byxs20/FlowAnalyzer.git
         $ cd ./FlowAnalyzer/

{FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/README.md RENAMED Viewed

@@ -14,6 +14,8 @@ pip3 install FlowAnalyzer -i https://pypi.org/simple
 # Usage
+请务必添加 `tshark.exe` 到环境变量，否则找不到会出错！
 ```
 $ git clone https://github.com/Byxs20/FlowAnalyzer.git
 $ cd ./FlowAnalyzer/

{FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/setup.py RENAMED Viewed

@@ -1,12 +1,13 @@
 import os
-from setuptools import setup, find_packages
+from setuptools import find_packages, setup
 with open(os.path.join(os.path.dirname(__file__), "README.md"), encoding="utf-8") as f:
     long_description = f.read()
 setup(
     name="FlowAnalyzer",
-    version="0.2.7",
+    version="0.2.9",
     description="FlowAnalyzer是一个流量分析器，用于解析和处理tshark导出的JSON数据文件",
     author="Byxs20",
     author_email="97766819@qq.com",

{FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/FlowAnalyzer/__init__.py RENAMED Viewed

File without changes

{FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/FlowAnalyzer.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/FlowAnalyzer.egg-info/top_level.txt RENAMED Viewed

File without changes

{FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/LICENSE RENAMED Viewed

File without changes

{FlowAnalyzer-0.2.7 → FlowAnalyzer-0.2.9}/setup.cfg RENAMED Viewed

File without changes

FlowAnalyzer 0.2.7.tar.gz → 0.2.9.tar.gz

FlowAnalyzer 0.2.7.tar.gz → 0.2.9.tar.gz