PyPI - http-content-parser - Versions diffs - 0.0.27__tar.gz → 0.0.29__tar.gz - Mend

http-content-parser 0.0.27tar.gz → 0.0.29tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

{http_content_parser-0.0.27 → http_content_parser-0.0.29}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: http_content_parser
-Version: 0.0.27
+Version: 0.0.29
 Summary: parse http's payload and response
 Author-email: leo <suleiabc@gmail.com>
 License-File: LICENSE

{http_content_parser-0.0.27 → http_content_parser-0.0.29}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "http_content_parser"
-version = "0.0.27"
+version = "0.0.29"
 authors = [{ name = "leo", email = "suleiabc@gmail.com" }]
 description = "parse http's payload and response"
 readme = "README.md"

{http_content_parser-0.0.27 → http_content_parser-0.0.29}/src/http_content_parser/api_parser.py RENAMED Viewed

@@ -37,36 +37,65 @@ class ApiModelParser:
         new_payload_list = self.__handle_duplicate_api_label_for_dict(payload_list)
         return new_payload_list
-    def convert_curl_to_list(
-        self, curl_file_path: str, url_filter=None
-    ) -> list[dict]:
+    def convert_curl_to_list(self, curl_file_path: str, url_filter=None) -> list[dict]:
+        with open(curl_file_path, "rt") as f:
+            raw_text = f.read()
         curl_parser = CurlParser()
+        requests = curl_parser.parse_multiple(raw_text, url_filter=url_filter)
         payload_list = []
-        with open(curl_file_path, "rt") as f:
-            lines = f.readlines()
-            line_num_array = curl_parser.get_curl_line_num_scope(lines=lines)
-            for s, e in line_num_array:
-                res = curl_parser.split_curl_to_struct(lines, s, e, url_filter)
-                url_content = curl_parser.parse_url(res.get("original_url", ""))
-                path_str = url_content.get("path", "")[1:]
-                method = (res.get("method", "") or "").lower()
-                label = self.__replace_api_label_chars(path_str) + "_" + method
-                req_data = {
+        for req in requests:
+            path_str = req.path.lstrip("/")
+            method = req.method.lower()
+            label = self.__replace_api_label_chars(path_str) + "_" + method
+            payload_list.append(
+                {
                     "path": path_str,
-                    "header": json.dumps(res.get("header", {})),
-                    "body": res.get("body", {}),
+                    "header": json.dumps(req.headers),
+                    "body": req.body or {},
                     "query_param": (
-                        json.dumps(url_content.get("query_params", {}))
-                        if url_content.get("query_params")
-                        else {}
+                        json.dumps(req.query_params) if req.query_params else {}
                     ),
-                    "original_url": res.get("original_url", ""),
+                    "original_url": req.original_url,
                     "method": method,
                     "temp_api_label": label,
                 }
-                payload_list.append(req_data)
+            )
         return payload_list
+    # def convert_curl_to_list_old(
+    #     self, curl_file_path: str, url_filter=None
+    # ) -> list[dict]:
+    #     curl_parser = CurlParser()
+    #     payload_list = []
+    #     with open(curl_file_path, "rt") as f:
+    #         lines = f.readlines()
+    #         line_num_array = curl_parser.get_curl_line_num_scope(lines=lines)
+    #         for s, e in line_num_array:
+    #             res = curl_parser.split_curl_to_struct(lines, s, e, url_filter)
+    #             url_content = curl_parser.parse_url(res.get("original_url", ""))
+    #             path_str = url_content.get("path", "")[1:]
+    #             method = (res.get("method", "") or "").lower()
+    #             label = self.__replace_api_label_chars(path_str) + "_" + method
+    #             req_data = {
+    #                 "path": path_str,
+    #                 "header": json.dumps(res.get("header", {})),
+    #                 "body": res.get("body", {}),
+    #                 "query_param": (
+    #                     json.dumps(url_content.get("query_params", {}))
+    #                     if url_content.get("query_params")
+    #                     else {}
+    #                 ),
+    #                 "original_url": res.get("original_url", ""),
+    #                 "method": method,
+    #                 "temp_api_label": label,
+    #             }
+    #             payload_list.append(req_data)
+    #     return payload_list
     def convert_postman_to_list(self, postman_dict: dict) -> list[dict]:
         api_infos = parse_postman(postman_dict) or []
         payload_list = []

http_content_parser-0.0.29/src/http_content_parser/curl_parser.py ADDED Viewed

@@ -0,0 +1,327 @@
+# -*- coding: UTF-8 -*-
+"""
+curl_parser.py
+==============
+A structured parser for curl commands.
+Supports:
+  - Single and multi-line curl commands
+  - -X / --request METHOD
+  - -H / --header 'Key: Value'
+  - --data / --data-raw / --data-binary / --data-urlencode body
+  - -u / --user 'user:pass'  →  Basic-Auth header
+  - -b / --cookie 'key=val'
+  - --compressed, -L, etc. (silently ignored)
+  - URL query-string decomposition
+  - Multiple curl commands in one text block
+"""
+from collections import defaultdict
+import re
+from urllib.parse import parse_qsl, urlparse
+from dataclasses import dataclass, field
+from typing import Optional
+# ---------------------------------------------------------------------------
+# Data model
+# ---------------------------------------------------------------------------
+@dataclass
+class CurlRequest:
+    """Structured result of parsing one curl command.
+    ``method``, ``original_url``, ``headers`` and ``body`` are set by the parser;
+    the remaining fields are derived from ``original_url`` when
+    :meth:`populate_url_parts` is called.
+    """
+    method: str = "get"
+    original_url: str = ""
+    headers: dict = field(default_factory=dict)
+    body: Optional[str] = None
+    # Parsed URL components (empty until populate_url_parts() runs)
+    scheme: str = ""
+    netloc: str = ""
+    path: str = ""
+    query: str = ""
+    query_params: dict = field(default_factory=dict)
+    def populate_url_parts(self) -> None:
+        """Decompose ``original_url`` into scheme / netloc / path / query.
+        A non-empty URL without ``://`` is parsed as if it were prefixed with
+        ``http://``. ``query_params`` maps each key to its single value, or to
+        a list of values when the key is repeated. Keys with an empty value
+        (``?flag=``) are kept with ``""`` instead of being dropped.
+        """
+        url = self.original_url
+        if url and "://" not in url:
+            url = "http://" + url
+        parsed = urlparse(url)
+        self.scheme = parsed.scheme
+        self.netloc = parsed.netloc
+        self.path = parsed.path
+        self.query = parsed.query
+        grouped = defaultdict(list)
+        # keep_blank_values=True: parse_qsl discards "key=" pairs by default,
+        # which would silently lose parameters present in the original URL.
+        for key, value in parse_qsl(parsed.query, keep_blank_values=True):
+            grouped[key].append(value)
+        # Unwrap keys that occur once; repeated keys keep their list of values.
+        self.query_params = {
+            key: values[0] if len(values) == 1 else values
+            for key, values in grouped.items()
+        }
+    def to_dict(self) -> dict:
+        """Return all fields as a plain dict (e.g. for JSON serialisation)."""
+        return {
+            "method": self.method,
+            "original_url": self.original_url,
+            "scheme": self.scheme,
+            "netloc": self.netloc,
+            "path": self.path,
+            "query": self.query,
+            "query_params": self.query_params,
+            "headers": self.headers,
+            "body": self.body,
+        }
+# ---------------------------------------------------------------------------
+# Token / line helpers
+# ---------------------------------------------------------------------------
+class _LineNormalizer:
+    """Collapse multi-line curl commands into logical single lines."""
+    # Backslash, optional trailing spaces/tabs, then exactly ONE line break.
+    # A greedy \s* here would also consume following blank lines and glue the
+    # next command onto the current one.
+    _CONTINUATION_RE = re.compile(r"\\[ \t]*\r?\n")
+    _CURL_START_RE = re.compile(r"^\s*curl\b", re.IGNORECASE)
+    @staticmethod
+    def join_continuations(raw_text: str) -> list[str]:
+        """
+        Replace line-continuation backslashes so each logical curl command
+        becomes one long string, then split on the remaining line breaks.
+        Returns the stripped, non-empty lines.
+        """
+        joined = _LineNormalizer._CONTINUATION_RE.sub(" ", raw_text)
+        return [line.strip() for line in joined.splitlines() if line.strip()]
+    @staticmethod
+    def split_into_curl_blocks(lines: list[str]) -> list[str]:
+        """
+        Group lines that belong to the same curl invocation.
+        A new block starts at every line that begins with the word 'curl'
+        (case-insensitive); following lines are appended to it, joined by a
+        single space. Lines that appear before the first 'curl' line form a
+        block of their own.
+        """
+        blocks: list[str] = []
+        current: list[str] = []
+        for line in lines:
+            if _LineNormalizer._CURL_START_RE.match(line):
+                if current:
+                    blocks.append(" ".join(current))
+                current = [line]
+            else:
+                current.append(line)
+        if current:
+            blocks.append(" ".join(current))
+        return blocks
+# ---------------------------------------------------------------------------
+# Field extractors  (one per curl concept)
+# ---------------------------------------------------------------------------
+class _UrlExtractor:
+    """Find the request target inside a curl command string."""
+    # Absolute http(s) URL: runs until whitespace, a quote or a backslash
+    _HTTP_RE = re.compile(r"""(https?://[^\s'"\\]+)""")
+    # Postman-style fallback: a token starting with "/" that does not follow a word character
+    _PATH_RE = re.compile(r"""(?<!\w)(/[^\s'"\\]+)""")
+    @classmethod
+    def extract(cls, command: str) -> str:
+        """Return the first absolute URL in *command*, else the first bare path, else ""."""
+        # NOTE(review): the search covers the whole command, so a URL inside an
+        # earlier header value would be picked before the real target.
+        for pattern in (cls._HTTP_RE, cls._PATH_RE):
+            found = pattern.search(command)
+            if found is not None:
+                return found.group(1).rstrip("'\"")
+        return ""
+class _MethodExtractor:
+    """Determine the HTTP method named in a curl command."""
+    # Explicit: -X POST, -XPOST or --request DELETE.
+    # The flag is matched case-sensitively because lowercase -x is curl's
+    # proxy option, and it must not be the tail of another option name.
+    _EXPLICIT_RE = re.compile(
+        r"""(?<![\w-])(?:-X\s*|--request\s+)['"]?([A-Za-z]+)['"]?"""
+    )
+    # Inline: curl POST http://...
+    # Only the token directly after "curl" counts; matching a verb anywhere in
+    # the command would misread URLs such as https://host/v1/delete/42.
+    _INLINE_RE = re.compile(
+        r"\bcurl\s+(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)(?=\s|$)",
+        re.IGNORECASE,
+    )
+    @classmethod
+    def extract(cls, command: str) -> Optional[str]:
+        """Return the lower-cased method, or None when the command names none.
+        An explicit ``-X`` / ``--request`` flag takes precedence over the
+        inline ``curl METHOD url`` form.
+        """
+        for pattern in (cls._EXPLICIT_RE, cls._INLINE_RE):
+            m = pattern.search(command)
+            if m:
+                return m.group(1).lower()
+        return None
+class _HeaderExtractor:
+    """Collect every -H / --header argument of a curl command."""
+    # -H 'Key: Value'  or  --header "Key: Value"
+    # The closing quote must be the same character as the opening one (\1), so
+    # a value containing the other quote kind, e.g.
+    # -H 'sec-ch-ua: "Chromium";v="110"', is not cut off at the inner quote.
+    _RE = re.compile(r"""(?:-H|--header)\s+(['"])(.+?)\1""")
+    @classmethod
+    def extract_all(cls, command: str) -> dict[str, str]:
+        """Return a ``{name: value}`` dict of the quoted headers in *command*.
+        Each header is split on its first colon and both parts are stripped.
+        A header without a colon maps to ``""``. When a name is repeated the
+        last occurrence wins.
+        """
+        headers: dict[str, str] = {}
+        for m in cls._RE.finditer(command):
+            # partition splits on the first colon only; value is "" if absent
+            key, _, val = m.group(2).partition(":")
+            headers[key.strip()] = val.strip()
+        return headers
+class _BasicAuthExtractor:
+    """Turn a -u / --user credential into an HTTP Basic auth header value."""
+    # Accepts the credential bare or wrapped in single/double quotes
+    _RE = re.compile(r"""(?:-u|--user)\s+['"]?([^\s'"]+)['"]?""")
+    @classmethod
+    def extract(cls, command: str) -> Optional[str]:
+        """Return ``"Basic <base64(credential)>"`` for the first match, else None."""
+        import base64
+        found = cls._RE.search(command)
+        if found is None:
+            return None
+        credential = found.group(1).encode()
+        token = base64.b64encode(credential).decode()
+        return "Basic " + token
+class _CookieExtractor:
+    """Read the quoted cookie string passed with -b / --cookie."""
+    # -b 'name=value; ...'  or  --cookie "..."
+    # The closing quote must equal the opening one (\1) so that a cookie value
+    # containing the other quote kind is returned whole. The lookbehind keeps
+    # "-b" from matching at the tail of a longer token.
+    _RE = re.compile(r"""(?<![\w-])(?:-b|--cookie)\s+(['"])(.+?)\1""")
+    @classmethod
+    def extract(cls, command: str) -> Optional[str]:
+        """Return the first quoted cookie argument, or None when there is none."""
+        m = cls._RE.search(command)
+        return m.group(2) if m else None
+class _BodyExtractor:
+    """
+    Extract the request body given with one of:
+      --data / -d
+      --data-raw
+      --data-binary
+      --data-urlencode
+    Only the first body flag in the command is used.
+    """
+    # Quoted form: the value ends at the quote character that opened it
+    _FLAG_RE = re.compile(
+        r"""(?:--data(?:-raw|-binary|-urlencode)?|-d)\s+(['"])(.*?)\1""",
+        re.DOTALL,
+    )
+    # Unquoted form: the value is the next whitespace-delimited token
+    _NO_QUOTE_RE = re.compile(r"""(?:--data(?:-raw|-binary|-urlencode)?|-d)\s+(\S+)""")
+    @classmethod
+    def extract(cls, command: str) -> Optional[str]:
+        """Return the body text, or None when no body flag is present.
+        For a quoted body every run of whitespace is squeezed to one space and
+        the ends are trimmed. An unquoted body is returned with any leading or
+        trailing quote characters removed.
+        """
+        quoted = cls._FLAG_RE.search(command)
+        if quoted is not None:
+            return re.sub(r"\s+", " ", quoted.group(2)).strip()
+        bare = cls._NO_QUOTE_RE.search(command)
+        if bare is None:
+            return None
+        return bare.group(1).strip("'\"")
+# ---------------------------------------------------------------------------
+# Main parser
+# ---------------------------------------------------------------------------
+class CurlParser:
+    """
+    Turn raw curl text into CurlRequest objects.
+    Usage
+    -----
+    parser = CurlParser()
+    # Single command (line continuations are allowed)
+    req = parser.parse("curl -X POST https://api.example.com/v1/data -H 'Content-Type: application/json'")
+    # Several commands in one text block
+    requests = parser.parse_multiple(raw_text)
+    """
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+    def parse(self, raw: str, url_filter: Optional[str] = None) -> CurlRequest:
+        """Parse one curl command string.
+        When *url_filter* is given and does not occur anywhere in the
+        normalized command text, an empty CurlRequest is returned instead.
+        """
+        command = self._normalize(raw)
+        wanted = not url_filter or url_filter in command
+        return self._build_request(command) if wanted else CurlRequest()
+    def parse_multiple(
+        self, raw: str, url_filter: Optional[str] = None
+    ) -> list[CurlRequest]:
+        """Parse every curl command found in *raw*.
+        Results without a URL (nothing recognised, or rejected by
+        *url_filter*) are left out.
+        """
+        logical_lines = _LineNormalizer.join_continuations(raw)
+        parsed = (
+            self.parse(block, url_filter=url_filter)
+            for block in _LineNormalizer.split_into_curl_blocks(logical_lines)
+        )
+        return [req for req in parsed if req.original_url]
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+    @staticmethod
+    def _normalize(raw: str) -> str:
+        """Replace backslash line continuations with a space and trim the ends."""
+        return re.sub(r"\\\s*\n", " ", raw).strip()
+    def _build_request(self, command: str) -> CurlRequest:
+        """Run every extractor over *command* and assemble the CurlRequest."""
+        req = CurlRequest()
+        req.original_url = _UrlExtractor.extract(command)
+        # Named method if there is one, otherwise guessed from the body flags
+        req.method = _MethodExtractor.extract(command) or self._infer_method(command)
+        req.headers = _HeaderExtractor.extract_all(command)
+        # -u and -b only fill in headers that were not given explicitly via -H
+        derived_headers = (
+            ("Authorization", _BasicAuthExtractor.extract(command)),
+            ("Cookie", _CookieExtractor.extract(command)),
+        )
+        for name, value in derived_headers:
+            if value:
+                req.headers.setdefault(name, value)
+        req.body = _BodyExtractor.extract(command)
+        # A non-empty body turns a "get" into a "post"
+        if req.method == "get" and req.body:
+            req.method = "post"
+        req.populate_url_parts()
+        return req
+    @staticmethod
+    def _infer_method(command: str) -> str:
+        """Return "post" when a body flag is present, otherwise "get"."""
+        body_flag = re.search(
+            r"(?:--data(?:-raw|-binary|-urlencode)?|-d)\s", command
+        )
+        return "get" if body_flag is None else "post"

http_content_parser-0.0.29/tests/test_api_parser.py ADDED Viewed

@@ -0,0 +1,34 @@
+import json
+import os
+from http_content_parser.api_parser import ApiModelParser
+from http_content_parser.curl_parser import CurlParser
+api_parser = ApiModelParser()
+curl_file = os.path.dirname(os.path.abspath(__file__)) + "/tmp"
+def test_curl_parser():
+    """Smoke test: run the curl fixture through ApiModelParser and print the result."""
+    parsed = api_parser.get_api_list_for_curl(curl_file=curl_file)
+    rendered = json.dumps(parsed, indent=4)
+    print(rendered)
+def test_curl_parser_with_filter():
+    """parse_multiple returns every command, or only those containing url_filter."""
+    sample = r"""
+curl -X POST 'https://api.example.com/v1/items?page=1&size=10' \
+  -H 'Content-Type: application/json' \
+  -H 'Authorization: Bearer TOKEN123' \
+  --data-raw '{"name": "hello", "value": 42}'
+curl 'https://api.example.com/v1/health' \
+  -H 'Accept: application/json'
+"""
+    parser = CurlParser()
+    # Without a filter both commands come back, in input order.
+    everything = parser.parse_multiple(sample)
+    assert [req.path for req in everything] == ["/v1/items", "/v1/health"]
+    assert [req.method for req in everything] == ["post", "get"]
+    # With a filter only the command whose text contains it survives.
+    filtered = parser.parse_multiple(sample, url_filter="/v1/items")
+    assert len(filtered) == 1
+    req = filtered[0]
+    assert req.original_url == "https://api.example.com/v1/items?page=1&size=10"
+    assert req.query_params == {"page": "1", "size": "10"}
+    assert req.headers == {
+        "Content-Type": "application/json",
+        "Authorization": "Bearer TOKEN123",
+    }
+    assert req.body == '{"name": "hello", "value": 42}'
+    # Keep the dump: it is useful when the assertions above fail.
+    print(json.dumps(req.to_dict(), indent=2, ensure_ascii=False))

http_content_parser-0.0.27/tests/test_api_parser.py DELETED Viewed

@@ -1,14 +0,0 @@
-import json
-import os
-from http_content_parser.api_parser import ApiModelParser
-api_parser = ApiModelParser()
-curl_file = os.path.dirname(os.path.abspath(__file__)) + "/tmp"
-def test_curl_parser():
-    api_info = api_parser.get_api_list_for_curl(curl_file=curl_file)
-    print(json.dumps(api_info, indent=4))