http-content-parser 0.0.27__tar.gz → 0.0.29__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19)
  1. {http_content_parser-0.0.27 → http_content_parser-0.0.29}/PKG-INFO +1 -1
  2. {http_content_parser-0.0.27 → http_content_parser-0.0.29}/pyproject.toml +1 -1
  3. {http_content_parser-0.0.27 → http_content_parser-0.0.29}/src/http_content_parser/api_parser.py +49 -20
  4. http_content_parser-0.0.29/src/http_content_parser/curl_parser.py +327 -0
  5. http_content_parser-0.0.29/tests/test_api_parser.py +34 -0
  6. http_content_parser-0.0.27/tests/test_api_parser.py +0 -14
  7. {http_content_parser-0.0.27 → http_content_parser-0.0.29}/.gitignore +0 -0
  8. {http_content_parser-0.0.27 → http_content_parser-0.0.29}/LICENSE +0 -0
  9. {http_content_parser-0.0.27 → http_content_parser-0.0.29}/README.md +0 -0
  10. {http_content_parser-0.0.27 → http_content_parser-0.0.29}/requirements.txt +0 -0
  11. {http_content_parser-0.0.27 → http_content_parser-0.0.29}/src/http_content_parser/__init__.py +0 -0
  12. /http_content_parser-0.0.27/src/http_content_parser/curl_parser.py → /http_content_parser-0.0.29/src/http_content_parser/curl_parser_old.py +0 -0
  13. {http_content_parser-0.0.27 → http_content_parser-0.0.29}/src/http_content_parser/openapi_parser.py +0 -0
  14. {http_content_parser-0.0.27 → http_content_parser-0.0.29}/src/http_content_parser/param_util.py +0 -0
  15. {http_content_parser-0.0.27 → http_content_parser-0.0.29}/src/http_content_parser/postman_parser.py +0 -0
  16. {http_content_parser-0.0.27 → http_content_parser-0.0.29}/src/http_content_parser/swagger2_parser.py +0 -0
  17. {http_content_parser-0.0.27 → http_content_parser-0.0.29}/tests/test_fix.py +0 -0
  18. {http_content_parser-0.0.27 → http_content_parser-0.0.29}/tests/test_param_util.py +0 -0
  19. {http_content_parser-0.0.27 → http_content_parser-0.0.29}/tests/test_postman.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: http_content_parser
3
- Version: 0.0.27
3
+ Version: 0.0.29
4
4
  Summary: parse http's payload and response
5
5
  Author-email: leo <suleiabc@gmail.com>
6
6
  License-File: LICENSE
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "http_content_parser"
7
- version = "0.0.27"
7
+ version = "0.0.29"
8
8
  authors = [{ name = "leo", email = "suleiabc@gmail.com" }]
9
9
  description = "parse http's payload and response"
10
10
  readme = "README.md"
@@ -37,36 +37,65 @@ class ApiModelParser:
37
37
  new_payload_list = self.__handle_duplicate_api_label_for_dict(payload_list)
38
38
  return new_payload_list
39
39
 
40
- def convert_curl_to_list(
41
- self, curl_file_path: str, url_filter=None
42
- ) -> list[dict]:
40
+ def convert_curl_to_list(self, curl_file_path: str, url_filter=None) -> list[dict]:
41
+ with open(curl_file_path, "rt") as f:
42
+ raw_text = f.read()
43
+
43
44
  curl_parser = CurlParser()
45
+ requests = curl_parser.parse_multiple(raw_text, url_filter=url_filter)
46
+
44
47
  payload_list = []
45
- with open(curl_file_path, "rt") as f:
46
- lines = f.readlines()
47
- line_num_array = curl_parser.get_curl_line_num_scope(lines=lines)
48
- for s, e in line_num_array:
49
- res = curl_parser.split_curl_to_struct(lines, s, e, url_filter)
50
- url_content = curl_parser.parse_url(res.get("original_url", ""))
51
- path_str = url_content.get("path", "")[1:]
52
- method = (res.get("method", "") or "").lower()
53
- label = self.__replace_api_label_chars(path_str) + "_" + method
54
- req_data = {
48
+ for req in requests:
49
+ path_str = req.path.lstrip("/")
50
+ method = req.method.lower()
51
+ label = self.__replace_api_label_chars(path_str) + "_" + method
52
+
53
+ payload_list.append(
54
+ {
55
55
  "path": path_str,
56
- "header": json.dumps(res.get("header", {})),
57
- "body": res.get("body", {}),
56
+ "header": json.dumps(req.headers),
57
+ "body": req.body or {},
58
58
  "query_param": (
59
- json.dumps(url_content.get("query_params", {}))
60
- if url_content.get("query_params")
61
- else {}
59
+ json.dumps(req.query_params) if req.query_params else {}
62
60
  ),
63
- "original_url": res.get("original_url", ""),
61
+ "original_url": req.original_url,
64
62
  "method": method,
65
63
  "temp_api_label": label,
66
64
  }
67
- payload_list.append(req_data)
65
+ )
66
+
68
67
  return payload_list
69
68
 
69
+ # def convert_curl_to_list_old(
70
+ # self, curl_file_path: str, url_filter=None
71
+ # ) -> list[dict]:
72
+ # curl_parser = CurlParser()
73
+ # payload_list = []
74
+ # with open(curl_file_path, "rt") as f:
75
+ # lines = f.readlines()
76
+ # line_num_array = curl_parser.get_curl_line_num_scope(lines=lines)
77
+ # for s, e in line_num_array:
78
+ # res = curl_parser.split_curl_to_struct(lines, s, e, url_filter)
79
+ # url_content = curl_parser.parse_url(res.get("original_url", ""))
80
+ # path_str = url_content.get("path", "")[1:]
81
+ # method = (res.get("method", "") or "").lower()
82
+ # label = self.__replace_api_label_chars(path_str) + "_" + method
83
+ # req_data = {
84
+ # "path": path_str,
85
+ # "header": json.dumps(res.get("header", {})),
86
+ # "body": res.get("body", {}),
87
+ # "query_param": (
88
+ # json.dumps(url_content.get("query_params", {}))
89
+ # if url_content.get("query_params")
90
+ # else {}
91
+ # ),
92
+ # "original_url": res.get("original_url", ""),
93
+ # "method": method,
94
+ # "temp_api_label": label,
95
+ # }
96
+ # payload_list.append(req_data)
97
+ # return payload_list
98
+
70
99
  def convert_postman_to_list(self, postman_dict: dict) -> list[dict]:
71
100
  api_infos = parse_postman(postman_dict) or []
72
101
  payload_list = []
@@ -0,0 +1,327 @@
1
+ # -*- coding: UTF-8 -*-
2
+ """
3
+ curl_parser.py
4
+ ==============
5
+ A structured parser for curl commands.
6
+
7
+ Supports:
8
+ - Single and multi-line curl commands
9
+ - -X / --request METHOD
10
+ - -H / --header 'Key: Value'
11
+ - --data / --data-raw / --data-binary / --data-urlencode body
12
+ - -u / --user 'user:pass' → Basic-Auth header
13
+ - -b / --cookie 'key=val'
14
+ - --compressed, -L, etc. (silently ignored)
15
+ - URL query-string decomposition
16
+ - Multiple curl commands in one text block
17
+ """
18
+
19
+ from collections import defaultdict
20
+ import re
21
+ from urllib.parse import parse_qsl, urlparse
22
+ from dataclasses import dataclass, field
23
+ from typing import Optional
24
+
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # Data model
28
+ # ---------------------------------------------------------------------------
29
+
30
+
31
@dataclass
class CurlRequest:
    """One curl command broken down into its HTTP request parts.

    The URL-component fields (``scheme``, ``netloc``, ``path``, ``query``,
    ``query_params``) keep their empty defaults until
    :meth:`populate_url_parts` is called.
    """

    method: str = "get"
    original_url: str = ""
    headers: dict = field(default_factory=dict)
    body: Optional[str] = None
    # Parsed URL components (filled in by populate_url_parts)
    scheme: str = ""
    netloc: str = ""
    path: str = ""
    query: str = ""
    query_params: dict = field(default_factory=dict)

    def populate_url_parts(self) -> None:
        """Decompose ``original_url`` into scheme / netloc / path / query.

        A URL without ``://`` is parsed as if it started with ``http://``.
        ``query_params`` maps each query key to its value: a plain string
        when the key occurs once, a list of strings when it is repeated.
        Parameters with an empty value (``?debug=``) are kept as ``""`` so
        that ``query_params`` stays consistent with ``query``.
        """
        url = self.original_url
        if url and "://" not in url:
            url = "http://" + url
        parsed = urlparse(url)
        self.scheme = parsed.scheme
        self.netloc = parsed.netloc
        self.path = parsed.path
        self.query = parsed.query

        grouped = defaultdict(list)
        # keep_blank_values=True: parse_qsl drops "key=" pairs by default,
        # which would silently lose parameters that are present in the URL.
        for key, value in parse_qsl(parsed.query, keep_blank_values=True):
            grouped[key].append(value)
        # Single occurrence -> scalar value; repeated key -> list of values.
        self.query_params = {
            key: values[0] if len(values) == 1 else values
            for key, values in grouped.items()
        }

    def to_dict(self) -> dict:
        """Return all fields as a plain dictionary."""
        return {
            "method": self.method,
            "original_url": self.original_url,
            "scheme": self.scheme,
            "netloc": self.netloc,
            "path": self.path,
            "query": self.query,
            "query_params": self.query_params,
            "headers": self.headers,
            "body": self.body,
        }
72
+
73
+
74
+ # ---------------------------------------------------------------------------
75
+ # Token / line helpers
76
+ # ---------------------------------------------------------------------------
77
+
78
+
79
class _LineNormalizer:
    """Collapse multi-line curl commands into logical single lines."""

    # Backslash, optional trailing blanks, then exactly ONE line break.
    # (A greedy ``\s*`` would also swallow following blank lines and glue the
    # next, unrelated command onto this one.)
    _CONTINUATION_RE = re.compile(r"\\[ \t]*\r?\n")
    _STARTS_WITH_CURL_RE = re.compile(r"^\s*curl\b", re.IGNORECASE)
    _MENTIONS_CURL_RE = re.compile(r"\bcurl\b", re.IGNORECASE)

    @staticmethod
    def join_continuations(raw_text: str) -> list[str]:
        """Join backslash-continued lines and return the non-empty lines.

        Each backslash + line break is replaced by a single space, then the
        text is split on the remaining line breaks. Returned lines are
        stripped; blank lines are dropped.
        """
        joined = _LineNormalizer._CONTINUATION_RE.sub(" ", raw_text)
        stripped = (line.strip() for line in joined.splitlines())
        return [line for line in stripped if line]

    @staticmethod
    def split_into_curl_blocks(lines: list[str]) -> list[str]:
        """Group lines that belong to the same curl invocation.

        A line that starts with ``curl`` opens a new block; following lines
        are appended to it (joined with a single space). Leading lines that
        do not mention ``curl`` at all (shebangs, comments, ...) are dropped
        so that every returned block contains a curl command.
        """
        blocks: list[str] = []
        current: list[str] = []

        for line in lines:
            if _LineNormalizer._STARTS_WITH_CURL_RE.match(line):
                if current:
                    blocks.append(" ".join(current))
                current = [line]
            elif current or _LineNormalizer._MENTIONS_CURL_RE.search(line):
                # Either a continuation of the open block, or a first line
                # such as "$ curl ..." that still carries a curl command.
                current.append(line)
            # else: preamble before the first curl command -> ignored

        if current:
            blocks.append(" ".join(current))

        return blocks
113
+
114
+
115
+ # ---------------------------------------------------------------------------
116
+ # Field extractors (one per curl concept)
117
+ # ---------------------------------------------------------------------------
118
+
119
+
120
class _UrlExtractor:
    """Locate the request URL inside a curl command string."""

    # Match http(s):// URLs - capture everything up to whitespace or quote
    _HTTP_RE = re.compile(r"""(https?://[^\s'"\\]+)""")
    # Bare-path fallback (Postman style): starts with /
    _PATH_RE = re.compile(r"""(?<!\w)(/[^\s'"\\]+)""")
    # curl options whose NEXT token is that option's value, never the URL.
    _VALUE_FLAGS = frozenset(
        {
            "-H", "--header",
            "-d", "--data", "--data-raw", "--data-binary",
            "--data-urlencode", "--data-ascii",
            "-F", "--form",
            "-u", "--user",
            "-b", "--cookie",
            "-c", "--cookie-jar",
            "-X", "--request",
            "-A", "--user-agent",
            "-e", "--referer",
            "-x", "--proxy",
            "-o", "--output",
            "-T", "--upload-file",
            "-D", "--dump-header",
            "-K", "--config",
            "-w", "--write-out",
            "-m", "--max-time",
            "--connect-timeout", "--retry",
            "--cacert", "--cert", "--key",
        }
    )

    @classmethod
    def extract(cls, command: str) -> str:
        """Return the request URL (or bare path) of *command*, else ``""``.

        Positional arguments are inspected first, so a URL that appears
        inside a header or data value (``-H 'Referer: https://...'``) is not
        mistaken for the request URL. If no positional argument looks like a
        URL, the whole command is searched as a last resort.
        """
        patterns = (cls._HTTP_RE, cls._PATH_RE)

        positional = cls._positional_tokens(command)
        for pattern in patterns:
            for token in positional:
                found = pattern.search(token)
                if found:
                    return found.group(1)

        # Fallback: first URL-looking substring anywhere in the command.
        for pattern in patterns:
            found = pattern.search(command)
            if found:
                return found.group(1)
        return ""

    @classmethod
    def _positional_tokens(cls, command: str) -> list[str]:
        """Shell-split *command* and drop options together with their values."""
        import shlex

        try:
            tokens = shlex.split(command)
        except ValueError:
            # Unbalanced quotes: cannot tokenise, let the caller fall back.
            return []

        positional: list[str] = []
        skip_value = False
        for token in tokens:
            if skip_value:
                skip_value = False
                continue
            if token.startswith("-"):
                skip_value = token in cls._VALUE_FLAGS
                continue
            positional.append(token)
        return positional
135
+
136
+
137
class _MethodExtractor:
    """Determine the HTTP method requested by a curl command."""

    # Explicit: -X POST, -XPOST, --request DELETE, --request=DELETE.
    # The flag is matched case-sensitively because lowercase ``-x`` is curl's
    # proxy option; the look-behind keeps "-X" inside a longer word
    # (e.g. a header name) from matching.
    _EXPLICIT_RE = re.compile(
        r"""(?<![\w-])(?:-X\s*|--request(?:\s+|=))['"]?([A-Za-z]+)['"]?"""
    )
    # Inline: curl POST http://...
    # Only the token directly after ``curl`` counts, so method words that
    # occur in the URL path, a header or the body are not picked up.
    _INLINE_RE = re.compile(
        r"\bcurl\s+(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)(?=\s|$)",
        re.IGNORECASE,
    )

    @classmethod
    def extract(cls, command: str) -> Optional[str]:
        """Return the lower-cased method named in *command*, or ``None``.

        ``-X`` / ``--request`` takes precedence over the inline form.
        """
        for pattern in (cls._EXPLICIT_RE, cls._INLINE_RE):
            found = pattern.search(command)
            if found:
                return found.group(1).lower()
        return None
157
+
158
+
159
class _HeaderExtractor:
    """Collect ``-H`` / ``--header`` values of a curl command."""

    # -H 'Key: Value' or --header "Key: Value".
    # The closing quote must be the same kind as the opening one, so a value
    # may contain the other quote character ('sec-ch-ua: "Chromium"').
    # Group 1: single-quoted text, group 2: double-quoted text (may contain
    # backslash escapes).
    _RE = re.compile(
        r"""(?:-H|--header)\s+(?:'([^']+)'|"((?:\\.|[^"\\])+)")"""
    )
    # Backslash escapes that a shell resolves inside double quotes.
    _DQ_ESCAPE_RE = re.compile(r'\\(["\\$`])')

    @classmethod
    def extract_all(cls, command: str) -> dict[str, str]:
        """Return ``{header name: header value}`` for every quoted header.

        Name and value are split on the first colon and stripped; a header
        without a colon gets an empty value. A repeated name keeps the last
        value seen.
        """
        headers: dict[str, str] = {}
        for found in cls._RE.finditer(command):
            single_quoted, double_quoted = found.groups()
            if single_quoted is not None:
                raw = single_quoted
            else:
                raw = cls._DQ_ESCAPE_RE.sub(r"\1", double_quoted)
            # Split on the first colon only: values may contain colons.
            name, _, value = raw.partition(":")
            headers[name.strip()] = value.strip()
        return headers
174
+
175
+
176
class _BasicAuthExtractor:
    """Turn ``-u`` / ``--user`` credentials into a Basic-Auth header value."""

    # -u user:pass or --user 'user:pass'.
    # Quoted credentials are captured up to the matching quote so that they
    # may contain spaces; unquoted ones end at the first whitespace.
    _RE = re.compile(
        r"""(?:-u|--user)\s+(?:'([^']+)'|"((?:\\.|[^"\\])+)"|([^\s'"]+))"""
    )
    # Backslash escapes that a shell resolves inside double quotes.
    _DQ_ESCAPE_RE = re.compile(r'\\(["\\$`])')

    @classmethod
    def extract(cls, command: str) -> Optional[str]:
        """Return a Basic-Auth header value string, or None."""
        import base64

        found = cls._RE.search(command)
        if not found:
            return None

        single_quoted, double_quoted, bare = found.groups()
        if single_quoted is not None:
            credentials = single_quoted
        elif double_quoted is not None:
            credentials = cls._DQ_ESCAPE_RE.sub(r"\1", double_quoted)
        else:
            credentials = bare

        encoded = base64.b64encode(credentials.encode()).decode()
        return f"Basic {encoded}"
190
+
191
+
192
class _CookieExtractor:
    """Read the cookie string passed with ``-b`` / ``--cookie``."""

    # -b 'name=value; ...' or --cookie "..." or -b name=value
    # Quoted values end at the matching quote, so they may contain the other
    # quote character. An unquoted argument is only accepted when it contains
    # "=", since curl treats an argument without "=" as a cookie file name.
    _RE = re.compile(
        r"""(?:-b|--cookie)\s+"""
        r"""(?:'([^']+)'|"((?:\\.|[^"\\])+)"|([^\s'"=]+=[^\s'"]*))"""
    )
    # Backslash escapes that a shell resolves inside double quotes.
    _DQ_ESCAPE_RE = re.compile(r'\\(["\\$`])')

    @classmethod
    def extract(cls, command: str) -> Optional[str]:
        """Return the cookie string of *command*, or ``None`` if there is none."""
        found = cls._RE.search(command)
        if not found:
            return None

        single_quoted, double_quoted, bare = found.groups()
        if single_quoted is not None:
            return single_quoted
        if double_quoted is not None:
            return cls._DQ_ESCAPE_RE.sub(r"\1", double_quoted)
        return bare
200
+
201
+
202
class _BodyExtractor:
    """
    Extract the request body of a curl command.

    Handles:
    --data / -d
    --data-raw
    --data-binary
    --data-urlencode
    """

    _FLAG = r"(?:--data(?:-raw|-binary|-urlencode)?|-d)"
    # Quoted body. A single-quoted body runs to the next single quote; a
    # double-quoted body may contain backslash escapes such as \" and runs to
    # the first UNescaped double quote.
    _FLAG_RE = re.compile(
        _FLAG + r"""\s+(?:'(?P<single>[^']*)'|"(?P<double>(?:\\.|[^"\\])*)")""",
        re.DOTALL,
    )
    # Fallback: no surrounding quotes
    _NO_QUOTE_RE = re.compile(_FLAG + r"""\s+(\S+)""")
    # Backslash escapes that a shell resolves inside double quotes.
    _DQ_ESCAPE_RE = re.compile(r'\\(["\\$`])')

    @classmethod
    def extract(cls, command: str) -> Optional[str]:
        """Return the body of the first data flag in *command*, or ``None``.

        For a quoted body, runs of whitespace are collapsed to one space and
        the result is stripped. An unquoted body is the next
        whitespace-delimited token.
        """
        found = cls._FLAG_RE.search(command)
        if found:
            raw = found.group("single")
            if raw is None:
                # Double-quoted: undo the shell escaping (\" -> ", \\ -> \).
                raw = cls._DQ_ESCAPE_RE.sub(r"\1", found.group("double"))
            # Normalise internal whitespace / escaped newlines
            return re.sub(r"\s+", " ", raw).strip()

        found = cls._NO_QUOTE_RE.search(command)
        if found:
            return found.group(1).strip("'\"")

        return None
232
+
233
+
234
+ # ---------------------------------------------------------------------------
235
+ # Main parser
236
+ # ---------------------------------------------------------------------------
237
+
238
+
239
class CurlParser:
    """
    Parse one or more curl commands given as raw text.

    Usage
    -----
    parser = CurlParser()

    # Single command
    req = parser.parse("curl -X POST https://api.example.com/v1/data \\
    -H 'Content-Type: application/json' \\
    --data-raw '{\"key\": \"value\"}'")

    # Multiple commands
    requests = parser.parse_multiple(raw_text)
    """

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def parse(self, raw: str, url_filter: Optional[str] = None) -> CurlRequest:
        """Parse a single curl command string into a CurlRequest.

        When *url_filter* is given and is not a substring of the request
        URL, an empty ``CurlRequest()`` is returned instead. The filter is
        compared with the URL only, so a match inside a header (Referer,
        Origin, ...) or the body does not let a request through.
        """
        request = self._build_request(self._normalize(raw))
        if url_filter and url_filter not in request.original_url:
            return CurlRequest()
        return request

    def parse_multiple(
        self, raw: str, url_filter: Optional[str] = None
    ) -> list[CurlRequest]:
        """Parse a text block containing one or more curl commands.

        Blocks without a URL, and blocks rejected by *url_filter*, are
        left out of the result.
        """
        lines = _LineNormalizer.join_continuations(raw)
        parsed = (
            self.parse(block, url_filter=url_filter)
            for block in _LineNormalizer.split_into_curl_blocks(lines)
        )
        # An empty original_url marks an empty / filtered-out block.
        return [request for request in parsed if request.original_url]

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _normalize(raw: str) -> str:
        """Collapse line continuations into a single command string."""
        # Backslash + one line break only; a greedy "\s*" would also swallow
        # blank lines that follow the continuation.
        return re.sub(r"\\[ \t]*\r?\n", " ", raw).strip()

    def _build_request(self, command: str) -> CurlRequest:
        """Run every field extractor over *command* and assemble the result."""
        req = CurlRequest()

        # --- URL ---
        req.original_url = _UrlExtractor.extract(command)

        # --- Method ---
        req.method = _MethodExtractor.extract(command) or self._infer_method(command)

        # --- Headers ---
        req.headers = _HeaderExtractor.extract_all(command)

        # --- Basic auth -> Authorization header (explicit -H wins) ---
        auth_val = _BasicAuthExtractor.extract(command)
        if auth_val:
            req.headers.setdefault("Authorization", auth_val)

        # --- Cookie -> Cookie header (explicit -H wins) ---
        cookie_val = _CookieExtractor.extract(command)
        if cookie_val:
            req.headers.setdefault("Cookie", cookie_val)

        # --- Body ---
        req.body = _BodyExtractor.extract(command)
        # A body normally implies POST, but an explicit "-X GET" /
        # "--request GET" is the user's choice and must not be overridden.
        method_was_explicit = _MethodExtractor._EXPLICIT_RE.search(command) is not None
        if req.body and req.method == "get" and not method_was_explicit:
            req.method = "post"

        # --- URL decomposition ---
        req.populate_url_parts()

        return req

    @staticmethod
    def _infer_method(command: str) -> str:
        """Fall back to GET unless a body flag is present."""
        has_body_flag = bool(
            re.search(r"(?:--data(?:-raw|-binary|-urlencode)?|-d)\s", command)
        )
        return "post" if has_body_flag else "get"
@@ -0,0 +1,34 @@
1
+ import json
2
+ import os
3
+ from http_content_parser.api_parser import ApiModelParser
4
+ from http_content_parser.curl_parser import CurlParser
5
+
6
+
7
+ api_parser = ApiModelParser()
8
+
9
+ curl_file = os.path.dirname(os.path.abspath(__file__)) + "/tmp"
10
+
11
+
12
def test_curl_parser():
    """Smoke test: parse the curl fixture file and print the resulting API list."""
    parsed_apis = api_parser.get_api_list_for_curl(curl_file=curl_file)
    rendered = json.dumps(parsed_apis, indent=4)
    print(rendered)
15
+
16
+
17
def test_curl_parser_with_filter():
    """Parse two curl commands, then check that url_filter keeps only one."""
    sample = r"""
curl -X POST 'https://api.example.com/v1/items?page=1&size=10' \
  -H 'Content-Type: application/json' \
  -H 'Authorization: Bearer TOKEN123' \
  --data-raw '{"name": "hello", "value": 42}'

curl 'https://api.example.com/v1/health' \
  -H 'Accept: application/json'
"""

    parser = CurlParser()
    requests = parser.parse_multiple(sample)

    for i, req in enumerate(requests, 1):
        print(f"\n{'='*60}")
        print(f"Request #{i}")
        print(json.dumps(req.to_dict(), indent=2, ensure_ascii=False))

    # Unfiltered: both commands are parsed, in order.
    assert [req.path for req in requests] == ["/v1/items", "/v1/health"]

    create, health = requests
    assert create.method == "post"
    assert create.query_params == {"page": "1", "size": "10"}
    assert create.headers == {
        "Content-Type": "application/json",
        "Authorization": "Bearer TOKEN123",
    }
    assert create.body == '{"name": "hello", "value": 42}'

    assert health.method == "get"
    assert health.headers == {"Accept": "application/json"}
    assert health.body is None

    # Filtered: only the command whose URL contains the filter survives.
    filtered = parser.parse_multiple(sample, url_filter="/v1/health")
    assert [req.original_url for req in filtered] == [
        "https://api.example.com/v1/health"
    ]
@@ -1,14 +0,0 @@
1
- import json
2
- import os
3
- from http_content_parser.api_parser import ApiModelParser
4
-
5
-
6
- api_parser = ApiModelParser()
7
-
8
- curl_file = os.path.dirname(os.path.abspath(__file__)) + "/tmp"
9
-
10
-
11
- def test_curl_parser():
12
- api_info = api_parser.get_api_list_for_curl(curl_file=curl_file)
13
- print(json.dumps(api_info, indent=4))
14
-