http-content-parser 0.0.25__tar.gz → 0.0.27__tar.gz

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (18)
  1. {http_content_parser-0.0.25 → http_content_parser-0.0.27}/PKG-INFO +1 -1
  2. {http_content_parser-0.0.25 → http_content_parser-0.0.27}/pyproject.toml +1 -1
  3. {http_content_parser-0.0.25 → http_content_parser-0.0.27}/src/http_content_parser/api_parser.py +2 -140
  4. {http_content_parser-0.0.25 → http_content_parser-0.0.27}/src/http_content_parser/curl_parser.py +17 -7
  5. {http_content_parser-0.0.25 → http_content_parser-0.0.27}/src/http_content_parser/param_util.py +4 -25
  6. http_content_parser-0.0.27/tests/test_fix.py +51 -0
  7. {http_content_parser-0.0.25 → http_content_parser-0.0.27}/tests/test_param_util.py +16 -1
  8. http_content_parser-0.0.25/src/http_content_parser/req_data.py +0 -116
  9. {http_content_parser-0.0.25 → http_content_parser-0.0.27}/.gitignore +0 -0
  10. {http_content_parser-0.0.25 → http_content_parser-0.0.27}/LICENSE +0 -0
  11. {http_content_parser-0.0.25 → http_content_parser-0.0.27}/README.md +0 -0
  12. {http_content_parser-0.0.25 → http_content_parser-0.0.27}/requirements.txt +0 -0
  13. {http_content_parser-0.0.25 → http_content_parser-0.0.27}/src/http_content_parser/__init__.py +0 -0
  14. {http_content_parser-0.0.25 → http_content_parser-0.0.27}/src/http_content_parser/openapi_parser.py +0 -0
  15. {http_content_parser-0.0.25 → http_content_parser-0.0.27}/src/http_content_parser/postman_parser.py +0 -0
  16. {http_content_parser-0.0.25 → http_content_parser-0.0.27}/src/http_content_parser/swagger2_parser.py +0 -0
  17. {http_content_parser-0.0.25 → http_content_parser-0.0.27}/tests/test_api_parser.py +0 -0
  18. {http_content_parser-0.0.25 → http_content_parser-0.0.27}/tests/test_postman.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: http_content_parser
3
- Version: 0.0.25
3
+ Version: 0.0.27
4
4
  Summary: parse http's payload and response
5
5
  Author-email: leo <suleiabc@gmail.com>
6
6
  License-File: LICENSE
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "http_content_parser"
7
- version = "0.0.25"
7
+ version = "0.0.27"
8
8
  authors = [{ name = "leo", email = "suleiabc@gmail.com" }]
9
9
  description = "parse http's payload and response"
10
10
  readme = "README.md"
@@ -5,66 +5,39 @@ import re
5
5
  from http_content_parser.curl_parser import CurlParser
6
6
  from http_content_parser.openapi_parser import OpenApiParser
7
7
  from http_content_parser.postman_parser import parse_postman
8
- from http_content_parser.req_data import ReqData
9
8
  from http_content_parser.swagger2_parser import Swagger2Parser
10
9
 
11
10
 
12
11
  class ApiModelParser:
13
- def get_api_model_for_curl(self, curl_file, curl_filter=None) -> list[ReqData]:
14
- # convert curl
15
- payload_list = self.convert_curl_data_to_model(
16
- curl_file_path=curl_file, url_filter=curl_filter
17
- )
18
- # handle duplicate key
19
- new_payload_list = self.__handle_duplicate_api_label(payload_list)
20
- return new_payload_list
21
12
 
22
13
  def get_api_list_for_curl(self, curl_file, curl_filter=None) -> list[dict]:
23
14
  # convert curl
24
- payload_list = self.convert_curl_data_to_list(
15
+ payload_list = self.convert_curl_to_list(
25
16
  curl_file_path=curl_file, url_filter=curl_filter
26
17
  )
27
18
  # handle duplicate key
28
19
  new_payload_list = self.__handle_duplicate_api_label_for_dict(payload_list)
29
20
  return new_payload_list
30
21
 
31
- def get_api_model_for_postman(self, json_dict: dict) -> list[ReqData]:
32
- payload_list = self.convert_postman_to_model(postman_dict=json_dict)
33
- # handle duplicate key
34
- new_payload_list = self.__handle_duplicate_api_label(payload_list)
35
- return new_payload_list
36
-
37
22
  def get_api_list_for_postman(self, json_dict: dict) -> list[dict]:
38
23
  payload_list = self.convert_postman_to_list(postman_dict=json_dict)
39
24
  # handle duplicate key
40
25
  new_payload_list = self.__handle_duplicate_api_label_for_dict(payload_list)
41
26
  return new_payload_list
42
27
 
43
- def get_api_model_for_swagger(self, json_dict: dict) -> list[ReqData]:
44
- payload_list = self.convert_swagger_to_model(swagger2_dict=json_dict)
45
- # handle duplicate key
46
- new_payload_list = self.__handle_duplicate_api_label(payload_list)
47
- return new_payload_list
48
-
49
28
  def get_api_list_for_swagger(self, json_dict: dict) -> list[dict]:
50
29
  payload_list = self.convert_swagger_to_list(swagger2_dict=json_dict)
51
30
  # handle duplicate key
52
31
  new_payload_list = self.__handle_duplicate_api_label_for_dict(payload_list)
53
32
  return new_payload_list
54
33
 
55
- def get_api_model_for_openapi(self, json_dict: dict) -> list[ReqData]:
56
- payload_list = self.convert_openapi_to_model(openapi_dict=json_dict)
57
- # handle duplicate key
58
- new_payload_list = self.__handle_duplicate_api_label(payload_list)
59
- return new_payload_list
60
-
61
34
  def get_api_list_for_openapi(self, json_dict: dict) -> list[dict]:
62
35
  payload_list = self.convert_openapi_to_list(openapi_dict=json_dict)
63
36
  # handle duplicate key
64
37
  new_payload_list = self.__handle_duplicate_api_label_for_dict(payload_list)
65
38
  return new_payload_list
66
39
 
67
- def convert_curl_data_to_list(
40
+ def convert_curl_to_list(
68
41
  self, curl_file_path: str, url_filter=None
69
42
  ) -> list[dict]:
70
43
  curl_parser = CurlParser()
@@ -94,34 +67,6 @@ class ApiModelParser:
94
67
  payload_list.append(req_data)
95
68
  return payload_list
96
69
 
97
- def convert_curl_data_to_model(
98
- self, curl_file_path: str, url_filter=None
99
- ) -> list[ReqData]:
100
- curl_parser = CurlParser()
101
- payload_list = []
102
- with open(curl_file_path, "rt") as f:
103
- lines = f.readlines()
104
- line_num_array = curl_parser.get_curl_line_num_scope(lines=lines)
105
- for r in line_num_array:
106
- res_dict = curl_parser.split_curl_to_struct(
107
- lines, r[0], r[1], url_filter
108
- )
109
- req_model = ReqData(dd=res_dict)
110
- url_content = curl_parser.parse_url(req_model.original_url)
111
- req_model.temp_api_label = (
112
- self.__replace_api_label_chars(url_content["path"][1:])
113
- + "_"
114
- + req_model.method
115
- )
116
- req_model.header = json.dumps(req_model.header)
117
- if url_content["query_params"]:
118
- req_model.query_param = json.dumps(url_content["query_params"])
119
- else:
120
- req_model.query_param = {}
121
- req_model.path = url_content["path"][1:]
122
- payload_list.append(req_model)
123
- return payload_list
124
-
125
70
  def convert_postman_to_list(self, postman_dict: dict) -> list[dict]:
126
71
  api_infos = parse_postman(postman_dict) or []
127
72
  payload_list = []
@@ -147,52 +92,6 @@ class ApiModelParser:
147
92
  payload_list.append(req_data)
148
93
  return payload_list
149
94
 
150
- def convert_postman_to_model(self, postman_dict: dict) -> list[ReqData]:
151
- api_infos = parse_postman(postman_dict)
152
- payload_list = []
153
- for api_info in api_infos:
154
- req_data = ReqData()
155
- req_data.path = api_info["path"]
156
- req_data.header = json.dumps(api_info["header"])
157
- req_data.body = api_info["body"]
158
- req_data.query_param = json.dumps(api_info["query_param"])
159
- req_data.original_url = api_info["url"]
160
- req_data.method = api_info["method"].lower()
161
- req_data.temp_api_label = (
162
- self.__handle_http_path(api_info["path"].split("/"), "_")
163
- .replace("{", "")
164
- .replace("}", "")
165
- + "_"
166
- + api_info["method"].lower()
167
- )
168
- payload_list.append(req_data)
169
- return payload_list
170
-
171
- def convert_swagger_to_model(self, swagger2_dict: dict) -> list[ReqData]:
172
- swagger_parser = Swagger2Parser(swagger2_dict)
173
- api_dict = swagger_parser.get_swagger_api_info()
174
- if not api_dict:
175
- print("check your swagger json")
176
- return []
177
- payload_list = []
178
- for path, path_info in api_dict.items():
179
- req_data = ReqData()
180
- req_data.path = self.__handle_http_path(path.split("/")[:-1], "/")
181
- req_data.temp_api_label = (
182
- self.__handle_http_path(path.split("/"), "_")
183
- .replace("{", "")
184
- .replace("}", "")
185
- )
186
- req_data.method = path.split("/")[-1]
187
- req_data.query_param = json.dumps(path_info["query_param"])
188
- req_data.path_param = json.dumps(path_info["path_param"])
189
- req_data.response = json.dumps(path_info.get("response", {}))
190
- # swagger中body第一层和第二层key重复,只取第二层后的数据
191
- for _, v in path_info["body_param"].items():
192
- req_data.body = json.dumps(v)
193
- payload_list.append(req_data)
194
- return payload_list
195
-
196
95
  def convert_swagger_to_list(self, swagger2_dict: dict) -> list[dict]:
197
96
  swagger_parser = Swagger2Parser(swagger2_dict)
198
97
  api_dict = swagger_parser.get_swagger_api_info()
@@ -219,29 +118,6 @@ class ApiModelParser:
219
118
  payload_list.append(req_data)
220
119
  return payload_list
221
120
 
222
- def convert_openapi_to_model(self, openapi_dict: dict) -> list[ReqData]:
223
- if not openapi_dict:
224
- return []
225
- payload_list = []
226
- parser = OpenApiParser(openapi_dict)
227
- api_dict = parser.get_open_api_info()
228
- if not api_dict:
229
- return []
230
- for path, path_info in api_dict.items():
231
- parts = path.split("/")
232
- req = ReqData()
233
- req.path = self.__handle_http_path(parts[:-1], "/")
234
- req.temp_api_label = (
235
- self.__handle_http_path(parts, "_").replace("{", "").replace("}", "")
236
- )
237
- req.method = parts[-1]
238
- req.query_param = json.dumps(path_info.get("query_param", {}))
239
- req.path_param = json.dumps(path_info.get("path_param", {}))
240
- req.response = json.dumps(path_info.get("response", {}))
241
- req.body = json.dumps(path_info.get("body_param", {}))
242
- payload_list.append(req)
243
- return payload_list
244
-
245
121
  def convert_openapi_to_list(self, openapi_dict: dict) -> list[dict]:
246
122
  if not openapi_dict:
247
123
  return []
@@ -284,20 +160,6 @@ class ApiModelParser:
284
160
  new_string = re.sub(pattern, replacement, string)
285
161
  return new_string
286
162
 
287
- def __handle_duplicate_api_label(
288
- self, payload_list: list[ReqData]
289
- ) -> list[ReqData]:
290
- key_filter = {}
291
- new_payload_list = copy.deepcopy(payload_list)
292
- for payload, p_copy in zip(payload_list, new_payload_list):
293
- k = payload.temp_api_label
294
- if k in key_filter.keys():
295
- p_copy.temp_api_label = k + "_" + str(key_filter[k])
296
- key_filter[k] += 1
297
- else:
298
- key_filter[k] = 2
299
- return new_payload_list
300
-
301
163
  def __handle_duplicate_api_label_for_dict(
302
164
  self, payload_list: list[dict]
303
165
  ) -> list[dict]:
@@ -176,13 +176,23 @@ class CurlParser(object):
176
176
  # Body 解析
177
177
  # ---------------------------
178
178
  elif "--data" in line or "--data-raw" in line:
179
- # 去掉行尾的转义符
180
- clean = line.replace("\\\n", "").strip()
181
-
182
- # 提取单引号内容
183
- m = re.search(r"'(.*)'$", clean)
184
- if m:
185
- req_data["body"] = m.group(1)
179
+ # 去掉转义符和多余的空白
180
+ clean = line.replace("\\\n", "").replace("\\", "").strip()
181
+
182
+ # 提取单引号内容 - 更健壮的方式
183
+ # 找到第一个单引号的位置
184
+ first_quote = clean.find("'")
185
+ if first_quote != -1:
186
+ # 找到最后一个单引号的位置
187
+ last_quote = clean.rfind("'")
188
+ if last_quote > first_quote:
189
+ # 提取中间的内容,并清理所有换行和多余空白
190
+ body_content = clean[first_quote + 1 : last_quote]
191
+ # 移除所有换行符并压缩空白
192
+ body_content = re.sub(r"\s+", " ", body_content).strip()
193
+ req_data["body"] = body_content
194
+ else:
195
+ req_data["body"] = ""
186
196
  else:
187
197
  # 兼容无引号情况
188
198
  req_data["body"] = clean.split("--data")[-1].strip()
@@ -5,29 +5,6 @@ class ParamUtil(object):
5
5
  def __init__(self) -> None:
6
6
  self._param_list = []
7
7
 
8
- @staticmethod
9
- def merge_api_params(swagger_dict: dict, api_dict: dict) -> dict:
10
- result = {}
11
- swagger_fail_dict = []
12
- for k, v in swagger_dict.items():
13
- if v["path_param"] or v["query_param"]:
14
- if api_dict.get(k):
15
- temp_v = v
16
- temp_v["body_type"] = v["body"]
17
- temp_v["body"] = api_dict[k]["body"]
18
- temp_v["original_url"] = api_dict[k]["original_url"]
19
- temp_v["query_param"] = api_dict[k]["query_param"]
20
- result[k] = temp_v
21
- else:
22
- swagger_fail_dict.append(k.replace("_", "/"))
23
- else:
24
- if api_dict.get(k):
25
- result[k] = api_dict[k]
26
- else:
27
- swagger_fail_dict.append(k.replace("_", "/"))
28
- # result[k] = v
29
- return result, swagger_fail_dict
30
-
31
8
  @staticmethod
32
9
  def split_swagger_param_and_type(param, nontype=False):
33
10
  sp = SwaggerParam()
@@ -97,7 +74,8 @@ class ParamUtil(object):
97
74
  if isinstance(value, dict):
98
75
  return "{}"
99
76
  if isinstance(value, list):
100
- if len(value) == 1 and not isinstance(value[0], dict):
77
+ # 如果list有多个元素,且不是dict,那么直接返回原始值
78
+ if len(value) >= 1 and not isinstance(value[0], dict):
101
79
  return value
102
80
  else:
103
81
  return "[]"
@@ -170,7 +148,8 @@ class SwaggerParam(object):
170
148
  if isinstance(value, dict):
171
149
  return "{}"
172
150
  if isinstance(value, list):
173
- if len(value) == 1 and not isinstance(value[0], dict):
151
+ # 如果list有多个元素,且不是dict,那么直接返回原始值
152
+ if len(value) >= 1 and not isinstance(value[0], dict):
174
153
  return value
175
154
  else:
176
155
  return "[]"
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: UTF-8 -*-
3
+
4
+ from src.http_content_parser.curl_parser import CurlParser
5
+
6
+
7
+ def test_multiline_json_body():
8
+ # 测试用户提供的多行 JSON 数据
9
+ test_file = '/Users/bytedance/Documents/vs_workspace/myself/HttpParser/tests/tmp'
10
+
11
+ with open(test_file, 'r') as f:
12
+ lines = f.readlines()
13
+
14
+ parser = CurlParser()
15
+ scopes = parser.get_curl_line_num_scope(lines)
16
+
17
+ print(f"Found {len(scopes)} curl command(s)")
18
+
19
+ for s, e in scopes:
20
+ # 测试旧方法
21
+ print("\n--- Testing split_curl_to_struct_old ---")
22
+ result_old = parser.split_curl_to_struct_old(lines, s, e)
23
+ print(f"Method: {result_old.get('method')}")
24
+ print(f"URL: {result_old.get('original_url')}")
25
+ body_old = result_old.get('body', '')
26
+ print(f"Body length: {len(body_old)}")
27
+ print(f"Body starts with: {body_old[:100]}...")
28
+
29
+ # 测试新方法
30
+ print("\n--- Testing split_curl_to_struct ---")
31
+ result_new = parser.split_curl_to_struct(lines, s, e)
32
+ print(f"Method: {result_new.get('method')}")
33
+ print(f"URL: {result_new.get('original_url')}")
34
+ body_new = result_new.get('body', '')
35
+ print(f"Body length: {len(body_new)}")
36
+ print(f"Body starts with: {body_new[:100]}...")
37
+
38
+ # 验证 body 是否完整
39
+ try:
40
+ import json
41
+ parsed_body = json.loads(body_new)
42
+ print("\n✅ Body parsed successfully as JSON!")
43
+ print(f" - create count: {len(parsed_body.get('create', []))}")
44
+ print(f" - update count: {len(parsed_body.get('update', []))}")
45
+ print(f" - delete count: {len(parsed_body.get('delete', []))}")
46
+ except json.JSONDecodeError as e:
47
+ print(f"\n❌ Failed to parse body as JSON: {e}")
48
+
49
+
50
+ if __name__ == '__main__':
51
+ test_multiline_json_body()
@@ -1,4 +1,3 @@
1
- import json
2
1
  from http_content_parser.param_util import ParamUtil
3
2
 
4
3
 
@@ -128,3 +127,19 @@ def test_param_util4():
128
127
  {"['name']": "[]"},
129
128
  ]
130
129
  assert result == expected_result
130
+
131
+
132
+ def test_param_util_5():
133
+ s = {
134
+ "vmid": ["3690976212158729", "2"],
135
+ "type": ["1", "2", "3"],
136
+ "pn": [],
137
+ }
138
+ result = ParamUtil.split_swagger_param_and_type(s, nontype=False)
139
+ print(result)
140
+ expected_result = [
141
+ {"['vmid']": ["3690976212158729", "2"]},
142
+ {"['type']": ["1", "2", "3"]},
143
+ {"['pn']": '[]'},
144
+ ]
145
+ assert result == expected_result
@@ -1,116 +0,0 @@
1
- # -*- coding: UTF-8 -*-
2
-
3
- """
4
- __author__ = leo
5
- """
6
-
7
-
8
- class ReqData(object):
9
-
10
- def __init__(self, dd=None):
11
- if dd and isinstance(dd, dict):
12
- self.__path = dd.get('path', '')
13
- self.__method = dd.get('method', '')
14
- self.__body = dd.get('body', {})
15
- self.__header = dd.get('header', {})
16
- self.__query_param = dd.get('query_param', {})
17
- self.__path_param = dd.get('path_param', {})
18
- self.__original_url = dd.get('original_url', '')
19
- self.__temp_api_label = dd.get('temp_api_label', '')
20
- self.__host = ''
21
- self.__response = dd.get('response', {})
22
- else:
23
- self.__host = ''
24
- self.__path = ''
25
- self.__method = ''
26
- self.__body = {}
27
- self.__header = {}
28
- self.__query_param = {}
29
- self.__path_param = {}
30
- self.__original_url = ''
31
- self.__temp_api_label = ''
32
- self.__response = {}
33
-
34
- @property
35
- def temp_api_label(self):
36
- return self.__temp_api_label
37
-
38
- @temp_api_label.setter
39
- def temp_api_label(self, value):
40
- self.__temp_api_label = value
41
-
42
- @property
43
- def url(self):
44
- return self.__host + self.__path
45
-
46
- @property
47
- def original_url(self):
48
- return self.__original_url
49
-
50
- @original_url.setter
51
- def original_url(self, value):
52
- self.__original_url = value
53
-
54
- @property
55
- def method(self):
56
- return self.__method
57
-
58
- @method.setter
59
- def method(self, value):
60
- self.__method = value
61
-
62
- @property
63
- def body(self):
64
- return self.__body
65
-
66
- @body.setter
67
- def body(self, value):
68
- self.__body = value
69
-
70
- @property
71
- def header(self):
72
- return self.__header
73
-
74
- @header.setter
75
- def header(self, value):
76
- self.__header = value
77
-
78
- @property
79
- def host(self):
80
- return self.__host
81
-
82
- @host.setter
83
- def host(self, value):
84
- self.__host = value
85
-
86
- @property
87
- def path(self):
88
- return self.__path
89
-
90
- @path.setter
91
- def path(self, value):
92
- self.__path = value
93
-
94
- @property
95
- def query_param(self):
96
- return self.__query_param
97
-
98
- @query_param.setter
99
- def query_param(self, value):
100
- self.__query_param = value
101
-
102
- @property
103
- def path_param(self):
104
- return self.__path_param
105
-
106
- @path_param.setter
107
- def path_param(self, value):
108
- self.__path_param = value
109
-
110
- @property
111
- def response(self):
112
- return self.__response
113
-
114
- @response.setter
115
- def response(self, value):
116
- self.__response = value