har2tree 1.31.4__py3-none-any.whl → 1.31.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- har2tree/nodes.py +35 -9
- {har2tree-1.31.4.dist-info → har2tree-1.31.5.dist-info}/METADATA +4 -3
- {har2tree-1.31.4.dist-info → har2tree-1.31.5.dist-info}/RECORD +5 -5
- {har2tree-1.31.4.dist-info → har2tree-1.31.5.dist-info}/LICENSE +0 -0
- {har2tree-1.31.4.dist-info → har2tree-1.31.5.dist-info}/WHEEL +0 -0
har2tree/nodes.py
CHANGED
|
@@ -15,13 +15,15 @@ from base64 import b64decode
|
|
|
15
15
|
from datetime import datetime, timedelta
|
|
16
16
|
from functools import lru_cache, cached_property
|
|
17
17
|
from hashlib import sha256
|
|
18
|
-
from io import BytesIO
|
|
18
|
+
from io import BytesIO, StringIO
|
|
19
19
|
from pathlib import Path
|
|
20
20
|
from typing import Any
|
|
21
21
|
from collections.abc import MutableMapping
|
|
22
|
-
from urllib.parse import unquote_plus, urlparse, urljoin
|
|
22
|
+
from urllib.parse import unquote_plus, urlparse, urljoin, parse_qs
|
|
23
23
|
|
|
24
24
|
import filetype # type: ignore
|
|
25
|
+
import json_stream # type: ignore
|
|
26
|
+
|
|
25
27
|
from bs4 import BeautifulSoup
|
|
26
28
|
from ete3 import TreeNode # type: ignore
|
|
27
29
|
from publicsuffixlist import PublicSuffixList # type: ignore
|
|
@@ -212,7 +214,7 @@ class URLNode(HarTreeNode):
|
|
|
212
214
|
self.add_feature('user_agent', '')
|
|
213
215
|
|
|
214
216
|
if 'method' in self.request and self.request['method'] == 'POST':
|
|
215
|
-
decoded_posted_data: str | bytes | int | float | bool | dict[str, str] | None = None
|
|
217
|
+
decoded_posted_data: list[Any] | str | bytes | int | float | bool | dict[str, str] | dict[str, list[str]] | None = None
|
|
216
218
|
if 'postData' not in self.request or 'text' not in self.request['postData']:
|
|
217
219
|
self.logger.debug('POST request with no content.')
|
|
218
220
|
elif not self.request['postData']['text']:
|
|
@@ -243,19 +245,20 @@ class URLNode(HarTreeNode):
|
|
|
243
245
|
# NOTE: this should never happen as there should
|
|
244
246
|
# be something in self.request['postData']['params']
|
|
245
247
|
# and we already processed it before but just in case...
|
|
246
|
-
self.logger.
|
|
248
|
+
self.logger.debug('Got a application/x-www-form-urlencoded without params key')
|
|
247
249
|
# 100% sure there will be websites where decode will fail
|
|
248
250
|
try:
|
|
249
251
|
if isinstance(decoded_posted_data, bytes):
|
|
250
252
|
decoded_posted_data = decoded_posted_data.decode()
|
|
251
253
|
if isinstance(decoded_posted_data, str):
|
|
252
254
|
decoded_posted_data = unquote_plus(decoded_posted_data)
|
|
255
|
+
if isinstance(decoded_posted_data, str):
|
|
256
|
+
decoded_posted_data = parse_qs(decoded_posted_data)
|
|
253
257
|
except Exception as e:
|
|
254
|
-
self.logger.warning(f'Unable to unquote form data "{decoded_posted_data!r}": {e}')
|
|
258
|
+
self.logger.warning(f'Unable to unquote or parse form data "{decoded_posted_data!r}": {e}')
|
|
255
259
|
elif (mimetype_lower.startswith('application/json')
|
|
256
260
|
or mimetype_lower.startswith('application/csp-report')
|
|
257
261
|
or mimetype_lower.startswith('application/x-amz-json-1.1')
|
|
258
|
-
or mimetype_lower.startswith('application/x-json-stream')
|
|
259
262
|
or mimetype_lower.startswith('application/reports+json')
|
|
260
263
|
or mimetype_lower.endswith('json')
|
|
261
264
|
):
|
|
@@ -269,7 +272,22 @@ class URLNode(HarTreeNode):
|
|
|
269
272
|
self.logger.warning(f"Expected json, got garbage: {mimetype_lower} - {decoded_posted_data[:20]!r}[...]")
|
|
270
273
|
else:
|
|
271
274
|
self.logger.warning(f"Expected json, got garbage: {mimetype_lower} - {decoded_posted_data}")
|
|
272
|
-
|
|
275
|
+
elif mimetype_lower.startswith('application/x-json-stream'):
|
|
276
|
+
try:
|
|
277
|
+
to_stream: StringIO | BytesIO
|
|
278
|
+
if isinstance(decoded_posted_data, str):
|
|
279
|
+
to_stream = StringIO(decoded_posted_data)
|
|
280
|
+
elif isinstance(decoded_posted_data, bytes):
|
|
281
|
+
to_stream = BytesIO(decoded_posted_data)
|
|
282
|
+
else:
|
|
283
|
+
raise ValueError(f'Invalid type: {type(decoded_posted_data)}')
|
|
284
|
+
streamed_data = json_stream.load(to_stream)
|
|
285
|
+
decoded_posted_data = json_stream.to_standard_types(streamed_data)
|
|
286
|
+
except Exception:
|
|
287
|
+
if isinstance(decoded_posted_data, (str, bytes)):
|
|
288
|
+
self.logger.warning(f"Expected json stream, got garbage: {mimetype_lower} - {decoded_posted_data[:20]!r}[...]")
|
|
289
|
+
else:
|
|
290
|
+
self.logger.warning(f"Expected json stream, got garbage: {mimetype_lower} - {decoded_posted_data}")
|
|
273
291
|
elif mimetype_lower.startswith('multipart/form-data'):
|
|
274
292
|
# FIXME multipart content (similar to email). Not totally sure what to do with it right now.
|
|
275
293
|
self.logger.debug(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
@@ -294,11 +312,17 @@ class URLNode(HarTreeNode):
|
|
|
294
312
|
# Just skip it, no need to go in the warnings
|
|
295
313
|
self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
296
314
|
pass
|
|
297
|
-
elif mimetype_lower
|
|
315
|
+
elif mimetype_lower == 'application/binary':
|
|
316
|
+
# generally a broken gzipped blob
|
|
317
|
+
self.logger.debug(f'Got a POST {mimetype_lower}, most probably a broken gziped blob: {decoded_posted_data!r}')
|
|
318
|
+
elif mimetype_lower in ['application/octet-stream']:
|
|
298
319
|
# Should flag it, maybe?
|
|
299
320
|
self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
300
321
|
pass
|
|
301
|
-
elif mimetype_lower in ['application/
|
|
322
|
+
elif mimetype_lower in ['application/grpc-web+proto']:
|
|
323
|
+
# Can be decoded?
|
|
324
|
+
self.logger.warning(f'Got a POST {mimetype_lower} - can be decoded: {decoded_posted_data!r}')
|
|
325
|
+
elif mimetype_lower in ['application/unknown']:
|
|
302
326
|
# Weird but already seen stuff
|
|
303
327
|
self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
304
328
|
pass
|
|
@@ -322,6 +346,8 @@ class URLNode(HarTreeNode):
|
|
|
322
346
|
except Exception:
|
|
323
347
|
pass
|
|
324
348
|
self.add_feature('posted_data', decoded_posted_data)
|
|
349
|
+
if 'postData' in self.request and self.request['postData'].get('mimeType'):
|
|
350
|
+
self.add_feature('posted_data_mimetype', self.request['postData']['mimeType'])
|
|
325
351
|
|
|
326
352
|
self.add_feature('response', har_entry['response'])
|
|
327
353
|
try:
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: har2tree
|
|
3
|
-
Version: 1.31.4
|
|
3
|
+
Version: 1.31.5
|
|
4
4
|
Summary: HTTP Archive (HAR) to ETE Toolkit generator
|
|
5
5
|
License: BSD-3-Clause
|
|
6
6
|
Author: Raphaël Vinot
|
|
7
7
|
Author-email: raphael.vinot@circl.lu
|
|
8
|
-
Requires-Python: >=3.9
|
|
8
|
+
Requires-Python: >=3.9,<4.0
|
|
9
9
|
Classifier: Intended Audience :: Information Technology
|
|
10
10
|
Classifier: Intended Audience :: Science/Research
|
|
11
11
|
Classifier: Intended Audience :: Telecommunications Industry
|
|
@@ -24,12 +24,13 @@ Requires-Dist: Sphinx (>=8.2.3) ; (python_version >= "3.11") and (extra == "docs
|
|
|
24
24
|
Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.13.4)
|
|
25
25
|
Requires-Dist: ete3 (>=3.1.3)
|
|
26
26
|
Requires-Dist: filetype (>=1.2.0)
|
|
27
|
+
Requires-Dist: json-stream (>=2.3.3,<3.0.0)
|
|
27
28
|
Requires-Dist: legacy-cgi (>=2.6.3) ; python_version >= "3.13,<4.0"
|
|
28
29
|
Requires-Dist: multipart (>=1.3.0,<2.0.0)
|
|
29
30
|
Requires-Dist: numpy (<2.1) ; python_version < "3.10"
|
|
30
31
|
Requires-Dist: numpy (<2.3) ; python_version < "3.11"
|
|
31
32
|
Requires-Dist: numpy (>=2.3.2) ; python_version >= "3.11"
|
|
32
|
-
Requires-Dist: publicsuffixlist (>=1.0.2.
|
|
33
|
+
Requires-Dist: publicsuffixlist (>=1.0.2.20250809)
|
|
33
34
|
Requires-Dist: six (>=1.17.0) ; extra == "docs"
|
|
34
35
|
Requires-Dist: tinycss2 (>=1.4.0)
|
|
35
36
|
Requires-Dist: w3lib (>=2.3.1)
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
har2tree/__init__.py,sha256=Na3mxHkUBq3rzYbxiLNJF37DxH5mcghSorjzXw5Teug,422
|
|
2
2
|
har2tree/har2tree.py,sha256=47x9X5tY69f9SXkYJgJsnAaX2kxgXHgzFThGz6M86Zw,44495
|
|
3
3
|
har2tree/helper.py,sha256=CgeXqfBeHs8SbkW7TRNKqJBTZLAu63KggQjbGHCZAGI,20681
|
|
4
|
-
har2tree/nodes.py,sha256=
|
|
4
|
+
har2tree/nodes.py,sha256=a-5tk_AbnIklbdujlesb_1E0KGnSyK0OsTnbnd5i0D4,32961
|
|
5
5
|
har2tree/parser.py,sha256=4yej1OcVYAIiLfzYZsO9WCw3WyM_ykDTuvpW7UO1ROE,3645
|
|
6
6
|
har2tree/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
-
har2tree-1.31.4.dist-info/LICENSE,sha256=Xa4EVROgJsEo10CW-ISCRiw0TtqdKz1JuM3BBLBM55c,1803
|
|
8
|
-
har2tree-1.31.
|
|
9
|
-
har2tree-1.31.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
10
|
-
har2tree-1.31.4.dist-info/RECORD,,
|
|
7
|
+
har2tree-1.31.5.dist-info/LICENSE,sha256=Xa4EVROgJsEo10CW-ISCRiw0TtqdKz1JuM3BBLBM55c,1803
|
|
8
|
+
har2tree-1.31.5.dist-info/METADATA,sha256=5QfFL4ESUuWJn7JuxcnLSgw70q3MGZoDfe9PJFS5JkA,2203
|
|
9
|
+
har2tree-1.31.5.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
10
|
+
har2tree-1.31.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|