har2tree 1.31.4__py3-none-any.whl → 1.31.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
har2tree/nodes.py CHANGED
@@ -15,13 +15,15 @@ from base64 import b64decode
15
15
  from datetime import datetime, timedelta
16
16
  from functools import lru_cache, cached_property
17
17
  from hashlib import sha256
18
- from io import BytesIO
18
+ from io import BytesIO, StringIO
19
19
  from pathlib import Path
20
20
  from typing import Any
21
21
  from collections.abc import MutableMapping
22
- from urllib.parse import unquote_plus, urlparse, urljoin
22
+ from urllib.parse import unquote_plus, urlparse, urljoin, parse_qs
23
23
 
24
24
  import filetype # type: ignore
25
+ import json_stream # type: ignore
26
+
25
27
  from bs4 import BeautifulSoup
26
28
  from ete3 import TreeNode # type: ignore
27
29
  from publicsuffixlist import PublicSuffixList # type: ignore
@@ -212,7 +214,7 @@ class URLNode(HarTreeNode):
212
214
  self.add_feature('user_agent', '')
213
215
 
214
216
  if 'method' in self.request and self.request['method'] == 'POST':
215
- decoded_posted_data: str | bytes | int | float | bool | dict[str, str] | None = None
217
+ decoded_posted_data: list[Any] | str | bytes | int | float | bool | dict[str, str] | dict[str, list[str]] | None = None
216
218
  if 'postData' not in self.request or 'text' not in self.request['postData']:
217
219
  self.logger.debug('POST request with no content.')
218
220
  elif not self.request['postData']['text']:
@@ -243,19 +245,20 @@ class URLNode(HarTreeNode):
243
245
  # NOTE: this should never happen as there should
244
246
  # be something in self.request['postData']['params']
245
247
  # and we already processed it before but just in case...
246
- self.logger.warning(f'Got a application/x-www-form-urlencoded without params key: {self.request}')
248
+ self.logger.debug('Got a application/x-www-form-urlencoded without params key')
247
249
  # 100% sure there will be websites where decode will fail
248
250
  try:
249
251
  if isinstance(decoded_posted_data, bytes):
250
252
  decoded_posted_data = decoded_posted_data.decode()
251
253
  if isinstance(decoded_posted_data, str):
252
254
  decoded_posted_data = unquote_plus(decoded_posted_data)
255
+ if isinstance(decoded_posted_data, str):
256
+ decoded_posted_data = parse_qs(decoded_posted_data)
253
257
  except Exception as e:
254
- self.logger.warning(f'Unable to unquote form data "{decoded_posted_data!r}": {e}')
258
+ self.logger.warning(f'Unable to unquote or parse form data "{decoded_posted_data!r}": {e}')
255
259
  elif (mimetype_lower.startswith('application/json')
256
260
  or mimetype_lower.startswith('application/csp-report')
257
261
  or mimetype_lower.startswith('application/x-amz-json-1.1')
258
- or mimetype_lower.startswith('application/x-json-stream')
259
262
  or mimetype_lower.startswith('application/reports+json')
260
263
  or mimetype_lower.endswith('json')
261
264
  ):
@@ -269,7 +272,22 @@ class URLNode(HarTreeNode):
269
272
  self.logger.warning(f"Expected json, got garbage: {mimetype_lower} - {decoded_posted_data[:20]!r}[...]")
270
273
  else:
271
274
  self.logger.warning(f"Expected json, got garbage: {mimetype_lower} - {decoded_posted_data}")
272
-
275
+ elif mimetype_lower.startswith('application/x-json-stream'):
276
+ try:
277
+ to_stream: StringIO | BytesIO
278
+ if isinstance(decoded_posted_data, str):
279
+ to_stream = StringIO(decoded_posted_data)
280
+ elif isinstance(decoded_posted_data, bytes):
281
+ to_stream = BytesIO(decoded_posted_data)
282
+ else:
283
+ raise ValueError(f'Invalid type: {type(decoded_posted_data)}')
284
+ streamed_data = json_stream.load(to_stream)
285
+ decoded_posted_data = json_stream.to_standard_types(streamed_data)
286
+ except Exception:
287
+ if isinstance(decoded_posted_data, (str, bytes)):
288
+ self.logger.warning(f"Expected json stream, got garbage: {mimetype_lower} - {decoded_posted_data[:20]!r}[...]")
289
+ else:
290
+ self.logger.warning(f"Expected json stream, got garbage: {mimetype_lower} - {decoded_posted_data}")
273
291
  elif mimetype_lower.startswith('multipart/form-data'):
274
292
  # FIXME multipart content (similar to email). Not totally sure what to do with it right now.
275
293
  self.logger.debug(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
@@ -294,11 +312,17 @@ class URLNode(HarTreeNode):
294
312
  # Just skip it, no need to go in the warnings
295
313
  self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
296
314
  pass
297
- elif mimetype_lower in ['application/octet-stream', 'application/binary']:
315
+ elif mimetype_lower == 'application/binary':
316
+ # generally a broken gzipped blob
317
+ self.logger.debug(f'Got a POST {mimetype_lower}, most probably a broken gziped blob: {decoded_posted_data!r}')
318
+ elif mimetype_lower in ['application/octet-stream']:
298
319
  # Should flag it, maybe?
299
320
  self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
300
321
  pass
301
- elif mimetype_lower in ['application/unknown', 'application/grpc-web+proto']:
322
+ elif mimetype_lower in ['application/grpc-web+proto']:
323
+ # Can be decoded?
324
+ self.logger.warning(f'Got a POST {mimetype_lower} - can be decoded: {decoded_posted_data!r}')
325
+ elif mimetype_lower in ['application/unknown']:
302
326
  # Weird but already seen stuff
303
327
  self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
304
328
  pass
@@ -322,6 +346,8 @@ class URLNode(HarTreeNode):
322
346
  except Exception:
323
347
  pass
324
348
  self.add_feature('posted_data', decoded_posted_data)
349
+ if 'postData' in self.request and self.request['postData'].get('mimeType'):
350
+ self.add_feature('posted_data_mimetype', self.request['postData']['mimeType'])
325
351
 
326
352
  self.add_feature('response', har_entry['response'])
327
353
  try:
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: har2tree
3
- Version: 1.31.4
3
+ Version: 1.31.5
4
4
  Summary: HTTP Archive (HAR) to ETE Toolkit generator
5
5
  License: BSD-3-Clause
6
6
  Author: Raphaël Vinot
7
7
  Author-email: raphael.vinot@circl.lu
8
- Requires-Python: >=3.9
8
+ Requires-Python: >=3.9,<4.0
9
9
  Classifier: Intended Audience :: Information Technology
10
10
  Classifier: Intended Audience :: Science/Research
11
11
  Classifier: Intended Audience :: Telecommunications Industry
@@ -24,12 +24,13 @@ Requires-Dist: Sphinx (>=8.2.3) ; (python_version >= "3.11") and (extra == "docs
24
24
  Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.13.4)
25
25
  Requires-Dist: ete3 (>=3.1.3)
26
26
  Requires-Dist: filetype (>=1.2.0)
27
+ Requires-Dist: json-stream (>=2.3.3,<3.0.0)
27
28
  Requires-Dist: legacy-cgi (>=2.6.3) ; python_version >= "3.13,<4.0"
28
29
  Requires-Dist: multipart (>=1.3.0,<2.0.0)
29
30
  Requires-Dist: numpy (<2.1) ; python_version < "3.10"
30
31
  Requires-Dist: numpy (<2.3) ; python_version < "3.11"
31
32
  Requires-Dist: numpy (>=2.3.2) ; python_version >= "3.11"
32
- Requires-Dist: publicsuffixlist (>=1.0.2.20250802)
33
+ Requires-Dist: publicsuffixlist (>=1.0.2.20250809)
33
34
  Requires-Dist: six (>=1.17.0) ; extra == "docs"
34
35
  Requires-Dist: tinycss2 (>=1.4.0)
35
36
  Requires-Dist: w3lib (>=2.3.1)
@@ -1,10 +1,10 @@
1
1
  har2tree/__init__.py,sha256=Na3mxHkUBq3rzYbxiLNJF37DxH5mcghSorjzXw5Teug,422
2
2
  har2tree/har2tree.py,sha256=47x9X5tY69f9SXkYJgJsnAaX2kxgXHgzFThGz6M86Zw,44495
3
3
  har2tree/helper.py,sha256=CgeXqfBeHs8SbkW7TRNKqJBTZLAu63KggQjbGHCZAGI,20681
4
- har2tree/nodes.py,sha256=LfnVP66dhGNcbcoaao32WzzYGSkjOPqrwHE9Swh3sug,31005
4
+ har2tree/nodes.py,sha256=a-5tk_AbnIklbdujlesb_1E0KGnSyK0OsTnbnd5i0D4,32961
5
5
  har2tree/parser.py,sha256=4yej1OcVYAIiLfzYZsO9WCw3WyM_ykDTuvpW7UO1ROE,3645
6
6
  har2tree/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- har2tree-1.31.4.dist-info/LICENSE,sha256=Xa4EVROgJsEo10CW-ISCRiw0TtqdKz1JuM3BBLBM55c,1803
8
- har2tree-1.31.4.dist-info/METADATA,sha256=axJorvFoHABeSnKQhWEn2oVRBtD2ghtaY7mSSiRM1Q8,2154
9
- har2tree-1.31.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
10
- har2tree-1.31.4.dist-info/RECORD,,
7
+ har2tree-1.31.5.dist-info/LICENSE,sha256=Xa4EVROgJsEo10CW-ISCRiw0TtqdKz1JuM3BBLBM55c,1803
8
+ har2tree-1.31.5.dist-info/METADATA,sha256=5QfFL4ESUuWJn7JuxcnLSgw70q3MGZoDfe9PJFS5JkA,2203
9
+ har2tree-1.31.5.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
10
+ har2tree-1.31.5.dist-info/RECORD,,