har2tree 1.31.4__py3-none-any.whl → 1.31.6__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- har2tree/nodes.py +79 -19
- {har2tree-1.31.4.dist-info → har2tree-1.31.6.dist-info}/METADATA +4 -3
- {har2tree-1.31.4.dist-info → har2tree-1.31.6.dist-info}/RECORD +5 -5
- {har2tree-1.31.4.dist-info → har2tree-1.31.6.dist-info}/LICENSE +0 -0
- {har2tree-1.31.4.dist-info → har2tree-1.31.6.dist-info}/WHEEL +0 -0
har2tree/nodes.py
CHANGED
|
@@ -15,13 +15,15 @@ from base64 import b64decode
|
|
|
15
15
|
from datetime import datetime, timedelta
|
|
16
16
|
from functools import lru_cache, cached_property
|
|
17
17
|
from hashlib import sha256
|
|
18
|
-
from io import BytesIO
|
|
18
|
+
from io import BytesIO, StringIO
|
|
19
19
|
from pathlib import Path
|
|
20
20
|
from typing import Any
|
|
21
21
|
from collections.abc import MutableMapping
|
|
22
|
-
from urllib.parse import unquote_plus, urlparse, urljoin
|
|
22
|
+
from urllib.parse import unquote_plus, urlparse, urljoin, parse_qs
|
|
23
23
|
|
|
24
24
|
import filetype # type: ignore
|
|
25
|
+
import json_stream # type: ignore
|
|
26
|
+
|
|
25
27
|
from bs4 import BeautifulSoup
|
|
26
28
|
from ete3 import TreeNode # type: ignore
|
|
27
29
|
from publicsuffixlist import PublicSuffixList # type: ignore
|
|
@@ -212,13 +214,22 @@ class URLNode(HarTreeNode):
|
|
|
212
214
|
self.add_feature('user_agent', '')
|
|
213
215
|
|
|
214
216
|
if 'method' in self.request and self.request['method'] == 'POST':
|
|
215
|
-
decoded_posted_data: str | bytes | int | float | bool | dict[str, str] | None = None
|
|
217
|
+
decoded_posted_data: list[Any] | str | bytes | int | float | bool | dict[str, str] | dict[str, list[str]] | None = None
|
|
216
218
|
if 'postData' not in self.request or 'text' not in self.request['postData']:
|
|
217
219
|
self.logger.debug('POST request with no content.')
|
|
220
|
+
self.add_feature('posted_data_info', "No content.")
|
|
218
221
|
elif not self.request['postData']['text']:
|
|
219
222
|
# If the POST content is empty
|
|
220
223
|
self.logger.debug('Empty POST request.')
|
|
221
224
|
decoded_posted_data = ''
|
|
225
|
+
self.add_feature('posted_data_info', "Empty request.")
|
|
226
|
+
elif self.request['postData']['text'].startswith('\x1f\uFFFD\x08'):
|
|
227
|
+
# b'\x1f\xef\xbf\xbd\x08' once re-encoded as UTF-8: the gzip magic b'\x1f\x8b\x08' whose \x8b byte is not valid UTF-8
|
|
228
|
+
# => \xef\xbf\xbd is U+FFFD, the replacement character
|
|
229
|
+
# https://www.cogsci.ed.ac.uk/~richard/utf-8.cgi?input=%EF%BF%BD&mode=char
|
|
230
|
+
self.logger.debug('Got a garbled gzipped POST blob.')
|
|
231
|
+
self.add_feature('posted_data_info', "It was a POSTed gzipped blob, but the data has been garbled.")
|
|
232
|
+
decoded_posted_data = self.request['postData']['text']
|
|
222
233
|
elif self.request['postData'].get('params'):
|
|
223
234
|
# NOTE 2025-08-08
|
|
224
235
|
# if the posted data mimetype is "application/x-www-form-urlencoded"
|
|
@@ -229,10 +240,11 @@ class URLNode(HarTreeNode):
|
|
|
229
240
|
# TODO: some processing on the data part (it's often a json blob)
|
|
230
241
|
self.logger.debug('Got a params POST.')
|
|
231
242
|
decoded_posted_data = {entry['name']: entry['value'] for entry in self.request['postData']['params']}
|
|
243
|
+
self.add_feature('posted_data_info', "POST request as URL params.")
|
|
232
244
|
else:
|
|
233
|
-
# NOTE 2023-08-22: Blind attempt to base64 decode the data
|
|
234
245
|
self.logger.debug('Got a normal POST')
|
|
235
246
|
try:
|
|
247
|
+
# NOTE 2023-08-22: Blind attempt to base64 decode the data
|
|
236
248
|
decoded_posted_data = self._dirty_safe_b64decode(self.request['postData']['text'])
|
|
237
249
|
except binascii.Error:
|
|
238
250
|
decoded_posted_data = self.request['postData']['text']
|
|
@@ -243,19 +255,22 @@ class URLNode(HarTreeNode):
|
|
|
243
255
|
# NOTE: this should never happen as there should
|
|
244
256
|
# be something in self.request['postData']['params']
|
|
245
257
|
# and we already processed it before but just in case...
|
|
246
|
-
self.logger.
|
|
258
|
+
self.logger.debug('Got a application/x-www-form-urlencoded without params key')
|
|
247
259
|
# 100% sure there will be websites where decode will fail
|
|
248
260
|
try:
|
|
249
261
|
if isinstance(decoded_posted_data, bytes):
|
|
250
262
|
decoded_posted_data = decoded_posted_data.decode()
|
|
251
263
|
if isinstance(decoded_posted_data, str):
|
|
252
264
|
decoded_posted_data = unquote_plus(decoded_posted_data)
|
|
265
|
+
if isinstance(decoded_posted_data, str):
|
|
266
|
+
decoded_posted_data = parse_qs(decoded_posted_data)
|
|
267
|
+
self.add_feature('posted_data_info', "Successfully decoded POST request.")
|
|
253
268
|
except Exception as e:
|
|
254
|
-
self.logger.warning(f'Unable to unquote form data "{decoded_posted_data!r}": {e}')
|
|
269
|
+
self.logger.warning(f'Unable to unquote or parse form data "{decoded_posted_data!r}": {e}')
|
|
270
|
+
self.add_feature('posted_data_info', "Unable to decode POST request.")
|
|
255
271
|
elif (mimetype_lower.startswith('application/json')
|
|
256
272
|
or mimetype_lower.startswith('application/csp-report')
|
|
257
273
|
or mimetype_lower.startswith('application/x-amz-json-1.1')
|
|
258
|
-
or mimetype_lower.startswith('application/x-json-stream')
|
|
259
274
|
or mimetype_lower.startswith('application/reports+json')
|
|
260
275
|
or mimetype_lower.endswith('json')
|
|
261
276
|
):
|
|
@@ -264,64 +279,109 @@ class URLNode(HarTreeNode):
|
|
|
264
279
|
try:
|
|
265
280
|
# NOTE 2023-08-22: loads here may give us a int, float or a bool.
|
|
266
281
|
decoded_posted_data = json.loads(decoded_posted_data)
|
|
282
|
+
self.add_feature('posted_data_info', "Successfully decoded POST request.")
|
|
267
283
|
except Exception:
|
|
284
|
+
self.add_feature('posted_data_info', "Unable to decode POST request.")
|
|
268
285
|
if isinstance(decoded_posted_data, (str, bytes)):
|
|
269
286
|
self.logger.warning(f"Expected json, got garbage: {mimetype_lower} - {decoded_posted_data[:20]!r}[...]")
|
|
270
287
|
else:
|
|
271
288
|
self.logger.warning(f"Expected json, got garbage: {mimetype_lower} - {decoded_posted_data}")
|
|
272
|
-
|
|
289
|
+
elif mimetype_lower.startswith('application/x-json-stream'):
|
|
290
|
+
try:
|
|
291
|
+
to_stream: StringIO | BytesIO
|
|
292
|
+
if isinstance(decoded_posted_data, str):
|
|
293
|
+
to_stream = StringIO(decoded_posted_data)
|
|
294
|
+
elif isinstance(decoded_posted_data, bytes):
|
|
295
|
+
to_stream = BytesIO(decoded_posted_data)
|
|
296
|
+
else:
|
|
297
|
+
raise ValueError(f'Invalid type: {type(decoded_posted_data)}')
|
|
298
|
+
streamed_data = json_stream.load(to_stream)
|
|
299
|
+
decoded_posted_data = json_stream.to_standard_types(streamed_data)
|
|
300
|
+
self.add_feature('posted_data_info', "Successfully decoded POST request.")
|
|
301
|
+
except Exception:
|
|
302
|
+
if isinstance(decoded_posted_data, (str, bytes)):
|
|
303
|
+
self.logger.warning(f"Expected json stream, got garbage: {mimetype_lower} - {decoded_posted_data[:20]!r}[...]")
|
|
304
|
+
else:
|
|
305
|
+
self.logger.warning(f"Expected json stream, got garbage: {mimetype_lower} - {decoded_posted_data}")
|
|
306
|
+
self.add_feature('posted_data_info', "Unable to decode POST request.")
|
|
273
307
|
elif mimetype_lower.startswith('multipart/form-data'):
|
|
274
308
|
# FIXME multipart content (similar to email). Not totally sure what to do with it right now.
|
|
275
309
|
self.logger.debug(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
276
|
-
|
|
310
|
+
self.add_feature('posted_data_info', f"Decoding {mimetype_lower} is not supported yet.")
|
|
277
311
|
elif mimetype_lower.startswith('application/x-protobuf'):
|
|
278
312
|
# FIXME If possible, decode?
|
|
279
313
|
self.logger.debug(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
280
|
-
|
|
314
|
+
self.add_feature('posted_data_info', f"Decoding {mimetype_lower} is not supported yet.")
|
|
281
315
|
elif mimetype_lower.startswith('text') and isinstance(decoded_posted_data, (str, bytes)):
|
|
282
316
|
try:
|
|
283
317
|
# NOTE 2023-08-22: Quite a few text entries are in fact json, give it a shot.
|
|
284
318
|
# loads here may give us a int, float or a bool.
|
|
285
319
|
decoded_posted_data = json.loads(decoded_posted_data)
|
|
320
|
+
self.add_feature('posted_data_info', "Decoded JSON out of POST request.")
|
|
286
321
|
except Exception:
|
|
287
322
|
# keep it as it is otherwise.
|
|
288
323
|
pass
|
|
289
324
|
elif mimetype_lower.endswith('javascript'):
|
|
290
325
|
# keep it as it is
|
|
291
326
|
self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
292
|
-
|
|
327
|
+
self.add_feature('posted_data_info', f"Pretty rendering of {mimetype_lower} is not supported yet.")
|
|
293
328
|
elif mimetype_lower == '?':
|
|
294
329
|
# Just skip it, no need to go in the warnings
|
|
295
330
|
self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
296
|
-
|
|
297
|
-
elif mimetype_lower
|
|
331
|
+
self.add_feature('posted_data_info', f"Weird MimeType ({mimetype_lower}) is not supported yet.")
|
|
332
|
+
elif mimetype_lower == 'application/binary':
|
|
333
|
+
self.logger.warning(f'Got a POST {mimetype_lower}, not a broken gziped blob: {decoded_posted_data!r}')
|
|
334
|
+
self.add_feature('posted_data_info', f"MimeType ({mimetype_lower}) is not supported yet.")
|
|
335
|
+
elif mimetype_lower in ['application/octet-stream']:
|
|
298
336
|
# Should flag it, maybe?
|
|
299
337
|
self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
300
|
-
|
|
301
|
-
elif mimetype_lower in ['application/
|
|
338
|
+
self.add_feature('posted_data_info', f"MimeType ({mimetype_lower}) is not supported yet.")
|
|
339
|
+
elif mimetype_lower in ['application/grpc-web+proto']:
|
|
340
|
+
# Can be decoded?
|
|
341
|
+
self.logger.warning(f'Got a POST {mimetype_lower} - can be decoded: {decoded_posted_data!r}')
|
|
342
|
+
self.add_feature('posted_data_info', f"MimeType ({mimetype_lower}) is not supported yet.")
|
|
343
|
+
elif mimetype_lower in ['application/unknown']:
|
|
302
344
|
# Weird but already seen stuff
|
|
303
345
|
self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
304
|
-
|
|
346
|
+
self.add_feature('posted_data_info', f"MimeType ({mimetype_lower}) is not supported yet.")
|
|
305
347
|
else:
|
|
306
|
-
self.logger.warning(f'Unexpected mime type: {mimetype_lower}')
|
|
348
|
+
self.logger.warning(f'Unexpected mime type: {mimetype_lower} - {decoded_posted_data!r}')
|
|
349
|
+
self.add_feature('posted_data_info', f"Unexpected MimeType ({mimetype_lower}) is not supported yet.")
|
|
307
350
|
else:
|
|
308
351
|
self.logger.warning(f'Missing mimetype in POST: {self.request["postData"]}')
|
|
352
|
+
self.add_feature('posted_data_info', "Missing MimeType, not sure what to do.")
|
|
309
353
|
|
|
310
354
|
# NOTE 2023-08-22: Blind attempt to process the data as json
|
|
311
|
-
if isinstance(decoded_posted_data, (str, bytes)):
|
|
355
|
+
if decoded_posted_data and isinstance(decoded_posted_data, (str, bytes)):
|
|
312
356
|
try:
|
|
313
357
|
decoded_posted_data = json.loads(decoded_posted_data)
|
|
314
358
|
except Exception:
|
|
315
359
|
pass
|
|
316
360
|
|
|
317
|
-
if isinstance(decoded_posted_data, bytes):
|
|
361
|
+
if decoded_posted_data and isinstance(decoded_posted_data, bytes):
|
|
318
362
|
# NOTE 2023-08-22: Blind attempt to decode the bytes
|
|
319
363
|
# Try to decode it as utf-8
|
|
320
364
|
try:
|
|
321
365
|
decoded_posted_data = decoded_posted_data.decode('utf-8')
|
|
322
366
|
except Exception:
|
|
323
367
|
pass
|
|
368
|
+
|
|
324
369
|
self.add_feature('posted_data', decoded_posted_data)
|
|
370
|
+
if 'postData' in self.request and self.request['postData'].get('mimeType'):
|
|
371
|
+
self.add_feature('posted_data_mimetype', self.request['postData']['mimeType'])
|
|
372
|
+
# Get size, post decode.
|
|
373
|
+
if not decoded_posted_data:
|
|
374
|
+
# empty or None, set to 0
|
|
375
|
+
self.add_feature('posted_data_size', 0)
|
|
376
|
+
elif isinstance(decoded_posted_data, (list, dict)):
|
|
377
|
+
# set size to the json dump
|
|
378
|
+
self.add_feature('posted_data_size', len(json.dumps(decoded_posted_data)))
|
|
379
|
+
elif isinstance(decoded_posted_data, (str, bytes)):
|
|
380
|
+
# length
|
|
381
|
+
self.add_feature('posted_data_size', len(decoded_posted_data))
|
|
382
|
+
else:
|
|
383
|
+
# Stringify and len
|
|
384
|
+
self.add_feature('posted_data_size', len(str(decoded_posted_data)))
|
|
325
385
|
|
|
326
386
|
self.add_feature('response', har_entry['response'])
|
|
327
387
|
try:
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: har2tree
|
|
3
|
-
Version: 1.31.4
|
|
3
|
+
Version: 1.31.6
|
|
4
4
|
Summary: HTTP Archive (HAR) to ETE Toolkit generator
|
|
5
5
|
License: BSD-3-Clause
|
|
6
6
|
Author: Raphaël Vinot
|
|
7
7
|
Author-email: raphael.vinot@circl.lu
|
|
8
|
-
Requires-Python: >=3.9
|
|
8
|
+
Requires-Python: >=3.9,<4.0
|
|
9
9
|
Classifier: Intended Audience :: Information Technology
|
|
10
10
|
Classifier: Intended Audience :: Science/Research
|
|
11
11
|
Classifier: Intended Audience :: Telecommunications Industry
|
|
@@ -24,12 +24,13 @@ Requires-Dist: Sphinx (>=8.2.3) ; (python_version >= "3.11") and (extra == "docs
|
|
|
24
24
|
Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.13.4)
|
|
25
25
|
Requires-Dist: ete3 (>=3.1.3)
|
|
26
26
|
Requires-Dist: filetype (>=1.2.0)
|
|
27
|
+
Requires-Dist: json-stream (>=2.3.3,<3.0.0)
|
|
27
28
|
Requires-Dist: legacy-cgi (>=2.6.3) ; python_version >= "3.13,<4.0"
|
|
28
29
|
Requires-Dist: multipart (>=1.3.0,<2.0.0)
|
|
29
30
|
Requires-Dist: numpy (<2.1) ; python_version < "3.10"
|
|
30
31
|
Requires-Dist: numpy (<2.3) ; python_version < "3.11"
|
|
31
32
|
Requires-Dist: numpy (>=2.3.2) ; python_version >= "3.11"
|
|
32
|
-
Requires-Dist: publicsuffixlist (>=1.0.2.
|
|
33
|
+
Requires-Dist: publicsuffixlist (>=1.0.2.20250812)
|
|
33
34
|
Requires-Dist: six (>=1.17.0) ; extra == "docs"
|
|
34
35
|
Requires-Dist: tinycss2 (>=1.4.0)
|
|
35
36
|
Requires-Dist: w3lib (>=2.3.1)
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
har2tree/__init__.py,sha256=Na3mxHkUBq3rzYbxiLNJF37DxH5mcghSorjzXw5Teug,422
|
|
2
2
|
har2tree/har2tree.py,sha256=47x9X5tY69f9SXkYJgJsnAaX2kxgXHgzFThGz6M86Zw,44495
|
|
3
3
|
har2tree/helper.py,sha256=CgeXqfBeHs8SbkW7TRNKqJBTZLAu63KggQjbGHCZAGI,20681
|
|
4
|
-
har2tree/nodes.py,sha256=
|
|
4
|
+
har2tree/nodes.py,sha256=Z-NKlcrDcBbEDwpPMFcQzqbCr3bOnb8xzPAxFI5GNSs,36111
|
|
5
5
|
har2tree/parser.py,sha256=4yej1OcVYAIiLfzYZsO9WCw3WyM_ykDTuvpW7UO1ROE,3645
|
|
6
6
|
har2tree/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
-
har2tree-1.31.
|
|
8
|
-
har2tree-1.31.
|
|
9
|
-
har2tree-1.31.
|
|
10
|
-
har2tree-1.31.
|
|
7
|
+
har2tree-1.31.6.dist-info/LICENSE,sha256=Xa4EVROgJsEo10CW-ISCRiw0TtqdKz1JuM3BBLBM55c,1803
|
|
8
|
+
har2tree-1.31.6.dist-info/METADATA,sha256=r4-PI8eNiVboZ7B4NSarQhqxBhOsy7C1gi4e6q2G99Y,2203
|
|
9
|
+
har2tree-1.31.6.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
10
|
+
har2tree-1.31.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|