har2tree 1.31.2__tar.gz → 1.31.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {har2tree-1.31.2 → har2tree-1.31.4}/PKG-INFO +4 -3
- {har2tree-1.31.2 → har2tree-1.31.4}/har2tree/helper.py +1 -1
- {har2tree-1.31.2 → har2tree-1.31.4}/har2tree/nodes.py +51 -24
- {har2tree-1.31.2 → har2tree-1.31.4}/pyproject.toml +6 -5
- {har2tree-1.31.2 → har2tree-1.31.4}/LICENSE +0 -0
- {har2tree-1.31.2 → har2tree-1.31.4}/README.md +0 -0
- {har2tree-1.31.2 → har2tree-1.31.4}/har2tree/__init__.py +0 -0
- {har2tree-1.31.2 → har2tree-1.31.4}/har2tree/har2tree.py +0 -0
- {har2tree-1.31.2 → har2tree-1.31.4}/har2tree/parser.py +0 -0
- {har2tree-1.31.2 → har2tree-1.31.4}/har2tree/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: har2tree
|
|
3
|
-
Version: 1.31.2
|
|
3
|
+
Version: 1.31.4
|
|
4
4
|
Summary: HTTP Archive (HAR) to ETE Toolkit generator
|
|
5
5
|
License: BSD-3-Clause
|
|
6
6
|
Author: Raphaël Vinot
|
|
@@ -25,10 +25,11 @@ Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.13.4)
|
|
|
25
25
|
Requires-Dist: ete3 (>=3.1.3)
|
|
26
26
|
Requires-Dist: filetype (>=1.2.0)
|
|
27
27
|
Requires-Dist: legacy-cgi (>=2.6.3) ; python_version >= "3.13,<4.0"
|
|
28
|
+
Requires-Dist: multipart (>=1.3.0,<2.0.0)
|
|
28
29
|
Requires-Dist: numpy (<2.1) ; python_version < "3.10"
|
|
29
30
|
Requires-Dist: numpy (<2.3) ; python_version < "3.11"
|
|
30
|
-
Requires-Dist: numpy (>=2.3.
|
|
31
|
-
Requires-Dist: publicsuffixlist (>=1.0.2.
|
|
31
|
+
Requires-Dist: numpy (>=2.3.2) ; python_version >= "3.11"
|
|
32
|
+
Requires-Dist: publicsuffixlist (>=1.0.2.20250802)
|
|
32
33
|
Requires-Dist: six (>=1.17.0) ; extra == "docs"
|
|
33
34
|
Requires-Dist: tinycss2 (>=1.4.0)
|
|
34
35
|
Requires-Dist: w3lib (>=2.3.1)
|
|
@@ -286,7 +286,7 @@ def make_soup(html: bytes) -> BeautifulSoup:
|
|
|
286
286
|
try:
|
|
287
287
|
return BeautifulSoup(doc_as_str, 'lxml')
|
|
288
288
|
except Exception as e:
|
|
289
|
-
logger.
|
|
289
|
+
logger.info(f'Unable to parse doc with lxml, try again with the default parser: {e}')
|
|
290
290
|
# Fallback to the default python parser
|
|
291
291
|
return BeautifulSoup(doc_as_str, 'html.parser')
|
|
292
292
|
|
|
@@ -211,20 +211,39 @@ class URLNode(HarTreeNode):
|
|
|
211
211
|
if 'user_agent' not in self.features:
|
|
212
212
|
self.add_feature('user_agent', '')
|
|
213
213
|
|
|
214
|
-
if 'method' in self.request and self.request['method'] == 'POST'
|
|
215
|
-
|
|
216
|
-
if self.request
|
|
217
|
-
|
|
218
|
-
|
|
214
|
+
if 'method' in self.request and self.request['method'] == 'POST':
|
|
215
|
+
decoded_posted_data: str | bytes | int | float | bool | dict[str, str] | None = None
|
|
216
|
+
if 'postData' not in self.request or 'text' not in self.request['postData']:
|
|
217
|
+
self.logger.debug('POST request with no content.')
|
|
218
|
+
elif not self.request['postData']['text']:
|
|
219
|
+
# If the POST content is empty
|
|
220
|
+
self.logger.debug('Empty POST request.')
|
|
221
|
+
decoded_posted_data = ''
|
|
222
|
+
elif self.request['postData'].get('params'):
|
|
223
|
+
# NOTE 2025-08-08
|
|
224
|
+
# if the posted data mimetype is "application/x-www-form-urlencoded"
|
|
225
|
+
# the HAR contains the decoded entry in the params key
|
|
226
|
+
# The params key is a list of dicts with a key and a value
|
|
227
|
+
# {"name": <key>, "value": <data>}
|
|
228
|
+
# I'd rather have it as {<key>: <data>}
|
|
229
|
+
# TODO: some processing on the data part (it's often a json blob)
|
|
230
|
+
self.logger.debug('Got a params POST.')
|
|
231
|
+
decoded_posted_data = {entry['name']: entry['value'] for entry in self.request['postData']['params']}
|
|
232
|
+
else:
|
|
219
233
|
# NOTE 2023-08-22: Blind attempt to base64 decode the data
|
|
234
|
+
self.logger.debug('Got a normal POST')
|
|
220
235
|
try:
|
|
221
|
-
decoded_posted_data = self._dirty_safe_b64decode(
|
|
236
|
+
decoded_posted_data = self._dirty_safe_b64decode(self.request['postData']['text'])
|
|
222
237
|
except binascii.Error:
|
|
223
|
-
decoded_posted_data =
|
|
238
|
+
decoded_posted_data = self.request['postData']['text']
|
|
224
239
|
if 'mimeType' in self.request['postData']:
|
|
225
240
|
# make it easier to compare.
|
|
226
241
|
mimetype_lower = self.request['postData']['mimeType'].lower()
|
|
227
242
|
if mimetype_lower.startswith('application/x-www-form-urlencoded'):
|
|
243
|
+
# NOTE: this should never happen as there should
|
|
244
|
+
# be something in self.request['postData']['params']
|
|
245
|
+
# and we already processed it before but just in case...
|
|
246
|
+
self.logger.warning(f'Got a application/x-www-form-urlencoded without params key: {self.request}')
|
|
228
247
|
# 100% sure there will be websites where decode will fail
|
|
229
248
|
try:
|
|
230
249
|
if isinstance(decoded_posted_data, bytes):
|
|
@@ -247,17 +266,19 @@ class URLNode(HarTreeNode):
|
|
|
247
266
|
decoded_posted_data = json.loads(decoded_posted_data)
|
|
248
267
|
except Exception:
|
|
249
268
|
if isinstance(decoded_posted_data, (str, bytes)):
|
|
250
|
-
self.logger.
|
|
269
|
+
self.logger.warning(f"Expected json, got garbage: {mimetype_lower} - {decoded_posted_data[:20]!r}[...]")
|
|
251
270
|
else:
|
|
252
|
-
self.logger.
|
|
271
|
+
self.logger.warning(f"Expected json, got garbage: {mimetype_lower} - {decoded_posted_data}")
|
|
253
272
|
|
|
254
273
|
elif mimetype_lower.startswith('multipart/form-data'):
|
|
255
274
|
# FIXME multipart content (similar to email). Not totally sure what to do with it right now.
|
|
275
|
+
self.logger.debug(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
256
276
|
pass
|
|
257
277
|
elif mimetype_lower.startswith('application/x-protobuf'):
|
|
258
278
|
# FIXME If possible, decode?
|
|
279
|
+
self.logger.debug(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
259
280
|
pass
|
|
260
|
-
elif mimetype_lower.startswith('text'):
|
|
281
|
+
elif mimetype_lower.startswith('text') and isinstance(decoded_posted_data, (str, bytes)):
|
|
261
282
|
try:
|
|
262
283
|
# NOTE 2023-08-22: Quite a few text entries are in fact json, give it a shot.
|
|
263
284
|
# loads here may give us a int, float or a bool.
|
|
@@ -267,34 +288,40 @@ class URLNode(HarTreeNode):
|
|
|
267
288
|
pass
|
|
268
289
|
elif mimetype_lower.endswith('javascript'):
|
|
269
290
|
# keep it as it is
|
|
291
|
+
self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
270
292
|
pass
|
|
271
293
|
elif mimetype_lower == '?':
|
|
272
294
|
# Just skip it, no need to go in the warnings
|
|
295
|
+
self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
273
296
|
pass
|
|
274
297
|
elif mimetype_lower in ['application/octet-stream', 'application/binary']:
|
|
275
298
|
# Should flag it, maybe?
|
|
299
|
+
self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
276
300
|
pass
|
|
277
301
|
elif mimetype_lower in ['application/unknown', 'application/grpc-web+proto']:
|
|
278
302
|
# Weird but already seen stuff
|
|
303
|
+
self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
279
304
|
pass
|
|
280
305
|
else:
|
|
281
306
|
self.logger.warning(f'Unexpected mime type: {mimetype_lower}')
|
|
307
|
+
else:
|
|
308
|
+
self.logger.warning(f'Missing mimetype in POST: {self.request["postData"]}')
|
|
282
309
|
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
310
|
+
# NOTE 2023-08-22: Blind attempt to process the data as json
|
|
311
|
+
if isinstance(decoded_posted_data, (str, bytes)):
|
|
312
|
+
try:
|
|
313
|
+
decoded_posted_data = json.loads(decoded_posted_data)
|
|
314
|
+
except Exception:
|
|
315
|
+
pass
|
|
289
316
|
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
317
|
+
if isinstance(decoded_posted_data, bytes):
|
|
318
|
+
# NOTE 2023-08-22: Blind attempt to decode the bytes
|
|
319
|
+
# Try to decode it as utf-8
|
|
320
|
+
try:
|
|
321
|
+
decoded_posted_data = decoded_posted_data.decode('utf-8')
|
|
322
|
+
except Exception:
|
|
323
|
+
pass
|
|
324
|
+
self.add_feature('posted_data', decoded_posted_data)
|
|
298
325
|
|
|
299
326
|
self.add_feature('response', har_entry['response'])
|
|
300
327
|
try:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "har2tree"
|
|
3
|
-
version = "1.31.2"
|
|
3
|
+
version = "1.31.4"
|
|
4
4
|
description = "HTTP Archive (HAR) to ETE Toolkit generator"
|
|
5
5
|
authors = [
|
|
6
6
|
{name="Raphaël Vinot", email="raphael.vinot@circl.lu"}
|
|
@@ -14,16 +14,17 @@ dynamic = [ "classifiers" ]
|
|
|
14
14
|
dependencies = [
|
|
15
15
|
"ete3 (>=3.1.3)",
|
|
16
16
|
"beautifulsoup4[charset-normalizer,lxml] (>=4.13.4)",
|
|
17
|
-
"publicsuffixlist (>=1.0.2.
|
|
17
|
+
"publicsuffixlist (>=1.0.2.20250802)",
|
|
18
18
|
"filetype (>=1.2.0)",
|
|
19
19
|
# poetry up fails with the version of numpy forced for python < 3.10.
|
|
20
20
|
# The work around is to comment it, run poetry up, uncomment it. and run poetry update.
|
|
21
21
|
"numpy (<2.1) ; python_version < \"3.10\"",
|
|
22
22
|
"numpy (<2.3) ; python_version < \"3.11\"",
|
|
23
|
-
"numpy (>=2.3.
|
|
23
|
+
"numpy (>=2.3.2) ; python_version >= \"3.11\"",
|
|
24
24
|
"w3lib (>=2.3.1)",
|
|
25
25
|
"tinycss2 (>=1.4.0)",
|
|
26
26
|
"legacy-cgi (>=2.6.3) ; python_version >= \"3.13,<4.0\"",
|
|
27
|
+
"multipart (>=1.3.0,<2.0.0)",
|
|
27
28
|
]
|
|
28
29
|
|
|
29
30
|
[project.urls]
|
|
@@ -45,9 +46,9 @@ classifiers = [
|
|
|
45
46
|
docs = ["Sphinx (>=8.2.3) ; python_version >= \"3.11\"", "six (>=1.17.0)"]
|
|
46
47
|
|
|
47
48
|
[tool.poetry.group.dev.dependencies]
|
|
48
|
-
mypy = "^1.
|
|
49
|
+
mypy = "^1.17.1"
|
|
49
50
|
pytest-cov = "^6.2.1"
|
|
50
|
-
coverage = "^7.
|
|
51
|
+
coverage = "^7.10.2"
|
|
51
52
|
types-beautifulsoup4 = "^4.12.0.20250516"
|
|
52
53
|
|
|
53
54
|
[build-system]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|