har2tree 1.31.5__tar.gz → 1.31.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: har2tree
3
- Version: 1.31.5
3
+ Version: 1.31.6
4
4
  Summary: HTTP Archive (HAR) to ETE Toolkit generator
5
5
  License: BSD-3-Clause
6
6
  Author: Raphaël Vinot
@@ -30,7 +30,7 @@ Requires-Dist: multipart (>=1.3.0,<2.0.0)
30
30
  Requires-Dist: numpy (<2.1) ; python_version < "3.10"
31
31
  Requires-Dist: numpy (<2.3) ; python_version < "3.11"
32
32
  Requires-Dist: numpy (>=2.3.2) ; python_version >= "3.11"
33
- Requires-Dist: publicsuffixlist (>=1.0.2.20250809)
33
+ Requires-Dist: publicsuffixlist (>=1.0.2.20250812)
34
34
  Requires-Dist: six (>=1.17.0) ; extra == "docs"
35
35
  Requires-Dist: tinycss2 (>=1.4.0)
36
36
  Requires-Dist: w3lib (>=2.3.1)
@@ -217,10 +217,19 @@ class URLNode(HarTreeNode):
217
217
  decoded_posted_data: list[Any] | str | bytes | int | float | bool | dict[str, str] | dict[str, list[str]] | None = None
218
218
  if 'postData' not in self.request or 'text' not in self.request['postData']:
219
219
  self.logger.debug('POST request with no content.')
220
+ self.add_feature('posted_data_info', "No content.")
220
221
  elif not self.request['postData']['text']:
221
222
  # If the POST content is empty
222
223
  self.logger.debug('Empty POST request.')
223
224
  decoded_posted_data = ''
225
+ self.add_feature('posted_data_info', "Empty request.")
226
+ elif self.request['postData']['text'].startswith('\x1f\uFFFD\x08'):
227
+ # b'\x1f\xef\xbf\xbd\x08', decoded to UTF-8
228
+ # => the replacement character
229
+ # https://www.cogsci.ed.ac.uk/~richard/utf-8.cgi?input=%EF%BF%BD&mode=char
230
+ self.logger.debug('Got a garbled gzipped POST blob.')
231
+ self.add_feature('posted_data_info', "It was a POSTed gzipped blob, but the data has been garbled.")
232
+ decoded_posted_data = self.request['postData']['text']
224
233
  elif self.request['postData'].get('params'):
225
234
  # NOTE 2025-08-08
226
235
  # if the posted data mimetype is "application/x-www-form-urlencoded"
@@ -231,10 +240,11 @@ class URLNode(HarTreeNode):
231
240
  # TODO: some processing on the data part (it's often a json blob)
232
241
  self.logger.debug('Got a params POST.')
233
242
  decoded_posted_data = {entry['name']: entry['value'] for entry in self.request['postData']['params']}
243
+ self.add_feature('posted_data_info', "POST request as URL params.")
234
244
  else:
235
- # NOTE 2023-08-22: Blind attempt to base64 decode the data
236
245
  self.logger.debug('Got a normal POST')
237
246
  try:
247
+ # NOTE 2023-08-22: Blind attempt to base64 decode the data
238
248
  decoded_posted_data = self._dirty_safe_b64decode(self.request['postData']['text'])
239
249
  except binascii.Error:
240
250
  decoded_posted_data = self.request['postData']['text']
@@ -254,8 +264,10 @@ class URLNode(HarTreeNode):
254
264
  decoded_posted_data = unquote_plus(decoded_posted_data)
255
265
  if isinstance(decoded_posted_data, str):
256
266
  decoded_posted_data = parse_qs(decoded_posted_data)
267
+ self.add_feature('posted_data_info', "Successfully decoded POST request.")
257
268
  except Exception as e:
258
269
  self.logger.warning(f'Unable to unquote or parse form data "{decoded_posted_data!r}": {e}')
270
+ self.add_feature('posted_data_info', "Unable to decode POST request.")
259
271
  elif (mimetype_lower.startswith('application/json')
260
272
  or mimetype_lower.startswith('application/csp-report')
261
273
  or mimetype_lower.startswith('application/x-amz-json-1.1')
@@ -267,7 +279,9 @@ class URLNode(HarTreeNode):
267
279
  try:
268
280
  # NOTE 2023-08-22: loads here may give us an int, a float or a bool.
269
281
  decoded_posted_data = json.loads(decoded_posted_data)
282
+ self.add_feature('posted_data_info', "Successfully decoded POST request.")
270
283
  except Exception:
284
+ self.add_feature('posted_data_info', "Unable to decode POST request.")
271
285
  if isinstance(decoded_posted_data, (str, bytes)):
272
286
  self.logger.warning(f"Expected json, got garbage: {mimetype_lower} - {decoded_posted_data[:20]!r}[...]")
273
287
  else:
@@ -283,71 +297,91 @@ class URLNode(HarTreeNode):
283
297
  raise ValueError(f'Invalid type: {type(decoded_posted_data)}')
284
298
  streamed_data = json_stream.load(to_stream)
285
299
  decoded_posted_data = json_stream.to_standard_types(streamed_data)
300
+ self.add_feature('posted_data_info', "Successfully decoded POST request.")
286
301
  except Exception:
287
302
  if isinstance(decoded_posted_data, (str, bytes)):
288
303
  self.logger.warning(f"Expected json stream, got garbage: {mimetype_lower} - {decoded_posted_data[:20]!r}[...]")
289
304
  else:
290
305
  self.logger.warning(f"Expected json stream, got garbage: {mimetype_lower} - {decoded_posted_data}")
306
+ self.add_feature('posted_data_info', "Unable to decode POST request.")
291
307
  elif mimetype_lower.startswith('multipart/form-data'):
292
308
  # FIXME multipart content (similar to email). Not totally sure what to do with it right now.
293
309
  self.logger.debug(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
294
- pass
310
+ self.add_feature('posted_data_info', f"Decoding {mimetype_lower} is not supported yet.")
295
311
  elif mimetype_lower.startswith('application/x-protobuf'):
296
312
  # FIXME If possible, decode?
297
313
  self.logger.debug(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
298
- pass
314
+ self.add_feature('posted_data_info', f"Decoding {mimetype_lower} is not supported yet.")
299
315
  elif mimetype_lower.startswith('text') and isinstance(decoded_posted_data, (str, bytes)):
300
316
  try:
301
317
  # NOTE 2023-08-22: Quite a few text entries are in fact json, give it a shot.
302
318
  # loads here may give us an int, a float or a bool.
303
319
  decoded_posted_data = json.loads(decoded_posted_data)
320
+ self.add_feature('posted_data_info', "Decoded JSON out of POST request.")
304
321
  except Exception:
305
322
  # keep it as it is otherwise.
306
323
  pass
307
324
  elif mimetype_lower.endswith('javascript'):
308
325
  # keep it as it is
309
326
  self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
310
- pass
327
+ self.add_feature('posted_data_info', f"Pretty rendering of {mimetype_lower} is not supported yet.")
311
328
  elif mimetype_lower == '?':
312
329
  # Just skip it, no need to go in the warnings
313
330
  self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
314
- pass
331
+ self.add_feature('posted_data_info', f"Weird MimeType ({mimetype_lower}) is not supported yet.")
315
332
  elif mimetype_lower == 'application/binary':
316
- # generally a broken gzipped blob
317
- self.logger.debug(f'Got a POST {mimetype_lower}, most probably a broken gziped blob: {decoded_posted_data!r}')
333
+ self.logger.warning(f'Got a POST {mimetype_lower}, not a broken gziped blob: {decoded_posted_data!r}')
334
+ self.add_feature('posted_data_info', f"MimeType ({mimetype_lower}) is not supported yet.")
318
335
  elif mimetype_lower in ['application/octet-stream']:
319
336
  # Should flag it, maybe?
320
337
  self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
321
- pass
338
+ self.add_feature('posted_data_info', f"MimeType ({mimetype_lower}) is not supported yet.")
322
339
  elif mimetype_lower in ['application/grpc-web+proto']:
323
340
  # Can be decoded?
324
341
  self.logger.warning(f'Got a POST {mimetype_lower} - can be decoded: {decoded_posted_data!r}')
342
+ self.add_feature('posted_data_info', f"MimeType ({mimetype_lower}) is not supported yet.")
325
343
  elif mimetype_lower in ['application/unknown']:
326
344
  # Weird but already seen stuff
327
345
  self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
328
- pass
346
+ self.add_feature('posted_data_info', f"MimeType ({mimetype_lower}) is not supported yet.")
329
347
  else:
330
- self.logger.warning(f'Unexpected mime type: {mimetype_lower}')
348
+ self.logger.warning(f'Unexpected mime type: {mimetype_lower} - {decoded_posted_data!r}')
349
+ self.add_feature('posted_data_info', f"Unexpected MimeType ({mimetype_lower}) is not supported yet.")
331
350
  else:
332
351
  self.logger.warning(f'Missing mimetype in POST: {self.request["postData"]}')
352
+ self.add_feature('posted_data_info', "Missing MimeType, not sure what to do.")
333
353
 
334
354
  # NOTE 2023-08-22: Blind attempt to process the data as json
335
- if isinstance(decoded_posted_data, (str, bytes)):
355
+ if decoded_posted_data and isinstance(decoded_posted_data, (str, bytes)):
336
356
  try:
337
357
  decoded_posted_data = json.loads(decoded_posted_data)
338
358
  except Exception:
339
359
  pass
340
360
 
341
- if isinstance(decoded_posted_data, bytes):
361
+ if decoded_posted_data and isinstance(decoded_posted_data, bytes):
342
362
  # NOTE 2023-08-22: Blind attempt to decode the bytes
343
363
  # Try to decode it as utf-8
344
364
  try:
345
365
  decoded_posted_data = decoded_posted_data.decode('utf-8')
346
366
  except Exception:
347
367
  pass
368
+
348
369
  self.add_feature('posted_data', decoded_posted_data)
349
370
  if 'postData' in self.request and self.request['postData'].get('mimeType'):
350
371
  self.add_feature('posted_data_mimetype', self.request['postData']['mimeType'])
372
+ # Get size, post decode.
373
+ if not decoded_posted_data:
374
+ # empty or None, set to 0
375
+ self.add_feature('posted_data_size', 0)
376
+ elif isinstance(decoded_posted_data, (list, dict)):
377
+ # set size to the json dump
378
+ self.add_feature('posted_data_size', len(json.dumps(decoded_posted_data)))
379
+ elif isinstance(decoded_posted_data, (str, bytes)):
380
+ # length
381
+ self.add_feature('posted_data_size', len(decoded_posted_data))
382
+ else:
383
+ # Stringify and len
384
+ self.add_feature('posted_data_size', len(str(decoded_posted_data)))
351
385
 
352
386
  self.add_feature('response', har_entry['response'])
353
387
  try:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "har2tree"
3
- version = "1.31.5"
3
+ version = "1.31.6"
4
4
  description = "HTTP Archive (HAR) to ETE Toolkit generator"
5
5
  authors = [
6
6
  {name="Raphaël Vinot", email="raphael.vinot@circl.lu"}
@@ -14,7 +14,7 @@ dynamic = [ "classifiers" ]
14
14
  dependencies = [
15
15
  "ete3 (>=3.1.3)",
16
16
  "beautifulsoup4[charset-normalizer,lxml] (>=4.13.4)",
17
- "publicsuffixlist (>=1.0.2.20250809)",
17
+ "publicsuffixlist (>=1.0.2.20250812)",
18
18
  "filetype (>=1.2.0)",
19
19
  # poetry up fails with the version of numpy forced for python < 3.10.
20
20
  # The workaround is to comment it, run poetry up, uncomment it, and run poetry update.
File without changes
File without changes
File without changes
File without changes
File without changes