har2tree 1.34.2__tar.gz → 1.35.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: har2tree
3
- Version: 1.34.2
3
+ Version: 1.35.0
4
4
  Summary: HTTP Archive (HAR) to ETE Toolkit generator
5
5
  License-Expression: BSD-3-Clause
6
6
  License-File: LICENSE
@@ -28,7 +28,7 @@ Requires-Dist: json-stream (>=2.3.3,<3.0.0)
28
28
  Requires-Dist: legacy-cgi (>=2.6.4) ; python_version >= "3.13" and python_version < "4.0"
29
29
  Requires-Dist: multipart (>=1.3.0,<2.0.0)
30
30
  Requires-Dist: numpy (>=2.2,<2.3) ; python_version < "3.11"
31
- Requires-Dist: numpy (>=2.3.4) ; python_version >= "3.11" and python_version < "4.0"
31
+ Requires-Dist: numpy (>=2.3.5) ; python_version >= "3.11" and python_version < "4.0"
32
32
  Requires-Dist: publicsuffixlist (>=1.0.2.20251115)
33
33
  Requires-Dist: requests-toolbelt (>=1.0.0,<2.0.0)
34
34
  Requires-Dist: six (>=1.17.0) ; extra == "docs"
@@ -426,12 +426,20 @@ class Har2Tree:
426
426
 
427
427
  self.url_tree = self._nodes_list.pop(0)
428
428
 
429
+ def _url_to_local_only_content(self, url: str | None) -> bool:
430
+ return (url is None
431
+ or url in ['about:blank', 'about:srcdoc', ''] # not loading anything remotely
432
+ or url.startswith('data') # base64 encoded content
433
+ or url.startswith('chrome-error') # not in the HAR/tree
434
+ or url.startswith('blob') # blobs aren't URLs
435
+ )
436
+
429
437
  def _load_iframes(self, current: URLNode, frames: FramesResponse) -> None:
430
438
  if not frames.get('content') or frames['content'] is None:
431
439
  # NOTE: debug stuff, no content makes it pretty useless.
432
440
  if frames.get('url'):
433
- if frames['url'] == "about:blank":
434
- self.logger.info('Got a frame to about:blank with no content.')
441
+ if self._url_to_local_only_content(frames['url']):
442
+ self.logger.info('Got an empty frame to local content.')
435
443
  else:
436
444
  u = unquote_plus(frames['url'])
437
445
  self.logger.warning(f'Got a url ({u}) for the frame, but no content')
@@ -439,12 +447,7 @@ class Har2Tree:
439
447
  self.logger.info('Got a frame, but no content.')
440
448
  return
441
449
 
442
- if (frames.get('url')
443
- and not (frames['url'] in ['about:blank'] # not loading anything, same as empty
444
- or frames['url'].startswith('data') # base64 encoded content
445
- or frames['url'].startswith('chrome-error') # not in the HAR/tree
446
- or frames['url'].startswith('blob') # blobs aren't URLs
447
- )):
450
+ if frames.get('url') and not self._url_to_local_only_content(frames['url']):
448
451
  u = unquote_plus(frames['url'])
449
452
  possible_child_name = {u, u.split('#', 1)[0]}
450
453
  # this url should be in a node directly attached to that one
@@ -821,11 +824,7 @@ class Har2Tree:
821
824
 
822
825
  def all_real_urls_in_children(self, frame: FramesResponse) -> Iterator[str]:
823
826
  # from a frame, search all the real urls in each of the children, stop at the first one
824
- if (frame.get('url') and frame['url'] is not None
825
- and not (frame['url'] in ['about:blank', 'about:srcdoc'] # not loading anything, same as empty
826
- or frame['url'].startswith('data') # base64 encoded content
827
- or frame['url'].startswith('chrome-error') # not in the HAR/tree
828
- or frame['url'].startswith('blob'))): # blobs aren't URLs
827
+ if (frame.get('url') and frame['url'] is not None and not self._url_to_local_only_content(frame['url'])):
829
828
  yield frame['url']
830
829
  else:
831
830
  # got no real URL, try the children
@@ -72,7 +72,7 @@ def make_hhhash(entry: dict[str, Any]) -> str:
72
72
  # We need the HTTP version used for the query:
73
73
  # * The HTTP Header names in HTTP 1.1 can have uppercase characters
74
74
  # * The HTTP Header names in HTTP 2 *must* be lowercase: https://www.rfc-editor.org/rfc/rfc7540#section-8.1.2
75
- if entry['httpVersion'].lower() in ["http/1.1", "http/1.0"]:
75
+ if entry['httpVersion'].lower() in ["http/1.1", "http/1.0", "1.1"]:
76
76
  return f'hhh:1:{sha256}'
77
77
  if entry['httpVersion'].lower() == "http/2.0":
78
78
  return f'hhh:2:{sha256}'
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "har2tree"
3
- version = "1.34.2"
3
+ version = "1.35.0"
4
4
  description = "HTTP Archive (HAR) to ETE Toolkit generator"
5
5
  authors = [
6
6
  {name="Raphaël Vinot", email="raphael.vinot@circl.lu"}
@@ -19,7 +19,7 @@ dependencies = [
19
19
  # poetry up fails with the version of numpy forced for python < 3.11.
20
20
  # The work around is to comment it, run poetry up, uncomment it. and run poetry update.
21
21
  "numpy (>=2.2,<2.3) ; python_version < '3.11'",
22
- "numpy (>=2.3.4) ; python_version >= \"3.11\" and python_version < \"4.0\"",
22
+ "numpy (>=2.3.5) ; python_version >= \"3.11\" and python_version < \"4.0\"",
23
23
  "w3lib (>=2.3.1)",
24
24
  "tinycss2 (>=1.4.0)",
25
25
  "legacy-cgi (>=2.6.4) ; python_version >= \"3.13\" and python_version < \"4.0\"",
File without changes
File without changes
File without changes
File without changes
File without changes