har2tree 1.35.1__py3-none-any.whl → 1.36.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
har2tree/har2tree.py CHANGED
@@ -750,23 +750,47 @@ class Har2Tree:
750
750
  self.logger.warning(f'Wrong format for the frames ({type(self.har.frames)}), very old capture.')
751
751
  return self.url_tree
752
752
 
753
+ def _guess_best_node_for_partial_referer(self, node: URLNode, potential_parents: list[URLNode]) -> URLNode:
754
+ # we have more than one node with the hostname of the referer *and* content.
755
+ # 2025-12-17:
756
+ # 1. find the deepest HTML node in the list
757
+ for pp in reversed(potential_parents):
758
+ if 'html' in pp.mimetype:
759
+ return pp
760
+ else:
761
+ # 2. if there is no HTML node anywhere in the list, attach to the deepest node
762
+ return potential_parents[-1]
763
+
753
764
  @trace_make_subtree_fallback
754
765
  def _make_subtree_fallback(self, node: URLNode, dev_debug: bool=False) -> None:
755
766
  if hasattr(node, 'referer'):
756
767
  # 2022-04-28: the node has a referer, but for some reason, it couldn't be attached to the tree
757
768
  # Probable reason: the referer is a part of the URL (hostname)
758
- # FIXME: this is a very dirty fix, but I'm not sure we can do it any better
759
769
  if (referer_hostname := urlparse(node.referer).hostname):
760
- # the referer has a hostname
761
770
  if (nodes_with_hostname := self.url_tree.search_nodes(hostname=referer_hostname)):
762
- # the hostname has at least a node in the tree
763
- for node_with_hostname in nodes_with_hostname:
764
- if not node_with_hostname.empty_response:
765
- # we got an non-empty response, breaking
766
- break
767
- # attach to the the first response with something, or to whatever we get.
768
- self._make_subtree(node_with_hostname, [node], fallback=True)
769
- return
771
+ attach_to: URLNode
772
+ # 2025-12-17: we have at least one node with that hostname.
773
+ if len(nodes_with_hostname) == 1:
774
+ # That's the only one, use it
775
+ attach_to = nodes_with_hostname[0]
776
+ else:
777
+ # check if there are empty nodes
778
+ if (nodes_with_hostname_and_response := [n for n in nodes_with_hostname if not n.empty_response]):
779
+ if len(nodes_with_hostname_and_response) == 1:
780
+ attach_to = nodes_with_hostname_and_response[0]
781
+ else:
782
+ # multiple non-empty nodes with that hostname, this is the more difficult one
783
+ attach_to = self._guess_best_node_for_partial_referer(node, nodes_with_hostname_and_response)
784
+ else:
785
+ # more than one node with that hostname, but they're all empty, attach to the first one
786
+ attach_to = nodes_with_hostname[0]
787
+ return self._make_subtree(attach_to, [node], fallback=True)
788
+ else:
789
+ # no node with that hostname at all, this should not happen
790
+ self.logger.warning(f'Unable to find any node with the hostname {referer_hostname}, despites it being set as referer.')
791
+ else:
792
+ # the referer has no hostname and it is fascinating
793
+ self.logger.warning(f'Unable to get hostname out of referer: {node.referer}')
770
794
 
771
795
  # Sometimes, the har has a list of pages, generally when we have HTTP redirects.
772
796
  # IF we have more than one page in the list
@@ -929,7 +953,6 @@ class Har2Tree:
929
953
  if dev_debug:
930
954
  self.logger.warning(f'Found via initiator from {unode.name} to {matching_urls}.')
931
955
  self._make_subtree(unode, matching_urls)
932
-
933
956
  # The node can have a redirect, but also trigger resources referring to themselves, we need to trigger this code on each node.
934
957
  if self.all_initiator_url.get(unode.name):
935
958
  # The URL (unode.name) is in the list of known urls initiating calls
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: har2tree
3
- Version: 1.35.1
3
+ Version: 1.36.1
4
4
  Summary: HTTP Archive (HAR) to ETE Toolkit generator
5
5
  License-Expression: BSD-3-Clause
6
6
  License-File: LICENSE
@@ -20,8 +20,8 @@ Classifier: Programming Language :: Python :: 3.14
20
20
  Classifier: Topic :: Internet
21
21
  Classifier: Topic :: Security
22
22
  Provides-Extra: docs
23
- Requires-Dist: Sphinx (>=8.2.3) ; (python_version >= "3.11") and (extra == "docs")
24
- Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.14.2)
23
+ Requires-Dist: Sphinx (>=9.0.4) ; (python_version >= "3.11") and (extra == "docs")
24
+ Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.14.3)
25
25
  Requires-Dist: ete3 (>=3.1.3)
26
26
  Requires-Dist: filetype (>=1.2.0)
27
27
  Requires-Dist: json-stream (>=2.3.3,<3.0.0)
@@ -29,10 +29,10 @@ Requires-Dist: legacy-cgi (>=2.6.4) ; python_version >= "3.13" and python_versio
29
29
  Requires-Dist: multipart (>=1.3.0,<2.0.0)
30
30
  Requires-Dist: numpy (>=2.2,<2.3) ; python_version < "3.11"
31
31
  Requires-Dist: numpy (>=2.3.5) ; python_version >= "3.11" and python_version < "4.0"
32
- Requires-Dist: publicsuffixlist (>=1.0.2.20251119)
32
+ Requires-Dist: publicsuffixlist (>=1.0.2.20251217)
33
33
  Requires-Dist: requests-toolbelt (>=1.0.0,<2.0.0)
34
34
  Requires-Dist: six (>=1.17.0) ; extra == "docs"
35
- Requires-Dist: tinycss2 (>=1.5.0)
35
+ Requires-Dist: tinycss2 (>=1.5.1)
36
36
  Requires-Dist: w3lib (>=2.3.1)
37
37
  Project-URL: Documentation, https://har2tree.readthedocs.io/en/latest/
38
38
  Project-URL: Repository, https://github.com/Lookyloo/har2tree
@@ -1,10 +1,10 @@
1
1
  har2tree/__init__.py,sha256=Na3mxHkUBq3rzYbxiLNJF37DxH5mcghSorjzXw5Teug,422
2
- har2tree/har2tree.py,sha256=24Puk4dlDXWOVIAPV7SIXNoP-oP-_7ERH2mZPxXiwn8,52762
2
+ har2tree/har2tree.py,sha256=4SYegFw2ycH6kJTkUTTrJb2f6D4ekITEkyZ-4it1PsA,54280
3
3
  har2tree/helper.py,sha256=ktX5Fq-K_t4r0VVAXIH4uy7xc-qCjtSaiUvkX_PYxhw,20737
4
4
  har2tree/nodes.py,sha256=QWKqEUnuW7J6pASVvzwWAQNqL-_KDzSs2ld6uJl3qbw,37710
5
5
  har2tree/parser.py,sha256=4yej1OcVYAIiLfzYZsO9WCw3WyM_ykDTuvpW7UO1ROE,3645
6
6
  har2tree/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- har2tree-1.35.1.dist-info/METADATA,sha256=SGudpw_P8Wrs_x9uCJmL__fJciNsWVeDlx8rFaVurVM,2239
8
- har2tree-1.35.1.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
9
- har2tree-1.35.1.dist-info/licenses/LICENSE,sha256=Xa4EVROgJsEo10CW-ISCRiw0TtqdKz1JuM3BBLBM55c,1803
10
- har2tree-1.35.1.dist-info/RECORD,,
7
+ har2tree-1.36.1.dist-info/METADATA,sha256=QuSZMYKNtpaVwtP8AVjFk_xFS1g0_kmPJ5V78nIQV_8,2239
8
+ har2tree-1.36.1.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
9
+ har2tree-1.36.1.dist-info/licenses/LICENSE,sha256=Xa4EVROgJsEo10CW-ISCRiw0TtqdKz1JuM3BBLBM55c,1803
10
+ har2tree-1.36.1.dist-info/RECORD,,