har2tree 1.35.1__py3-none-any.whl → 1.36.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- har2tree/har2tree.py +34 -11
- {har2tree-1.35.1.dist-info → har2tree-1.36.1.dist-info}/METADATA +5 -5
- {har2tree-1.35.1.dist-info → har2tree-1.36.1.dist-info}/RECORD +5 -5
- {har2tree-1.35.1.dist-info → har2tree-1.36.1.dist-info}/WHEEL +0 -0
- {har2tree-1.35.1.dist-info → har2tree-1.36.1.dist-info}/licenses/LICENSE +0 -0
har2tree/har2tree.py
CHANGED
|
@@ -750,23 +750,47 @@ class Har2Tree:
|
|
|
750
750
|
self.logger.warning(f'Wrong format for the frames ({type(self.har.frames)}), very old capture.')
|
|
751
751
|
return self.url_tree
|
|
752
752
|
|
|
753
|
+
def _guess_best_node_for_partial_referer(self, node: URLNode, potential_parents: list[URLNode]) -> URLNode:
|
|
754
|
+
# we have more than one node with the hostname of the referer *and* content.
|
|
755
|
+
# 2025-12-17:
|
|
756
|
+
# 1. find the deepest HTML node in the list
|
|
757
|
+
for pp in reversed(potential_parents):
|
|
758
|
+
if 'html' in pp.mimetype:
|
|
759
|
+
return pp
|
|
760
|
+
else:
|
|
761
|
+
# 2. if there are no HTML node anywhere in the list, attach to the deepest node
|
|
762
|
+
return potential_parents[-1]
|
|
763
|
+
|
|
753
764
|
@trace_make_subtree_fallback
|
|
754
765
|
def _make_subtree_fallback(self, node: URLNode, dev_debug: bool=False) -> None:
|
|
755
766
|
if hasattr(node, 'referer'):
|
|
756
767
|
# 2022-04-28: the node has a referer, but for some reason, it could't be attached to the tree
|
|
757
768
|
# Probable reason: the referer is a part of the URL (hostname)
|
|
758
|
-
# FIXME: this is a very dirty fix, but I'm not sure we can do it any better
|
|
759
769
|
if (referer_hostname := urlparse(node.referer).hostname):
|
|
760
|
-
# the referer has a hostname
|
|
761
770
|
if (nodes_with_hostname := self.url_tree.search_nodes(hostname=referer_hostname)):
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
771
|
+
attach_to: URLNode
|
|
772
|
+
# 2025-12-17: we have at least one node with that hostname.
|
|
773
|
+
if len(nodes_with_hostname) == 1:
|
|
774
|
+
# That's the only one, use it
|
|
775
|
+
attach_to = nodes_with_hostname[0]
|
|
776
|
+
else:
|
|
777
|
+
# check if there are empty nodes
|
|
778
|
+
if (nodes_with_hostname_and_response := [n for n in nodes_with_hostname if not n.empty_response]):
|
|
779
|
+
if len(nodes_with_hostname_and_response) == 1:
|
|
780
|
+
attach_to = nodes_with_hostname_and_response[0]
|
|
781
|
+
else:
|
|
782
|
+
# multiple non-empty nodes with that hostname, this is the more difficult one
|
|
783
|
+
attach_to = self._guess_best_node_for_partial_referer(node, nodes_with_hostname_and_response)
|
|
784
|
+
else:
|
|
785
|
+
# more than one node with that hostname, but they're all empty, attach to the first one
|
|
786
|
+
attach_to = nodes_with_hostname[0]
|
|
787
|
+
return self._make_subtree(attach_to, [node], fallback=True)
|
|
788
|
+
else:
|
|
789
|
+
# no node with that hostname at all, this should not happen
|
|
790
|
+
self.logger.warning(f'Unable to find any node with the hostname {referer_hostname}, despites it being set as referer.')
|
|
791
|
+
else:
|
|
792
|
+
# the referer has no hostname and it is fascinating
|
|
793
|
+
self.logger.warning(f'Unable to get hostname out of referer: {node.referer}')
|
|
770
794
|
|
|
771
795
|
# Sometimes, the har has a list of pages, generally when we have HTTP redirects.
|
|
772
796
|
# IF we have more than one page in the list
|
|
@@ -929,7 +953,6 @@ class Har2Tree:
|
|
|
929
953
|
if dev_debug:
|
|
930
954
|
self.logger.warning(f'Found via initiator from {unode.name} to {matching_urls}.')
|
|
931
955
|
self._make_subtree(unode, matching_urls)
|
|
932
|
-
|
|
933
956
|
# The node can have a redirect, but also trigger ressources refering to themselves, we need to trigger this code on each node.
|
|
934
957
|
if self.all_initiator_url.get(unode.name):
|
|
935
958
|
# The URL (unode.name) is in the list of known urls initiating calls
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: har2tree
|
|
3
|
-
Version: 1.35.1
|
|
3
|
+
Version: 1.36.1
|
|
4
4
|
Summary: HTTP Archive (HAR) to ETE Toolkit generator
|
|
5
5
|
License-Expression: BSD-3-Clause
|
|
6
6
|
License-File: LICENSE
|
|
@@ -20,8 +20,8 @@ Classifier: Programming Language :: Python :: 3.14
|
|
|
20
20
|
Classifier: Topic :: Internet
|
|
21
21
|
Classifier: Topic :: Security
|
|
22
22
|
Provides-Extra: docs
|
|
23
|
-
Requires-Dist: Sphinx (>=
|
|
24
|
-
Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.14.
|
|
23
|
+
Requires-Dist: Sphinx (>=9.0.4) ; (python_version >= "3.11") and (extra == "docs")
|
|
24
|
+
Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.14.3)
|
|
25
25
|
Requires-Dist: ete3 (>=3.1.3)
|
|
26
26
|
Requires-Dist: filetype (>=1.2.0)
|
|
27
27
|
Requires-Dist: json-stream (>=2.3.3,<3.0.0)
|
|
@@ -29,10 +29,10 @@ Requires-Dist: legacy-cgi (>=2.6.4) ; python_version >= "3.13" and python_versio
|
|
|
29
29
|
Requires-Dist: multipart (>=1.3.0,<2.0.0)
|
|
30
30
|
Requires-Dist: numpy (>=2.2,<2.3) ; python_version < "3.11"
|
|
31
31
|
Requires-Dist: numpy (>=2.3.5) ; python_version >= "3.11" and python_version < "4.0"
|
|
32
|
-
Requires-Dist: publicsuffixlist (>=1.0.2.
|
|
32
|
+
Requires-Dist: publicsuffixlist (>=1.0.2.20251217)
|
|
33
33
|
Requires-Dist: requests-toolbelt (>=1.0.0,<2.0.0)
|
|
34
34
|
Requires-Dist: six (>=1.17.0) ; extra == "docs"
|
|
35
|
-
Requires-Dist: tinycss2 (>=1.5.
|
|
35
|
+
Requires-Dist: tinycss2 (>=1.5.1)
|
|
36
36
|
Requires-Dist: w3lib (>=2.3.1)
|
|
37
37
|
Project-URL: Documentation, https://har2tree.readthedocs.io/en/latest/
|
|
38
38
|
Project-URL: Repository, https://github.com/Lookyloo/har2tree
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
har2tree/__init__.py,sha256=Na3mxHkUBq3rzYbxiLNJF37DxH5mcghSorjzXw5Teug,422
|
|
2
|
-
har2tree/har2tree.py,sha256=
|
|
2
|
+
har2tree/har2tree.py,sha256=4SYegFw2ycH6kJTkUTTrJb2f6D4ekITEkyZ-4it1PsA,54280
|
|
3
3
|
har2tree/helper.py,sha256=ktX5Fq-K_t4r0VVAXIH4uy7xc-qCjtSaiUvkX_PYxhw,20737
|
|
4
4
|
har2tree/nodes.py,sha256=QWKqEUnuW7J6pASVvzwWAQNqL-_KDzSs2ld6uJl3qbw,37710
|
|
5
5
|
har2tree/parser.py,sha256=4yej1OcVYAIiLfzYZsO9WCw3WyM_ykDTuvpW7UO1ROE,3645
|
|
6
6
|
har2tree/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
-
har2tree-1.
|
|
8
|
-
har2tree-1.35.1.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
9
|
-
har2tree-1.35.1.dist-info/licenses/LICENSE,sha256=Xa4EVROgJsEo10CW-ISCRiw0TtqdKz1JuM3BBLBM55c,1803
|
|
10
|
-
har2tree-1.35.1.dist-info/RECORD,,
|
|
7
|
+
har2tree-1.36.1.dist-info/METADATA,sha256=QuSZMYKNtpaVwtP8AVjFk_xFS1g0_kmPJ5V78nIQV_8,2239
|
|
8
|
+
har2tree-1.36.1.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
9
|
+
har2tree-1.36.1.dist-info/licenses/LICENSE,sha256=Xa4EVROgJsEo10CW-ISCRiw0TtqdKz1JuM3BBLBM55c,1803
|
|
10
|
+
har2tree-1.36.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|