har2tree 1.27.8__tar.gz → 1.27.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {har2tree-1.27.8 → har2tree-1.27.10}/PKG-INFO +2 -2
- {har2tree-1.27.8 → har2tree-1.27.10}/har2tree/har2tree.py +21 -10
- {har2tree-1.27.8 → har2tree-1.27.10}/pyproject.toml +4 -4
- {har2tree-1.27.8 → har2tree-1.27.10}/LICENSE +0 -0
- {har2tree-1.27.8 → har2tree-1.27.10}/README.md +0 -0
- {har2tree-1.27.8 → har2tree-1.27.10}/har2tree/__init__.py +0 -0
- {har2tree-1.27.8 → har2tree-1.27.10}/har2tree/helper.py +0 -0
- {har2tree-1.27.8 → har2tree-1.27.10}/har2tree/nodes.py +0 -0
- {har2tree-1.27.8 → har2tree-1.27.10}/har2tree/parser.py +0 -0
- {har2tree-1.27.8 → har2tree-1.27.10}/har2tree/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: har2tree
|
|
3
|
-
Version: 1.27.8
|
|
3
|
+
Version: 1.27.10
|
|
4
4
|
Summary: HTTP Archive (HAR) to ETE Toolkit generator
|
|
5
5
|
License: BSD-3-Clause
|
|
6
6
|
Author: Raphaël Vinot
|
|
@@ -21,7 +21,7 @@ Classifier: Topic :: Internet
|
|
|
21
21
|
Classifier: Topic :: Security
|
|
22
22
|
Provides-Extra: docs
|
|
23
23
|
Requires-Dist: Sphinx (>=8.1.3) ; (python_version >= "3.10") and (extra == "docs")
|
|
24
|
-
Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.13.
|
|
24
|
+
Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.13.3)
|
|
25
25
|
Requires-Dist: ete3 (>=3.1.3)
|
|
26
26
|
Requires-Dist: filetype (>=1.2.0)
|
|
27
27
|
Requires-Dist: legacy-cgi (>=2.6.2) ; python_version >= "3.13,<4.0"
|
|
@@ -159,11 +159,12 @@ class HarFile():
|
|
|
159
159
|
# Used to find the root entry of a page in the capture
|
|
160
160
|
# NOTE 2020-05-19: Turns out multiple pages can have the exact same timestamp...
|
|
161
161
|
self.pages_start_times: dict[str, list[dict[str, Any]]] = defaultdict(list)
|
|
162
|
-
|
|
163
|
-
self.
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
self.
|
|
162
|
+
if 'pages' in self.har['log']:
|
|
163
|
+
for page in self.har['log']['pages']:
|
|
164
|
+
self.pages_start_times[page['startedDateTime']].append(page)
|
|
165
|
+
# The first entry has a different start time as the one from the list, add that
|
|
166
|
+
if self.entries:
|
|
167
|
+
self.pages_start_times[self.initial_start_time].append(self.har['log']['pages'][0])
|
|
167
168
|
|
|
168
169
|
# Set to false if initial_redirects fails to find the chain.
|
|
169
170
|
self.need_tree_redirects = False
|
|
@@ -200,7 +201,7 @@ class HarFile():
|
|
|
200
201
|
@property
|
|
201
202
|
def initial_title(self) -> str:
|
|
202
203
|
"""Title of the first page in the capture"""
|
|
203
|
-
if self.har['log']['pages'][0]['title']:
|
|
204
|
+
if 'pages' in self.har['log'] and self.har['log']['pages'][0]['title']:
|
|
204
205
|
return self.har['log']['pages'][0]['title']
|
|
205
206
|
else:
|
|
206
207
|
return '!! No title found !!'
|
|
@@ -670,10 +671,12 @@ class Har2Tree:
|
|
|
670
671
|
|
|
671
672
|
# Sometimes, the har has a list of pages, generally when we have HTTP redirects.
|
|
672
673
|
# IF we have more than one page in the list
|
|
673
|
-
# AND the orphan node's pageref points to an other page than the first one
|
|
674
|
+
# AND the orphan node's pageref points to an other page than the first one
|
|
674
675
|
# AND we already have a node in the tree with this pageref
|
|
675
676
|
# => attach to that node.
|
|
676
|
-
if
|
|
677
|
+
if ('pages' in self.har.har['log'] and len(self.har.har['log']['pages']) > 1
|
|
678
|
+
and node.pageref != self.har.har['log']['pages'][0]
|
|
679
|
+
and self.pages_root[node.pageref] != node.uuid):
|
|
677
680
|
# In that case, we check if there is already a page with the pageref of the orphan node,
|
|
678
681
|
# and attach the node to that. NOTE: we can only do that if there is already a node with this pageref in the tree.
|
|
679
682
|
# This node is not a page root, we can attach it \o/
|
|
@@ -689,7 +692,7 @@ class Har2Tree:
|
|
|
689
692
|
if dev_debug:
|
|
690
693
|
self.logger.warning(f'Failed to attach URLNode in the normal process, attaching node to final redirect: {self.har.final_redirect}.')
|
|
691
694
|
self._make_subtree(self.url_tree.search_nodes(name=self.har.final_redirect)[0], [node])
|
|
692
|
-
|
|
695
|
+
elif 'pages' in self.har.har['log']:
|
|
693
696
|
# No luck, the node is root for this pageref, let's attach it to the prior page in the list, or the very first node (tree root)
|
|
694
697
|
page_before = self.har.har['log']['pages'][0]
|
|
695
698
|
for page in self.har.har['log']['pages'][1:]:
|
|
@@ -711,6 +714,9 @@ class Har2Tree:
|
|
|
711
714
|
page_root_node = self.url_tree
|
|
712
715
|
self.logger.warning('The pages in the HAR are in in the wrong order, this should not happen but here we are')
|
|
713
716
|
self._make_subtree(page_root_node, [node])
|
|
717
|
+
else:
|
|
718
|
+
# no way to attach it to anything else, attach to the root node
|
|
719
|
+
self._make_subtree(self.url_tree, [node])
|
|
714
720
|
|
|
715
721
|
@trace_make_subtree
|
|
716
722
|
def _make_subtree(self, root: URLNode, nodes_to_attach: list[URLNode] | None=None, dev_debug: bool=False) -> None:
|
|
@@ -769,8 +775,13 @@ class Har2Tree:
|
|
|
769
775
|
else:
|
|
770
776
|
self.logger.warning(f'The URLNode has a redirect to something we already processed ({unode.redirect_url}), this should not happen.')
|
|
771
777
|
|
|
772
|
-
#
|
|
778
|
+
# 2025-02-06: If a node has no redirect **and** no content (empty response), we don't want to attach anything to it (it is a leaf)
|
|
779
|
+
# Example: A POST to self that triggers the **parent** to load an other URL. In this case,
|
|
780
|
+
# the proper attachment point is the parent, not this node, even if we have other nodes with this node URL as a referer.
|
|
781
|
+
if unode.empty_response:
|
|
782
|
+
continue
|
|
773
783
|
|
|
784
|
+
# The node can have a redirect, but also trigger ressources refering to themselves, we need to trigger this code on each node.
|
|
774
785
|
if self.all_initiator_url.get(unode.name):
|
|
775
786
|
# The URL (unode.name) is in the list of known urls initiating calls
|
|
776
787
|
for u in self.all_initiator_url[unode.name]:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "har2tree"
|
|
3
|
-
version = "1.27.8"
|
|
3
|
+
version = "1.27.10"
|
|
4
4
|
description = "HTTP Archive (HAR) to ETE Toolkit generator"
|
|
5
5
|
authors = [
|
|
6
6
|
{name="Raphaël Vinot", email="raphael.vinot@circl.lu"}
|
|
@@ -13,7 +13,7 @@ dynamic = [ "classifiers" ]
|
|
|
13
13
|
|
|
14
14
|
dependencies = [
|
|
15
15
|
"ete3 (>=3.1.3)",
|
|
16
|
-
"beautifulsoup4[charset-normalizer,lxml] (>=4.13.
|
|
16
|
+
"beautifulsoup4[charset-normalizer,lxml] (>=4.13.3)",
|
|
17
17
|
"publicsuffixlist (>=1.0.2.20250202)",
|
|
18
18
|
"filetype (>=1.2.0)",
|
|
19
19
|
# poetry up fails with the version of numpy forced for python < 3.10.
|
|
@@ -44,10 +44,10 @@ classifiers = [
|
|
|
44
44
|
docs = ["Sphinx (>=8.1.3) ; python_version >= \"3.10\"", "six (>=1.17.0)"]
|
|
45
45
|
|
|
46
46
|
[tool.poetry.group.dev.dependencies]
|
|
47
|
-
mypy = "^1.
|
|
47
|
+
mypy = "^1.15.0"
|
|
48
48
|
pytest-cov = "^6.0.0"
|
|
49
49
|
coverage = "^7.6.10"
|
|
50
|
-
types-beautifulsoup4 = "^4.12.0.
|
|
50
|
+
types-beautifulsoup4 = "^4.12.0.20250204"
|
|
51
51
|
|
|
52
52
|
[build-system]
|
|
53
53
|
requires = ["poetry-core>=2.0"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|