har2tree 1.27.7__tar.gz → 1.27.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: har2tree
3
- Version: 1.27.7
3
+ Version: 1.27.9
4
4
  Summary: HTTP Archive (HAR) to ETE Toolkit generator
5
5
  License: BSD-3-Clause
6
6
  Author: Raphaël Vinot
@@ -21,16 +21,16 @@ Classifier: Topic :: Internet
21
21
  Classifier: Topic :: Security
22
22
  Provides-Extra: docs
23
23
  Requires-Dist: Sphinx (>=8.1.3) ; (python_version >= "3.10") and (extra == "docs")
24
- Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.12.3)
24
+ Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.13.1)
25
25
  Requires-Dist: ete3 (>=3.1.3)
26
26
  Requires-Dist: filetype (>=1.2.0)
27
27
  Requires-Dist: legacy-cgi (>=2.6.2) ; python_version >= "3.13,<4.0"
28
- Requires-Dist: numpy (<2.1) ; python_version == "3.9"
28
+ Requires-Dist: numpy (<=2.1) ; python_version == "3.9"
29
29
  Requires-Dist: numpy (>=2.2.2) ; python_version >= "3.10"
30
- Requires-Dist: publicsuffixlist (>=1.0.2.20250127)
30
+ Requires-Dist: publicsuffixlist (>=1.0.2.20250202)
31
31
  Requires-Dist: six (>=1.17.0) ; extra == "docs"
32
32
  Requires-Dist: tinycss2 (>=1.4.0)
33
- Requires-Dist: w3lib (>=2.2.1)
33
+ Requires-Dist: w3lib (>=2.3.1)
34
34
  Project-URL: Documentation, https://har2tree.readthedocs.io/en/latest/
35
35
  Project-URL: Repository, https://github.com/Lookyloo/har2tree
36
36
  Project-URL: issues, https://github.com/Lookyloo/har2tree/issues
@@ -159,11 +159,12 @@ class HarFile():
159
159
  # Used to find the root entry of a page in the capture
160
160
  # NOTE 2020-05-19: Turns out multiple pages can have the exact same timestamp...
161
161
  self.pages_start_times: dict[str, list[dict[str, Any]]] = defaultdict(list)
162
- for page in self.har['log']['pages']:
163
- self.pages_start_times[page['startedDateTime']].append(page)
164
- # The first entry has a different start time as the one from the list, add that
165
- if self.entries:
166
- self.pages_start_times[self.initial_start_time].append(self.har['log']['pages'][0])
162
+ if 'pages' in self.har['log']:
163
+ for page in self.har['log']['pages']:
164
+ self.pages_start_times[page['startedDateTime']].append(page)
165
+ # The first entry has a different start time than the one from the list, add that
166
+ if self.entries:
167
+ self.pages_start_times[self.initial_start_time].append(self.har['log']['pages'][0])
167
168
 
168
169
  # Set to false if initial_redirects fails to find the chain.
169
170
  self.need_tree_redirects = False
@@ -200,7 +201,7 @@ class HarFile():
200
201
  @property
201
202
  def initial_title(self) -> str:
202
203
  """Title of the first page in the capture"""
203
- if self.har['log']['pages'][0]['title']:
204
+ if 'pages' in self.har['log'] and self.har['log']['pages'][0]['title']:
204
205
  return self.har['log']['pages'][0]['title']
205
206
  else:
206
207
  return '!! No title found !!'
@@ -670,10 +671,12 @@ class Har2Tree:
670
671
 
671
672
  # Sometimes, the har has a list of pages, generally when we have HTTP redirects.
672
673
  # IF we have more than one page in the list
673
- # AND the orphan node's pageref points to an other page than the first one <= FIXME not enabled yet
674
+ # AND the orphan node's pageref points to another page than the first one
674
675
  # AND we already have a node in the tree with this pageref
675
676
  # => attach to that node.
676
- if len(self.har.har['log']['pages']) > 1 and node.pageref != self.har.har['log']['pages'][0] and self.pages_root[node.pageref] != node.uuid:
677
+ if ('pages' in self.har.har['log'] and len(self.har.har['log']['pages']) > 1
678
+ and node.pageref != self.har.har['log']['pages'][0]
679
+ and self.pages_root[node.pageref] != node.uuid):
677
680
  # In that case, we check if there is already a page with the pageref of the orphan node,
678
681
  # and attach the node to that. NOTE: we can only do that if there is already a node with this pageref in the tree.
679
682
  # This node is not a page root, we can attach it \o/
@@ -689,7 +692,7 @@ class Har2Tree:
689
692
  if dev_debug:
690
693
  self.logger.warning(f'Failed to attach URLNode in the normal process, attaching node to final redirect: {self.har.final_redirect}.')
691
694
  self._make_subtree(self.url_tree.search_nodes(name=self.har.final_redirect)[0], [node])
692
- else:
695
+ elif 'pages' in self.har.har['log']:
693
696
  # No luck, the node is root for this pageref, let's attach it to the prior page in the list, or the very first node (tree root)
694
697
  page_before = self.har.har['log']['pages'][0]
695
698
  for page in self.har.har['log']['pages'][1:]:
@@ -711,6 +714,9 @@ class Har2Tree:
711
714
  page_root_node = self.url_tree
712
715
  self.logger.warning('The pages in the HAR are in the wrong order, this should not happen but here we are')
713
716
  self._make_subtree(page_root_node, [node])
717
+ else:
718
+ # no way to attach it to anything else, attach to the root node
719
+ self._make_subtree(self.url_tree, [node])
714
720
 
715
721
  @trace_make_subtree
716
722
  def _make_subtree(self, root: URLNode, nodes_to_attach: list[URLNode] | None=None, dev_debug: bool=False) -> None:
@@ -84,7 +84,7 @@ class URLNode(HarTreeNode):
84
84
  self.features_to_skip.add('ip_address')
85
85
 
86
86
  def _compute_domhash(self) -> str:
87
- to_hash = "|".join(t.name for t in self.rendered_soup.findAll()).encode()
87
+ to_hash = "|".join(t.name for t in self.rendered_soup.find_all()).encode()
88
88
  return sha256(to_hash).hexdigest()[:32]
89
89
 
90
90
  def add_rendered_features(self, all_requests: list[str], rendered_html: BytesIO | None=None, downloaded_file: tuple[str, BytesIO | None] | None=None) -> None:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "har2tree"
3
- version = "1.27.7"
3
+ version = "1.27.9"
4
4
  description = "HTTP Archive (HAR) to ETE Toolkit generator"
5
5
  authors = [
6
6
  {name="Raphaël Vinot", email="raphael.vinot@circl.lu"}
@@ -13,12 +13,14 @@ dynamic = [ "classifiers" ]
13
13
 
14
14
  dependencies = [
15
15
  "ete3 (>=3.1.3)",
16
- "beautifulsoup4 [lxml,charset_normalizer] (>=4.12.3)",
17
- "publicsuffixlist (>=1.0.2.20250127)",
16
+ "beautifulsoup4[charset-normalizer,lxml] (>=4.13.1)",
17
+ "publicsuffixlist (>=1.0.2.20250202)",
18
18
  "filetype (>=1.2.0)",
19
- "numpy (<2.1) ; python_version == \"3.9\"",
19
+ # poetry up fails with the version of numpy forced for python < 3.10.
20
+ # The workaround is to comment it, run poetry up, uncomment it, and run poetry update.
21
+ "numpy (<=2.1) ; python_version == \"3.9\"",
20
22
  "numpy (>=2.2.2) ; python_version >= \"3.10\"",
21
- "w3lib (>=2.2.1)",
23
+ "w3lib (>=2.3.1)",
22
24
  "tinycss2 (>=1.4.0)",
23
25
  "legacy-cgi (>=2.6.2) ; python_version >= \"3.13,<4.0\"",
24
26
  ]
File without changes
File without changes
File without changes
File without changes
File without changes