har2tree 1.34.2__tar.gz → 1.35.0__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {har2tree-1.34.2 → har2tree-1.35.0}/PKG-INFO +2 -2
- {har2tree-1.34.2 → har2tree-1.35.0}/har2tree/har2tree.py +12 -13
- {har2tree-1.34.2 → har2tree-1.35.0}/har2tree/helper.py +1 -1
- {har2tree-1.34.2 → har2tree-1.35.0}/pyproject.toml +2 -2
- {har2tree-1.34.2 → har2tree-1.35.0}/LICENSE +0 -0
- {har2tree-1.34.2 → har2tree-1.35.0}/README.md +0 -0
- {har2tree-1.34.2 → har2tree-1.35.0}/har2tree/__init__.py +0 -0
- {har2tree-1.34.2 → har2tree-1.35.0}/har2tree/nodes.py +0 -0
- {har2tree-1.34.2 → har2tree-1.35.0}/har2tree/parser.py +0 -0
- {har2tree-1.34.2 → har2tree-1.35.0}/har2tree/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: har2tree
|
|
3
|
-
Version: 1.34.2
|
|
3
|
+
Version: 1.35.0
|
|
4
4
|
Summary: HTTP Archive (HAR) to ETE Toolkit generator
|
|
5
5
|
License-Expression: BSD-3-Clause
|
|
6
6
|
License-File: LICENSE
|
|
@@ -28,7 +28,7 @@ Requires-Dist: json-stream (>=2.3.3,<3.0.0)
|
|
|
28
28
|
Requires-Dist: legacy-cgi (>=2.6.4) ; python_version >= "3.13" and python_version < "4.0"
|
|
29
29
|
Requires-Dist: multipart (>=1.3.0,<2.0.0)
|
|
30
30
|
Requires-Dist: numpy (>=2.2,<2.3) ; python_version < "3.11"
|
|
31
|
-
Requires-Dist: numpy (>=2.3.
|
|
31
|
+
Requires-Dist: numpy (>=2.3.5) ; python_version >= "3.11" and python_version < "4.0"
|
|
32
32
|
Requires-Dist: publicsuffixlist (>=1.0.2.20251115)
|
|
33
33
|
Requires-Dist: requests-toolbelt (>=1.0.0,<2.0.0)
|
|
34
34
|
Requires-Dist: six (>=1.17.0) ; extra == "docs"
|
|
@@ -426,12 +426,20 @@ class Har2Tree:
|
|
|
426
426
|
|
|
427
427
|
self.url_tree = self._nodes_list.pop(0)
|
|
428
428
|
|
|
429
|
+
def _url_to_local_only_content(self, url: str | None) -> bool:
|
|
430
|
+
return (url is None
|
|
431
|
+
or url in ['about:blank', 'about:srcdoc', ''] # not loading anything remotely
|
|
432
|
+
or url.startswith('data') # base64 encoded content
|
|
433
|
+
or url.startswith('chrome-error') # not in the HAR/tree
|
|
434
|
+
or url.startswith('blob') # blobs aren't URLs
|
|
435
|
+
)
|
|
436
|
+
|
|
429
437
|
def _load_iframes(self, current: URLNode, frames: FramesResponse) -> None:
|
|
430
438
|
if not frames.get('content') or frames['content'] is None:
|
|
431
439
|
# NOTE: debug stuff, no content makes it pretty useless.
|
|
432
440
|
if frames.get('url'):
|
|
433
|
-
if frames['url']
|
|
434
|
-
self.logger.info('Got
|
|
441
|
+
if self._url_to_local_only_content(frames['url']):
|
|
442
|
+
self.logger.info('Got an empty frame to local content.')
|
|
435
443
|
else:
|
|
436
444
|
u = unquote_plus(frames['url'])
|
|
437
445
|
self.logger.warning(f'Got a url ({u}) for the frame, but no content')
|
|
@@ -439,12 +447,7 @@ class Har2Tree:
|
|
|
439
447
|
self.logger.info('Got a frame, but no content.')
|
|
440
448
|
return
|
|
441
449
|
|
|
442
|
-
if
|
|
443
|
-
and not (frames['url'] in ['about:blank'] # not loading anything, same as empty
|
|
444
|
-
or frames['url'].startswith('data') # base64 encoded content
|
|
445
|
-
or frames['url'].startswith('chrome-error') # not in the HAR/tree
|
|
446
|
-
or frames['url'].startswith('blob') # blobs aren't URLs
|
|
447
|
-
)):
|
|
450
|
+
if frames.get('url') and not self._url_to_local_only_content(frames['url']):
|
|
448
451
|
u = unquote_plus(frames['url'])
|
|
449
452
|
possible_child_name = {u, u.split('#', 1)[0]}
|
|
450
453
|
# this url should be in a node directly attached to that one
|
|
@@ -821,11 +824,7 @@ class Har2Tree:
|
|
|
821
824
|
|
|
822
825
|
def all_real_urls_in_children(self, frame: FramesResponse) -> Iterator[str]:
|
|
823
826
|
# from a frame, search all the real urls in each of the children, stop at the first one
|
|
824
|
-
if (frame.get('url') and frame['url'] is not None
|
|
825
|
-
and not (frame['url'] in ['about:blank', 'about:srcdoc'] # not loading anything, same as empty
|
|
826
|
-
or frame['url'].startswith('data') # base64 encoded content
|
|
827
|
-
or frame['url'].startswith('chrome-error') # not in the HAR/tree
|
|
828
|
-
or frame['url'].startswith('blob'))): # blobs aren't URLs
|
|
827
|
+
if (frame.get('url') and frame['url'] is not None and not self._url_to_local_only_content(frame['url'])):
|
|
829
828
|
yield frame['url']
|
|
830
829
|
else:
|
|
831
830
|
# got no real URL, try the children
|
|
@@ -72,7 +72,7 @@ def make_hhhash(entry: dict[str, Any]) -> str:
|
|
|
72
72
|
# We need the HTTP version used for the query:
|
|
73
73
|
# * The HTTP Header names in HTTP 1.1 can have uppercase characters
|
|
74
74
|
# * The HTTP Header names in HTTP 2 *must* be lowercase: https://www.rfc-editor.org/rfc/rfc7540#section-8.1.2
|
|
75
|
-
if entry['httpVersion'].lower() in ["http/1.1", "http/1.0"]:
|
|
75
|
+
if entry['httpVersion'].lower() in ["http/1.1", "http/1.0", "1.1"]:
|
|
76
76
|
return f'hhh:1:{sha256}'
|
|
77
77
|
if entry['httpVersion'].lower() == "http/2.0":
|
|
78
78
|
return f'hhh:2:{sha256}'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "har2tree"
|
|
3
|
-
version = "1.34.2"
|
|
3
|
+
version = "1.35.0"
|
|
4
4
|
description = "HTTP Archive (HAR) to ETE Toolkit generator"
|
|
5
5
|
authors = [
|
|
6
6
|
{name="Raphaël Vinot", email="raphael.vinot@circl.lu"}
|
|
@@ -19,7 +19,7 @@ dependencies = [
|
|
|
19
19
|
# poetry up fails with the version of numpy forced for python < 3.11.
|
|
20
20
|
# The work around is to comment it, run poetry up, uncomment it. and run poetry update.
|
|
21
21
|
"numpy (>=2.2,<2.3) ; python_version < '3.11'",
|
|
22
|
-
"numpy (>=2.3.
|
|
22
|
+
"numpy (>=2.3.5) ; python_version >= \"3.11\" and python_version < \"4.0\"",
|
|
23
23
|
"w3lib (>=2.3.1)",
|
|
24
24
|
"tinycss2 (>=1.4.0)",
|
|
25
25
|
"legacy-cgi (>=2.6.4) ; python_version >= \"3.13\" and python_version < \"4.0\"",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|