har2tree 1.31.3__py3-none-any.whl → 1.36.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- har2tree/har2tree.py +185 -31
- har2tree/helper.py +3 -2
- har2tree/nodes.py +154 -45
- {har2tree-1.31.3.dist-info → har2tree-1.36.0.dist-info}/METADATA +16 -14
- har2tree-1.36.0.dist-info/RECORD +10 -0
- {har2tree-1.31.3.dist-info → har2tree-1.36.0.dist-info}/WHEEL +1 -1
- har2tree-1.31.3.dist-info/RECORD +0 -10
- {har2tree-1.31.3.dist-info → har2tree-1.36.0.dist-info/licenses}/LICENSE +0 -0
har2tree/har2tree.py
CHANGED
|
@@ -13,7 +13,9 @@ from functools import wraps, lru_cache
|
|
|
13
13
|
from io import BytesIO
|
|
14
14
|
from operator import itemgetter
|
|
15
15
|
from pathlib import Path
|
|
16
|
-
from typing import Any,
|
|
16
|
+
from typing import Any, TypedDict
|
|
17
|
+
from collections.abc import Iterator
|
|
18
|
+
from collections.abc import Callable
|
|
17
19
|
from urllib.parse import unquote_plus, urlparse
|
|
18
20
|
|
|
19
21
|
from .helper import rebuild_url, Har2TreeError, Har2TreeLogAdapter
|
|
@@ -58,7 +60,8 @@ def trace_make_subtree_fallback(method: Callable[..., None]) -> Callable[..., No
|
|
|
58
60
|
|
|
59
61
|
def trace_make_subtree(method: Callable[..., None]) -> Callable[..., None]:
|
|
60
62
|
@wraps(method)
|
|
61
|
-
def _impl(self: Any, root: URLNode, nodes_to_attach: list[URLNode] | None=None,
|
|
63
|
+
def _impl(self: Any, root: URLNode, nodes_to_attach: list[URLNode] | None=None,
|
|
64
|
+
dev_debug: bool=False, fallback: bool=False) -> None:
|
|
62
65
|
if dev_debug_mode:
|
|
63
66
|
__load_debug_files()
|
|
64
67
|
if dev_debug_url and root.name == dev_debug_url or nodes_to_attach is not None and any(True for u in nodes_to_attach if u.name == dev_debug_url):
|
|
@@ -67,7 +70,7 @@ def trace_make_subtree(method: Callable[..., None]) -> Callable[..., None]:
|
|
|
67
70
|
elif dev_debug_hostname and root.hostname == dev_debug_hostname or nodes_to_attach is not None and any(True for u in nodes_to_attach if u.hostname == dev_debug_hostname):
|
|
68
71
|
root.logger.warning(f'Debugging Hostname: {dev_debug_hostname}.')
|
|
69
72
|
dev_debug = True
|
|
70
|
-
return method(self, root, nodes_to_attach, dev_debug)
|
|
73
|
+
return method(self, root, nodes_to_attach, dev_debug, fallback)
|
|
71
74
|
return _impl
|
|
72
75
|
|
|
73
76
|
|
|
@@ -84,6 +87,15 @@ def __load_debug_files() -> None:
|
|
|
84
87
|
dev_debug_hostname = f.read().strip()
|
|
85
88
|
|
|
86
89
|
|
|
90
|
+
# NOTE: Copy from PlaywrightCapture to avoid extra dep
|
|
91
|
+
class FramesResponse(TypedDict, total=False):
|
|
92
|
+
|
|
93
|
+
name: str
|
|
94
|
+
url: str
|
|
95
|
+
content: str | None
|
|
96
|
+
children: list[FramesResponse] | None
|
|
97
|
+
|
|
98
|
+
|
|
87
99
|
class HarFile():
|
|
88
100
|
|
|
89
101
|
def __init__(self, harfile: Path, capture_uuid: str):
|
|
@@ -115,8 +127,14 @@ class HarFile():
|
|
|
115
127
|
last_redirect_file = self.path.parent / f'{root_name}.last_redirect.txt'
|
|
116
128
|
if last_redirect_file.is_file():
|
|
117
129
|
with last_redirect_file.open('r') as _lr:
|
|
118
|
-
|
|
119
|
-
self.
|
|
130
|
+
last_redirect = unquote_plus(_lr.read())
|
|
131
|
+
self.final_redirect: str = last_redirect
|
|
132
|
+
if not self._search_final_redirect():
|
|
133
|
+
if last_redirect.startswith('chrome') or last_redirect.startswith('about'):
|
|
134
|
+
# the capture failed.
|
|
135
|
+
pass
|
|
136
|
+
else:
|
|
137
|
+
self.logger.info(f'Final redirect URL from address bar not in tree: {last_redirect}')
|
|
120
138
|
else:
|
|
121
139
|
self.logger.debug('No last_redirect file available.')
|
|
122
140
|
self.final_redirect = ''
|
|
@@ -129,6 +147,14 @@ class HarFile():
|
|
|
129
147
|
self.logger.debug('No cookies file available.')
|
|
130
148
|
self.cookies = []
|
|
131
149
|
|
|
150
|
+
framesfile = self.path.parent / f'{root_name}.frames.json'
|
|
151
|
+
if framesfile.is_file():
|
|
152
|
+
with framesfile.open() as c:
|
|
153
|
+
self.frames: FramesResponse = json.load(c)
|
|
154
|
+
else:
|
|
155
|
+
self.logger.debug('No frames file available.')
|
|
156
|
+
self.frames = {}
|
|
157
|
+
|
|
132
158
|
dlfile = self.path.parent / f'{root_name}.data'
|
|
133
159
|
dlfilename = self.path.parent / f'{root_name}.data.filename'
|
|
134
160
|
self.downloaded_file: BytesIO | None
|
|
@@ -169,29 +195,30 @@ class HarFile():
|
|
|
169
195
|
# Set to false if initial_redirects fails to find the chain.
|
|
170
196
|
self.need_tree_redirects = False
|
|
171
197
|
|
|
172
|
-
def _search_final_redirect(self) ->
|
|
198
|
+
def _search_final_redirect(self) -> bool:
|
|
173
199
|
"""Try to find the final path to the final redirect without building the tree"""
|
|
174
200
|
for e in self.entries:
|
|
175
201
|
unquoted_url = unquote_plus(e['request']['url'])
|
|
176
202
|
if unquoted_url == self.final_redirect:
|
|
177
|
-
|
|
203
|
+
return True
|
|
178
204
|
elif unquoted_url.startswith(f'{self.final_redirect}?'):
|
|
179
205
|
# WARNING: the URL in that file may not be present in the HAR: the query part is stripped by splash
|
|
180
206
|
self.final_redirect = unquoted_url
|
|
181
|
-
|
|
207
|
+
return True
|
|
182
208
|
else:
|
|
183
209
|
# Update 2020-04-01: .. but the fragment is not stripped so self.final_redirect may not be found
|
|
184
210
|
# Unless we find the entry in the har, we need to search again without the fragment
|
|
185
211
|
if '#' in self.final_redirect:
|
|
186
212
|
self.final_redirect = self.final_redirect.split('#', 1)[0]
|
|
187
|
-
self._search_final_redirect()
|
|
213
|
+
return self._search_final_redirect()
|
|
188
214
|
elif '?' in self.final_redirect:
|
|
189
215
|
# At this point, we're trying things. The final URL returned by splash may have been changed
|
|
190
216
|
# in JavaScript and never appear in the HAR. Let's try to find the closest one with the same path
|
|
191
217
|
self.final_redirect = self.final_redirect.split('?', 1)[0]
|
|
192
|
-
self._search_final_redirect()
|
|
218
|
+
return self._search_final_redirect()
|
|
193
219
|
else:
|
|
194
220
|
self.logger.info(f'Unable to find the final redirect: {self.final_redirect}')
|
|
221
|
+
return False
|
|
195
222
|
|
|
196
223
|
@property
|
|
197
224
|
def number_entries(self) -> int:
|
|
@@ -306,8 +333,9 @@ class Har2Tree:
|
|
|
306
333
|
self.pages_root: dict[str, str] = {}
|
|
307
334
|
|
|
308
335
|
self.all_redirects: list[str] = []
|
|
309
|
-
|
|
310
|
-
self.
|
|
336
|
+
# 2025-11-16: make values of referers and initiators sets because there will be duplicates
|
|
337
|
+
self.all_referer: dict[str, set[str]] = defaultdict(set)
|
|
338
|
+
self.all_initiator_url: dict[str, set[str]] = defaultdict(set)
|
|
311
339
|
self._load_url_entries()
|
|
312
340
|
|
|
313
341
|
# Generate cookies lookup tables
|
|
@@ -398,6 +426,65 @@ class Har2Tree:
|
|
|
398
426
|
|
|
399
427
|
self.url_tree = self._nodes_list.pop(0)
|
|
400
428
|
|
|
429
|
+
def _url_to_local_only_content(self, url: str | None) -> bool:
|
|
430
|
+
return (url is None
|
|
431
|
+
or url in ['about:blank', 'about:srcdoc', ''] # not loading anything remotely
|
|
432
|
+
or url.startswith('data') # base64 encoded content
|
|
433
|
+
or url.startswith('chrome-error') # not in the HAR/tree
|
|
434
|
+
or url.startswith('blob') # blobs aren't URLs
|
|
435
|
+
)
|
|
436
|
+
|
|
437
|
+
def _load_iframes(self, current: URLNode, frames: FramesResponse) -> None:
|
|
438
|
+
if not frames.get('content') or frames['content'] is None:
|
|
439
|
+
# NOTE: debug stuff, no content makes it pretty useless.
|
|
440
|
+
if frames.get('url'):
|
|
441
|
+
if self._url_to_local_only_content(frames['url']):
|
|
442
|
+
self.logger.info('Got an empty frame to local content.')
|
|
443
|
+
else:
|
|
444
|
+
u = unquote_plus(frames['url'])
|
|
445
|
+
self.logger.warning(f'Got a url ({u}) for the frame, but no content')
|
|
446
|
+
else:
|
|
447
|
+
self.logger.info('Got a frame, but no content.')
|
|
448
|
+
return
|
|
449
|
+
|
|
450
|
+
if frames.get('url') and not self._url_to_local_only_content(frames['url']):
|
|
451
|
+
u = unquote_plus(frames['url'])
|
|
452
|
+
possible_child_name = {u, u.split('#', 1)[0]}
|
|
453
|
+
# this url should be in a node directly attached to that one
|
|
454
|
+
# we need to find that node
|
|
455
|
+
for child in current.traverse():
|
|
456
|
+
if child.name in possible_child_name:
|
|
457
|
+
self.logger.debug(f'Found URL "{u}".')
|
|
458
|
+
# Found the node, adding the content
|
|
459
|
+
if not hasattr(child, 'rendered_frame'):
|
|
460
|
+
child.rendered_frame = []
|
|
461
|
+
child.rendered_frame.append(BytesIO(frames['content'].encode()))
|
|
462
|
+
# and mark the node as iframe
|
|
463
|
+
child.add_feature('iframe', True)
|
|
464
|
+
# if there are children, use that node as parent and call the current method recursively
|
|
465
|
+
if f_children := frames.get('children'):
|
|
466
|
+
for f_child in f_children:
|
|
467
|
+
self._load_iframes(child, f_child)
|
|
468
|
+
break
|
|
469
|
+
else:
|
|
470
|
+
# Couldn't find the node Oo
|
|
471
|
+
to_print = ', '.join(possible_child_name)
|
|
472
|
+
children_to_print = ', '.join([child.name for child in current.traverse()])
|
|
473
|
+
self.logger.warning(f'Unable to find "{to_print}" in the children of "{current.name}" - {children_to_print}')
|
|
474
|
+
else:
|
|
475
|
+
self.logger.debug(f'"{current.name}" contains an iFrame.')
|
|
476
|
+
# No URL, this frame is directly in the parent frame.
|
|
477
|
+
if not hasattr(current, 'rendered_frame'):
|
|
478
|
+
current.rendered_frame = []
|
|
479
|
+
current.rendered_frame.append(BytesIO(frames['content'].encode()))
|
|
480
|
+
self.logger.debug(f'"{current.name}" has {len(current.rendered_frame)} iFrames.')
|
|
481
|
+
# and mark the node as iframe
|
|
482
|
+
current.add_feature('iframe', True)
|
|
483
|
+
# if there are children, use that node as parent and call the current method recursively
|
|
484
|
+
if f_children := frames.get('children'):
|
|
485
|
+
for f_child in f_children:
|
|
486
|
+
self._load_iframes(current, f_child)
|
|
487
|
+
|
|
401
488
|
@property
|
|
402
489
|
def initial_referer(self) -> str | None:
|
|
403
490
|
'''The referer passed to the first URL in the tree'''
|
|
@@ -520,7 +607,7 @@ class Har2Tree:
|
|
|
520
607
|
|
|
521
608
|
if hasattr(n, 'initiator_url'):
|
|
522
609
|
# The HAR file was created by chrome/chromium and we got the _initiator key
|
|
523
|
-
self.all_initiator_url[n.initiator_url].
|
|
610
|
+
self.all_initiator_url[n.initiator_url].add(n.name)
|
|
524
611
|
|
|
525
612
|
if url_entry['startedDateTime'] in self.har.pages_start_times:
|
|
526
613
|
for page in self.har.pages_start_times[url_entry['startedDateTime']]:
|
|
@@ -533,7 +620,7 @@ class Har2Tree:
|
|
|
533
620
|
if hasattr(n, 'referer') and i > 0:
|
|
534
621
|
# NOTE 2021-05-14: referer to self are a real thing: url -> POST to self
|
|
535
622
|
if n.name != n.referer or ('method' in n.request and n.request['method'] == 'POST'):
|
|
536
|
-
self.all_referer[n.referer].
|
|
623
|
+
self.all_referer[n.referer].add(n.name)
|
|
537
624
|
|
|
538
625
|
self._nodes_list.append(n)
|
|
539
626
|
self.all_url_requests[n.name].append(n)
|
|
@@ -566,12 +653,6 @@ class Har2Tree:
|
|
|
566
653
|
if node:
|
|
567
654
|
return node[0]
|
|
568
655
|
|
|
569
|
-
browser_errors = ['chrome-error', 'about:blank']
|
|
570
|
-
if self.har.final_redirect and not any(self.har.final_redirect.startswith(r) for r in browser_errors):
|
|
571
|
-
self.logger.warning(f'Final redirect URL from adress bar not in tree: {self.har.final_redirect}')
|
|
572
|
-
else:
|
|
573
|
-
# No final redirect, already logged earlier.
|
|
574
|
-
pass
|
|
575
656
|
# Just try to get the best guess: first node after JS/HTTP redirects
|
|
576
657
|
curnode = self.url_tree
|
|
577
658
|
while hasattr(curnode, 'redirect') and curnode.redirect:
|
|
@@ -620,6 +701,14 @@ class Har2Tree:
|
|
|
620
701
|
for child_node_hostname, child_nodes_url in sub_roots.items():
|
|
621
702
|
self.make_hostname_tree(child_nodes_url, child_node_hostname)
|
|
622
703
|
|
|
704
|
+
def _all_urlnodes_in_host_tree(self) -> None:
|
|
705
|
+
# debug: check if all the nodes in the URL tree are in the hostnode tree (they must have an UUID)
|
|
706
|
+
self.logger.warning('Validating host tree....')
|
|
707
|
+
for urlnode in self.url_tree.traverse():
|
|
708
|
+
if not hasattr(urlnode, 'hostnode_uuid'):
|
|
709
|
+
self.logger.error(f'URL Node not un host tree: {urlnode}')
|
|
710
|
+
self.logger.warning('host tree validated.')
|
|
711
|
+
|
|
623
712
|
def make_tree(self) -> URLNode:
|
|
624
713
|
"""Build URL and Host trees"""
|
|
625
714
|
self._make_subtree(self.url_tree)
|
|
@@ -650,6 +739,15 @@ class Har2Tree:
|
|
|
650
739
|
# Initialize the hostname tree root
|
|
651
740
|
self.hostname_tree.add_url(self.url_tree)
|
|
652
741
|
self.make_hostname_tree(self.url_tree, self.hostname_tree)
|
|
742
|
+
if dev_debug_mode:
|
|
743
|
+
self._all_urlnodes_in_host_tree()
|
|
744
|
+
if isinstance(self.har.frames, dict):
|
|
745
|
+
if self.har.frames.get('children') and self.har.frames['children'] is not None:
|
|
746
|
+
# we have frames in the main one
|
|
747
|
+
for f_child in self.har.frames['children']:
|
|
748
|
+
self._load_iframes(self.rendered_node, f_child)
|
|
749
|
+
else:
|
|
750
|
+
self.logger.warning(f'Wrong format for the frames ({type(self.har.frames)}), very old capture.')
|
|
653
751
|
return self.url_tree
|
|
654
752
|
|
|
655
753
|
@trace_make_subtree_fallback
|
|
@@ -667,7 +765,7 @@ class Har2Tree:
|
|
|
667
765
|
# we got an non-empty response, breaking
|
|
668
766
|
break
|
|
669
767
|
# attach to the first response with something, or to whatever we get.
|
|
670
|
-
self._make_subtree(node_with_hostname, [node])
|
|
768
|
+
self._make_subtree(node_with_hostname, [node], fallback=True)
|
|
671
769
|
return
|
|
672
770
|
|
|
673
771
|
# Sometimes, the har has a list of pages, generally when we have HTTP redirects.
|
|
@@ -679,20 +777,25 @@ class Har2Tree:
|
|
|
679
777
|
and node.pageref != self.har.har['log']['pages'][0]
|
|
680
778
|
and self.pages_root[node.pageref] != node.uuid):
|
|
681
779
|
# In that case, we check if there is already a page with the pageref of the orphan node,
|
|
682
|
-
# and attach the node to that.
|
|
780
|
+
# and attach the node to that.
|
|
781
|
+
# NOTE: we can only do that if there is already a node with this pageref in the tree.
|
|
683
782
|
# This node is not a page root, we can attach it \o/
|
|
684
783
|
page_root_node = self.get_url_node_by_uuid(self.pages_root[node.pageref])
|
|
685
784
|
if dev_debug:
|
|
686
785
|
self.logger.warning(f'Failed to attach URLNode in the normal process, attaching node to page {node.pageref} - Node: {page_root_node.uuid} - {page_root_node.name}.')
|
|
687
|
-
self._make_subtree(page_root_node, [node])
|
|
688
|
-
elif self.
|
|
689
|
-
# Generally, when we have a bunch of redirects, they do not branch out
|
|
690
|
-
# *but* it is not always the case: some intermediary
|
|
786
|
+
self._make_subtree(page_root_node, [node], fallback=True)
|
|
787
|
+
elif self.rendered_node != self.url_tree:
|
|
788
|
+
# Generally, when we have a bunch of redirects, they (generally) do not branch out
|
|
789
|
+
# before the final landing page *but* it is not always the case: some intermediary
|
|
790
|
+
# redirects will have calls to 3rd party pages.
|
|
691
791
|
# Hopefully, this last case was taken care of in the branch above.
|
|
692
|
-
# In this branch, we get the landing page after the redirects
|
|
792
|
+
# In this branch, we get the landing page after the redirects, and attach the node to it.
|
|
793
|
+
|
|
794
|
+
# We skip this call if there are no redirects as it is the very last fallback at the
|
|
795
|
+
# end of this method anyway
|
|
693
796
|
if dev_debug:
|
|
694
797
|
self.logger.warning(f'Failed to attach URLNode in the normal process, attaching node to final redirect: {self.har.final_redirect}.')
|
|
695
|
-
self._make_subtree(self.
|
|
798
|
+
self._make_subtree(self.rendered_node, [node], fallback=True)
|
|
696
799
|
elif 'pages' in self.har.har['log']:
|
|
697
800
|
# No luck, the node is root for this pageref, let's attach it to the prior page in the list, or the very first node (tree root)
|
|
698
801
|
page_before = self.har.har['log']['pages'][0]
|
|
@@ -714,13 +817,38 @@ class Har2Tree:
|
|
|
714
817
|
# node to the root node
|
|
715
818
|
page_root_node = self.url_tree
|
|
716
819
|
self.logger.warning('The pages in the HAR are in in the wrong order, this should not happen but here we are')
|
|
717
|
-
self._make_subtree(page_root_node, [node])
|
|
820
|
+
self._make_subtree(page_root_node, [node], fallback=True)
|
|
718
821
|
else:
|
|
719
822
|
# no way to attach it to anything else, attach to the root node
|
|
720
|
-
self._make_subtree(self.url_tree, [node])
|
|
823
|
+
self._make_subtree(self.url_tree, [node], fallback=True)
|
|
824
|
+
|
|
825
|
+
def all_real_urls_in_children(self, frame: FramesResponse) -> Iterator[str]:
|
|
826
|
+
# from a frame, search all the real urls in each of the children, stop at the first one
|
|
827
|
+
if (frame.get('url') and frame['url'] is not None and not self._url_to_local_only_content(frame['url'])):
|
|
828
|
+
yield frame['url']
|
|
829
|
+
else:
|
|
830
|
+
# got no real URL, try the children
|
|
831
|
+
if frame.get('children') and frame['children'] is not None:
|
|
832
|
+
for c in frame['children']:
|
|
833
|
+
yield from self.all_real_urls_in_children(c)
|
|
834
|
+
|
|
835
|
+
def search_in_frames(self, urls: set[str], frame: FramesResponse) -> Iterator[str]:
|
|
836
|
+
# If the frame doesn't have children, there are no potential URLs to attach
|
|
837
|
+
if not isinstance(frame, dict) or not frame.get('children') or frame['children'] is None:
|
|
838
|
+
return None
|
|
839
|
+
|
|
840
|
+
if frame.get('url'):
|
|
841
|
+
u = unquote_plus(frame['url'])
|
|
842
|
+
if urls & {u, u.split('#', 1)[0]}:
|
|
843
|
+
# got a matching URL, get list of potential iframes urls
|
|
844
|
+
for c in frame['children']:
|
|
845
|
+
yield from self.all_real_urls_in_children(c)
|
|
846
|
+
for c in frame['children']:
|
|
847
|
+
yield from self.search_in_frames(urls, c)
|
|
721
848
|
|
|
722
849
|
@trace_make_subtree
|
|
723
|
-
def _make_subtree(self, root: URLNode, nodes_to_attach: list[URLNode] | None=None,
|
|
850
|
+
def _make_subtree(self, root: URLNode, nodes_to_attach: list[URLNode] | None=None,
|
|
851
|
+
dev_debug: bool=False, fallback: bool=False) -> None:
|
|
724
852
|
"""Recursive method building each level of the tree"""
|
|
725
853
|
matching_urls: list[URLNode]
|
|
726
854
|
if nodes_to_attach is None:
|
|
@@ -782,6 +910,26 @@ class Har2Tree:
|
|
|
782
910
|
if unode.empty_response:
|
|
783
911
|
continue
|
|
784
912
|
|
|
913
|
+
# 2025-11-14
|
|
914
|
+
# the referer of an iframe is the hostname of the parent, even if the parent
|
|
915
|
+
# is a URL with a full path. Before using the referer, we need to check if we have
|
|
916
|
+
# the current url in the frame tree. If we do, find nodes (in the remaining list)
|
|
917
|
+
# with the URLs of the children - any fragment will be missing - and attach that node
|
|
918
|
+
possible_iframe_urls = {unode.name, unode.name.split('#', 1)[0]}
|
|
919
|
+
for possible_url in self.search_in_frames(possible_iframe_urls, self.har.frames):
|
|
920
|
+
cu = unquote_plus(possible_url)
|
|
921
|
+
for u in {cu, cu.split('#', 1)[0]}:
|
|
922
|
+
if u not in self.all_url_requests:
|
|
923
|
+
if '#' not in u:
|
|
924
|
+
self.logger.info(f'"{u}" in the frames URLs, but not in the HAR.')
|
|
925
|
+
continue
|
|
926
|
+
matching_urls = [url_node for url_node in self.all_url_requests[u]
|
|
927
|
+
if url_node in self._nodes_list]
|
|
928
|
+
self._nodes_list = [node for node in self._nodes_list if node not in matching_urls]
|
|
929
|
+
if dev_debug:
|
|
930
|
+
self.logger.warning(f'Found via initiator from {unode.name} to {matching_urls}.')
|
|
931
|
+
self._make_subtree(unode, matching_urls)
|
|
932
|
+
|
|
785
933
|
# The node can have a redirect, but also trigger resources referring to themselves, we need to trigger this code on each node.
|
|
786
934
|
if self.all_initiator_url.get(unode.name):
|
|
787
935
|
# The URL (unode.name) is in the list of known urls initiating calls
|
|
@@ -813,6 +961,12 @@ class Har2Tree:
|
|
|
813
961
|
if hasattr(unode, 'external_ressources'):
|
|
814
962
|
# the url loads external things, and some of them have no referer....
|
|
815
963
|
for external_tag, links in unode.external_ressources.items():
|
|
964
|
+
# 2025-11-06: skip full regex until we're calling this method in the fallback
|
|
965
|
+
# the iframes will often (not always) have a referer set and the URL
|
|
966
|
+
# might be found by the regex and it will not be attached at the
|
|
967
|
+
# right place
|
|
968
|
+
if external_tag == 'full_regex' and not fallback:
|
|
969
|
+
continue
|
|
816
970
|
for link in links:
|
|
817
971
|
if link not in self.all_url_requests or link == self.har.final_redirect:
|
|
818
972
|
# We have a lot of false positives
|
har2tree/helper.py
CHANGED
|
@@ -72,7 +72,7 @@ def make_hhhash(entry: dict[str, Any]) -> str:
|
|
|
72
72
|
# We need the HTTP version used for the query:
|
|
73
73
|
# * The HTTP Header names in HTTP 1.1 can have uppercase characters
|
|
74
74
|
# * The HTTP Header names in HTTP 2 *must* be lowercase: https://www.rfc-editor.org/rfc/rfc7540#section-8.1.2
|
|
75
|
-
if entry['httpVersion'].lower() in ["http/1.1", "http/1.0"]:
|
|
75
|
+
if entry['httpVersion'].lower() in ["http/1.1", "http/1.0", "1.1"]:
|
|
76
76
|
return f'hhh:1:{sha256}'
|
|
77
77
|
if entry['httpVersion'].lower() == "http/2.0":
|
|
78
78
|
return f'hhh:2:{sha256}'
|
|
@@ -364,7 +364,8 @@ def find_external_ressources(mimetype: str, data: bytes, base_url: str, all_requ
|
|
|
364
364
|
# link: https://www.w3schools.com/TAGs/tag_link.asp -> href
|
|
365
365
|
# object: https://www.w3schools.com/TAGs/tag_object.asp -> data
|
|
366
366
|
external_ressources: dict[str, list[str]] = {'img': [], 'script': [], 'video': [], 'audio': [],
|
|
367
|
-
'iframe': [],
|
|
367
|
+
'iframe': [],
|
|
368
|
+
'embed': [], 'source': [],
|
|
368
369
|
'link': [],
|
|
369
370
|
'object': [],
|
|
370
371
|
'css': [],
|
har2tree/nodes.py
CHANGED
|
@@ -15,16 +15,19 @@ from base64 import b64decode
|
|
|
15
15
|
from datetime import datetime, timedelta
|
|
16
16
|
from functools import lru_cache, cached_property
|
|
17
17
|
from hashlib import sha256
|
|
18
|
-
from io import BytesIO
|
|
18
|
+
from io import BytesIO, StringIO
|
|
19
19
|
from pathlib import Path
|
|
20
20
|
from typing import Any
|
|
21
21
|
from collections.abc import MutableMapping
|
|
22
|
-
from urllib.parse import unquote_plus, urlparse, urljoin
|
|
22
|
+
from urllib.parse import unquote_plus, urlparse, urljoin, parse_qs
|
|
23
23
|
|
|
24
24
|
import filetype # type: ignore
|
|
25
|
+
import json_stream # type: ignore
|
|
26
|
+
|
|
25
27
|
from bs4 import BeautifulSoup
|
|
26
28
|
from ete3 import TreeNode # type: ignore
|
|
27
29
|
from publicsuffixlist import PublicSuffixList # type: ignore
|
|
30
|
+
from requests_toolbelt.multipart import decoder # type: ignore
|
|
28
31
|
from w3lib.html import strip_html5_whitespace
|
|
29
32
|
from w3lib.url import canonicalize_url, safe_url_string
|
|
30
33
|
|
|
@@ -211,33 +214,67 @@ class URLNode(HarTreeNode):
|
|
|
211
214
|
if 'user_agent' not in self.features:
|
|
212
215
|
self.add_feature('user_agent', '')
|
|
213
216
|
|
|
214
|
-
if 'method' in self.request and self.request['method'] == 'POST'
|
|
215
|
-
|
|
216
|
-
if self.request
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
217
|
+
if 'method' in self.request and self.request['method'] == 'POST':
|
|
218
|
+
decoded_posted_data: list[Any] | str | bytes | int | float | bool | dict[str, str] | dict[str, list[str]] | None = None
|
|
219
|
+
if 'postData' not in self.request or 'text' not in self.request['postData']:
|
|
220
|
+
self.logger.debug('POST request with no content.')
|
|
221
|
+
self.add_feature('posted_data_info', "No content.")
|
|
222
|
+
elif not self.request['postData']['text']:
|
|
223
|
+
# If the POST content is empty
|
|
224
|
+
self.logger.debug('Empty POST request.')
|
|
225
|
+
decoded_posted_data = ''
|
|
226
|
+
self.add_feature('posted_data_info', "Empty request.")
|
|
227
|
+
elif self.request['postData']['text'].startswith('\x1f\uFFFD\x08'):
|
|
228
|
+
# b'\x1f\xef\xbf\xbd\x08', decoded to UTF-8
|
|
229
|
+
# => the replacement character
|
|
230
|
+
# https://www.cogsci.ed.ac.uk/~richard/utf-8.cgi?input=%EF%BF%BD&mode=char
|
|
231
|
+
self.logger.debug('Got a garbled gzipped POST blob.')
|
|
232
|
+
self.add_feature('posted_data_info', "It was a POSTed gzipped blob, but the data has been garbled.")
|
|
233
|
+
decoded_posted_data = self.request['postData']['text']
|
|
234
|
+
elif self.request['postData'].get('params'):
|
|
235
|
+
# NOTE 2025-08-08
|
|
236
|
+
# if the posted data mimetype is "application/x-www-form-urlencoded"
|
|
237
|
+
# the HAR contains the decoded entry in the params key
|
|
238
|
+
# The params key is a list of dicts with a key and a value
|
|
239
|
+
# {"name": <key>, "value": <data>}
|
|
240
|
+
# I'd rather have it as {<key>: <data>}
|
|
241
|
+
# TODO: some processing on the data part (it's often a json blob)
|
|
242
|
+
self.logger.debug('Got a params POST.')
|
|
243
|
+
decoded_posted_data = {entry['name']: entry['value'] for entry in self.request['postData']['params']}
|
|
244
|
+
self.add_feature('posted_data_info', "POST request as URL params.")
|
|
245
|
+
else:
|
|
246
|
+
self.logger.debug('Got a normal POST')
|
|
220
247
|
try:
|
|
221
|
-
|
|
248
|
+
# NOTE 2023-08-22: Blind attempt to base64 decode the data
|
|
249
|
+
decoded_posted_data = self._dirty_safe_b64decode(self.request['postData']['text'])
|
|
222
250
|
except binascii.Error:
|
|
223
|
-
decoded_posted_data =
|
|
251
|
+
decoded_posted_data = self.request['postData']['text']
|
|
224
252
|
if 'mimeType' in self.request['postData']:
|
|
225
253
|
# make it easier to compare.
|
|
226
254
|
mimetype_lower = self.request['postData']['mimeType'].lower()
|
|
227
255
|
if mimetype_lower.startswith('application/x-www-form-urlencoded'):
|
|
256
|
+
# NOTE: this should never happen as there should
|
|
257
|
+
# be something in self.request['postData']['params']
|
|
258
|
+
# and we already processed it before but just in case...
|
|
259
|
+
self.logger.debug('Got a application/x-www-form-urlencoded without params key')
|
|
228
260
|
# 100% sure there will be websites where decode will fail
|
|
229
261
|
try:
|
|
230
262
|
if isinstance(decoded_posted_data, bytes):
|
|
231
263
|
decoded_posted_data = decoded_posted_data.decode()
|
|
232
264
|
if isinstance(decoded_posted_data, str):
|
|
233
265
|
decoded_posted_data = unquote_plus(decoded_posted_data)
|
|
266
|
+
if isinstance(decoded_posted_data, str):
|
|
267
|
+
decoded_posted_data = parse_qs(decoded_posted_data)
|
|
268
|
+
self.add_feature('posted_data_info', "Successfully decoded POST request.")
|
|
234
269
|
except Exception as e:
|
|
235
|
-
self.logger.warning(f'Unable to unquote form data "{decoded_posted_data!r}": {e}')
|
|
270
|
+
self.logger.warning(f'Unable to unquote or parse form data "{decoded_posted_data!r}": {e}')
|
|
271
|
+
self.add_feature('posted_data_info', "Unable to decode POST request.")
|
|
236
272
|
elif (mimetype_lower.startswith('application/json')
|
|
237
273
|
or mimetype_lower.startswith('application/csp-report')
|
|
238
274
|
or mimetype_lower.startswith('application/x-amz-json-1.1')
|
|
239
|
-
or mimetype_lower.startswith('application/x-json-stream')
|
|
240
275
|
or mimetype_lower.startswith('application/reports+json')
|
|
276
|
+
or mimetype_lower.startswith('application/vnd.adobe.dc+json')
|
|
277
|
+
or mimetype_lower.startswith('application/ion+json')
|
|
241
278
|
or mimetype_lower.endswith('json')
|
|
242
279
|
):
|
|
243
280
|
if isinstance(decoded_posted_data, (str, bytes)):
|
|
@@ -245,56 +282,127 @@ class URLNode(HarTreeNode):
|
|
|
245
282
|
try:
|
|
246
283
|
# NOTE 2023-08-22: loads here may give us a int, float or a bool.
|
|
247
284
|
decoded_posted_data = json.loads(decoded_posted_data)
|
|
285
|
+
self.add_feature('posted_data_info', "Successfully decoded POST request.")
|
|
248
286
|
except Exception:
|
|
287
|
+
self.add_feature('posted_data_info', "Unable to decode POST request.")
|
|
249
288
|
if isinstance(decoded_posted_data, (str, bytes)):
|
|
250
|
-
self.logger.
|
|
289
|
+
self.logger.warning(f"Expected json, got garbage: {mimetype_lower} - {decoded_posted_data[:20]!r}[...]")
|
|
251
290
|
else:
|
|
252
|
-
self.logger.
|
|
291
|
+
self.logger.warning(f"Expected json, got garbage: {mimetype_lower} - {decoded_posted_data}")
|
|
292
|
+
elif mimetype_lower.startswith('application/x-json-stream'):
|
|
293
|
+
try:
|
|
294
|
+
to_stream: StringIO | BytesIO
|
|
295
|
+
if isinstance(decoded_posted_data, str):
|
|
296
|
+
to_stream = StringIO(decoded_posted_data)
|
|
297
|
+
elif isinstance(decoded_posted_data, bytes):
|
|
298
|
+
to_stream = BytesIO(decoded_posted_data)
|
|
299
|
+
else:
|
|
300
|
+
raise ValueError(f'Invalid type: {type(decoded_posted_data)}')
|
|
301
|
+
streamed_data = json_stream.load(to_stream)
|
|
302
|
+
decoded_posted_data = json_stream.to_standard_types(streamed_data)
|
|
303
|
+
self.add_feature('posted_data_info', "Successfully decoded POST request.")
|
|
304
|
+
except Exception:
|
|
305
|
+
if isinstance(decoded_posted_data, (str, bytes)):
|
|
306
|
+
self.logger.warning(f"Expected json stream, got garbage: {mimetype_lower} - {decoded_posted_data[:20]!r}[...]")
|
|
307
|
+
else:
|
|
308
|
+
self.logger.warning(f"Expected json stream, got garbage: {mimetype_lower} - {decoded_posted_data}")
|
|
309
|
+
self.add_feature('posted_data_info', "Unable to decode POST request.")
|
|
310
|
+
elif mimetype_lower.startswith('multipart'):
|
|
311
|
+
self.add_feature('posted_data_info', f"Decoding {mimetype_lower} is partially supported.")
|
|
312
|
+
if isinstance(decoded_posted_data, str):
|
|
313
|
+
# must be encoded for decoding
|
|
314
|
+
multipart_to_decode = decoded_posted_data.encode()
|
|
315
|
+
elif isinstance(decoded_posted_data, bytes):
|
|
316
|
+
multipart_to_decode = decoded_posted_data
|
|
317
|
+
else:
|
|
318
|
+
raise ValueError(f'Invalid type for multipart POST: {type(decoded_posted_data)}')
|
|
319
|
+
if b"\r\n" not in multipart_to_decode:
|
|
320
|
+
# the decoder wants that
|
|
321
|
+
multipart_to_decode = multipart_to_decode.replace(b"\n", b"\r\n")
|
|
322
|
+
try:
|
|
323
|
+
multipart_data = decoder.MultipartDecoder(multipart_to_decode, mimetype_lower)
|
|
324
|
+
decoded_posted_data = []
|
|
325
|
+
for part in multipart_data.parts:
|
|
326
|
+
headers = {k.decode(): v.decode() for k, v in part.headers.items()}
|
|
327
|
+
content = part.text
|
|
328
|
+
decoded_posted_data.append({'headers': headers, 'content': content})
|
|
329
|
+
except Exception as e:
|
|
330
|
+
self.logger.warning(f'Unable to decode multipart POST: {e}')
|
|
331
|
+
self.add_feature('posted_data_info', "Unable to decode multipart in POST request.")
|
|
253
332
|
|
|
254
|
-
elif mimetype_lower.startswith('multipart/form-data'):
|
|
255
|
-
# FIXME multipart content (similar to email). Not totally sure what do do with it tight now.
|
|
256
|
-
pass
|
|
257
333
|
elif mimetype_lower.startswith('application/x-protobuf'):
|
|
258
334
|
# FIXME If possible, decode?
|
|
259
|
-
|
|
260
|
-
|
|
335
|
+
self.logger.debug(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
336
|
+
self.add_feature('posted_data_info', f"Decoding {mimetype_lower} is not supported yet.")
|
|
337
|
+
elif mimetype_lower.startswith('text') and isinstance(decoded_posted_data, (str, bytes)):
|
|
261
338
|
try:
|
|
262
339
|
# NOTE 2023-08-22: Quite a few text entries are in fact json, give it a shot.
|
|
263
340
|
# loads here may give us a int, float or a bool.
|
|
264
341
|
decoded_posted_data = json.loads(decoded_posted_data)
|
|
342
|
+
self.add_feature('posted_data_info', "Decoded JSON out of POST request.")
|
|
265
343
|
except Exception:
|
|
266
344
|
# keep it as it is otherwise.
|
|
267
345
|
pass
|
|
268
346
|
elif mimetype_lower.endswith('javascript'):
|
|
269
347
|
# keep it as it is
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
348
|
+
self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
349
|
+
self.add_feature('posted_data_info', f"Pretty rendering of {mimetype_lower} is not supported yet.")
|
|
350
|
+
elif mimetype_lower in ['?', '*/*']:
|
|
351
|
+
self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
352
|
+
self.add_feature('posted_data_info', f"Weird MimeType ({mimetype_lower}) is not supported yet.")
|
|
353
|
+
elif mimetype_lower == 'application/binary':
|
|
354
|
+
self.logger.warning(f'Got a POST {mimetype_lower}, not a broken gziped blob: {decoded_posted_data!r}')
|
|
355
|
+
self.add_feature('posted_data_info', f"MimeType ({mimetype_lower}) is not supported yet.")
|
|
356
|
+
elif mimetype_lower in ['application/octet-stream']:
|
|
275
357
|
# Should flag it, maybe?
|
|
276
|
-
|
|
277
|
-
|
|
358
|
+
self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
359
|
+
self.add_feature('posted_data_info', f"MimeType ({mimetype_lower}) is not supported yet.")
|
|
360
|
+
elif mimetype_lower in ['application/grpc-web+proto']:
|
|
361
|
+
# Can be decoded?
|
|
362
|
+
self.logger.warning(f'Got a POST {mimetype_lower} - can be decoded: {decoded_posted_data!r}')
|
|
363
|
+
self.add_feature('posted_data_info', f"MimeType ({mimetype_lower}) is not supported yet.")
|
|
364
|
+
elif mimetype_lower in ['application/unknown']:
|
|
278
365
|
# Weird but already seen stuff
|
|
279
|
-
|
|
366
|
+
self.logger.warning(f'Got a POST {mimetype_lower}: {decoded_posted_data!r}')
|
|
367
|
+
self.add_feature('posted_data_info', f"MimeType ({mimetype_lower}) is not supported yet.")
|
|
280
368
|
else:
|
|
281
|
-
self.logger.warning(f'Unexpected mime type: {mimetype_lower}')
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
369
|
+
self.logger.warning(f'Unexpected mime type: {mimetype_lower} - {decoded_posted_data!r}')
|
|
370
|
+
self.add_feature('posted_data_info', f"Unexpected MimeType ({mimetype_lower}) is not supported yet.")
|
|
371
|
+
else:
|
|
372
|
+
self.logger.warning(f'Missing mimetype in POST: {self.request["postData"]}')
|
|
373
|
+
self.add_feature('posted_data_info', "Missing MimeType, not sure what to do.")
|
|
374
|
+
|
|
375
|
+
# NOTE 2023-08-22: Blind attempt to process the data as json
|
|
376
|
+
if decoded_posted_data and isinstance(decoded_posted_data, (str, bytes)):
|
|
377
|
+
try:
|
|
378
|
+
decoded_posted_data = json.loads(decoded_posted_data)
|
|
379
|
+
except Exception:
|
|
380
|
+
pass
|
|
381
|
+
|
|
382
|
+
if decoded_posted_data and isinstance(decoded_posted_data, bytes):
|
|
383
|
+
# NOTE 2023-08-22: Blind attempt to decode the bytes
|
|
384
|
+
# Try to decode it as utf-8
|
|
385
|
+
try:
|
|
386
|
+
decoded_posted_data = decoded_posted_data.decode('utf-8')
|
|
387
|
+
except Exception:
|
|
388
|
+
pass
|
|
389
|
+
|
|
390
|
+
self.add_feature('posted_data', decoded_posted_data)
|
|
391
|
+
if 'postData' in self.request and self.request['postData'].get('mimeType'):
|
|
392
|
+
self.add_feature('posted_data_mimetype', self.request['postData']['mimeType'])
|
|
393
|
+
# Get size, post decode.
|
|
394
|
+
if not decoded_posted_data:
|
|
395
|
+
# empty or None, set to 0
|
|
396
|
+
self.add_feature('posted_data_size', 0)
|
|
397
|
+
elif isinstance(decoded_posted_data, (list, dict)):
|
|
398
|
+
# set size to the json dump
|
|
399
|
+
self.add_feature('posted_data_size', len(json.dumps(decoded_posted_data)))
|
|
400
|
+
elif isinstance(decoded_posted_data, (str, bytes)):
|
|
401
|
+
# length
|
|
402
|
+
self.add_feature('posted_data_size', len(decoded_posted_data))
|
|
403
|
+
else:
|
|
404
|
+
# Stringify and len
|
|
405
|
+
self.add_feature('posted_data_size', len(str(decoded_posted_data)))
|
|
298
406
|
|
|
299
407
|
self.add_feature('response', har_entry['response'])
|
|
300
408
|
try:
|
|
@@ -377,6 +485,7 @@ class URLNode(HarTreeNode):
|
|
|
377
485
|
|
|
378
486
|
# Common JS redirect we can catch easily
|
|
379
487
|
# NOTE: it is extremely fragile and doesn't work very often but is kinda better than nothing.
|
|
488
|
+
# NOTE 2025-08-30: Also, finding that doesn't mean it is in a part of the code that is executed without user interaction. It can be triggered after a user fills a form for example.
|
|
380
489
|
# Source: https://stackoverflow.com/questions/13363174/regular-expression-to-catch-as-many-javascript-redirections-as-possible
|
|
381
490
|
regex = re.compile(br"""((location.href)|(window.location)|(location.replace)|(location.assign))(( ?= ?)|( ?\( ?))("|')([^'"]*)("|')( ?\) ?)?;""", re.I)
|
|
382
491
|
matches = re.findall(regex, self.body.getvalue())
|
|
@@ -478,7 +587,7 @@ class URLNode(HarTreeNode):
|
|
|
478
587
|
return href
|
|
479
588
|
|
|
480
589
|
if not hasattr(self, 'rendered_html') or not self.rendered_html:
|
|
481
|
-
raise Har2TreeError('Not the node of a page rendered, invalid request.')
|
|
590
|
+
raise Har2TreeError(f'Not the node of a page rendered ({self.uuid}), invalid request.')
|
|
482
591
|
urls: set[str] = set()
|
|
483
592
|
|
|
484
593
|
# The simple ones: the links.
|
|
@@ -1,36 +1,38 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: har2tree
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.36.0
|
|
4
4
|
Summary: HTTP Archive (HAR) to ETE Toolkit generator
|
|
5
|
-
License: BSD-3-Clause
|
|
5
|
+
License-Expression: BSD-3-Clause
|
|
6
|
+
License-File: LICENSE
|
|
6
7
|
Author: Raphaël Vinot
|
|
7
8
|
Author-email: raphael.vinot@circl.lu
|
|
8
|
-
Requires-Python: >=3.
|
|
9
|
+
Requires-Python: >=3.10,<3.15
|
|
9
10
|
Classifier: Intended Audience :: Information Technology
|
|
10
11
|
Classifier: Intended Audience :: Science/Research
|
|
11
12
|
Classifier: Intended Audience :: Telecommunications Industry
|
|
12
|
-
Classifier: License :: OSI Approved :: BSD License
|
|
13
13
|
Classifier: Operating System :: POSIX :: Linux
|
|
14
14
|
Classifier: Programming Language :: Python :: 3
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.12
|
|
19
18
|
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
20
20
|
Classifier: Topic :: Internet
|
|
21
21
|
Classifier: Topic :: Security
|
|
22
22
|
Provides-Extra: docs
|
|
23
|
-
Requires-Dist: Sphinx (>=
|
|
24
|
-
Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.
|
|
23
|
+
Requires-Dist: Sphinx (>=9.0.4) ; (python_version >= "3.11") and (extra == "docs")
|
|
24
|
+
Requires-Dist: beautifulsoup4[charset-normalizer,lxml] (>=4.14.3)
|
|
25
25
|
Requires-Dist: ete3 (>=3.1.3)
|
|
26
26
|
Requires-Dist: filetype (>=1.2.0)
|
|
27
|
-
Requires-Dist:
|
|
28
|
-
Requires-Dist:
|
|
29
|
-
Requires-Dist:
|
|
30
|
-
Requires-Dist: numpy (>=2.3
|
|
31
|
-
Requires-Dist:
|
|
27
|
+
Requires-Dist: json-stream (>=2.3.3,<3.0.0)
|
|
28
|
+
Requires-Dist: legacy-cgi (>=2.6.4) ; python_version >= "3.13" and python_version < "4.0"
|
|
29
|
+
Requires-Dist: multipart (>=1.3.0,<2.0.0)
|
|
30
|
+
Requires-Dist: numpy (>=2.2,<2.3) ; python_version < "3.11"
|
|
31
|
+
Requires-Dist: numpy (>=2.3.5) ; python_version >= "3.11" and python_version < "3.15"
|
|
32
|
+
Requires-Dist: publicsuffixlist (>=1.0.2.20251209)
|
|
33
|
+
Requires-Dist: requests-toolbelt (>=1.0.0,<2.0.0)
|
|
32
34
|
Requires-Dist: six (>=1.17.0) ; extra == "docs"
|
|
33
|
-
Requires-Dist: tinycss2 (>=1.
|
|
35
|
+
Requires-Dist: tinycss2 (>=1.5.1)
|
|
34
36
|
Requires-Dist: w3lib (>=2.3.1)
|
|
35
37
|
Project-URL: Documentation, https://har2tree.readthedocs.io/en/latest/
|
|
36
38
|
Project-URL: Repository, https://github.com/Lookyloo/har2tree
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
har2tree/__init__.py,sha256=Na3mxHkUBq3rzYbxiLNJF37DxH5mcghSorjzXw5Teug,422
|
|
2
|
+
har2tree/har2tree.py,sha256=24Puk4dlDXWOVIAPV7SIXNoP-oP-_7ERH2mZPxXiwn8,52762
|
|
3
|
+
har2tree/helper.py,sha256=ktX5Fq-K_t4r0VVAXIH4uy7xc-qCjtSaiUvkX_PYxhw,20737
|
|
4
|
+
har2tree/nodes.py,sha256=QWKqEUnuW7J6pASVvzwWAQNqL-_KDzSs2ld6uJl3qbw,37710
|
|
5
|
+
har2tree/parser.py,sha256=4yej1OcVYAIiLfzYZsO9WCw3WyM_ykDTuvpW7UO1ROE,3645
|
|
6
|
+
har2tree/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
har2tree-1.36.0.dist-info/METADATA,sha256=jjZ2lxWFYv19ZpFL_1ehSNbUNLqEePYGNmLOABeUdnM,2240
|
|
8
|
+
har2tree-1.36.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
9
|
+
har2tree-1.36.0.dist-info/licenses/LICENSE,sha256=Xa4EVROgJsEo10CW-ISCRiw0TtqdKz1JuM3BBLBM55c,1803
|
|
10
|
+
har2tree-1.36.0.dist-info/RECORD,,
|
har2tree-1.31.3.dist-info/RECORD
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
har2tree/__init__.py,sha256=Na3mxHkUBq3rzYbxiLNJF37DxH5mcghSorjzXw5Teug,422
|
|
2
|
-
har2tree/har2tree.py,sha256=47x9X5tY69f9SXkYJgJsnAaX2kxgXHgzFThGz6M86Zw,44495
|
|
3
|
-
har2tree/helper.py,sha256=CgeXqfBeHs8SbkW7TRNKqJBTZLAu63KggQjbGHCZAGI,20681
|
|
4
|
-
har2tree/nodes.py,sha256=CC3NseEaM455JOpPqjfTAQ-dwWiGWmzlceGSSeTwoRo,28951
|
|
5
|
-
har2tree/parser.py,sha256=4yej1OcVYAIiLfzYZsO9WCw3WyM_ykDTuvpW7UO1ROE,3645
|
|
6
|
-
har2tree/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
-
har2tree-1.31.3.dist-info/LICENSE,sha256=Xa4EVROgJsEo10CW-ISCRiw0TtqdKz1JuM3BBLBM55c,1803
|
|
8
|
-
har2tree-1.31.3.dist-info/METADATA,sha256=PSDu0bnPUYje8It-uyZfcDVbo4TTTC7RCzH-2CRAc0U,2112
|
|
9
|
-
har2tree-1.31.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
10
|
-
har2tree-1.31.3.dist-info/RECORD,,
|
|
File without changes
|