scythe-ttp 0.12.4__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scythe-ttp might be problematic. Click here for more details.
- scythe/auth/__init__.py +3 -1
- scythe/auth/base.py +9 -0
- scythe/auth/cookie_jwt.py +172 -0
- scythe/cli/__init__.py +3 -0
- scythe/cli/main.py +601 -0
- scythe/core/headers.py +69 -9
- scythe/journeys/__init__.py +2 -1
- scythe/journeys/actions.py +235 -1
- scythe/journeys/base.py +161 -12
- scythe/journeys/executor.py +102 -22
- scythe/ttps/web/uuid_guessing.py +3 -2
- {scythe_ttp-0.12.4.dist-info → scythe_ttp-0.14.0.dist-info}/METADATA +84 -16
- {scythe_ttp-0.12.4.dist-info → scythe_ttp-0.14.0.dist-info}/RECORD +17 -13
- scythe_ttp-0.14.0.dist-info/entry_points.txt +2 -0
- {scythe_ttp-0.12.4.dist-info → scythe_ttp-0.14.0.dist-info}/WHEEL +0 -0
- {scythe_ttp-0.12.4.dist-info → scythe_ttp-0.14.0.dist-info}/licenses/LICENSE +0 -0
- {scythe_ttp-0.12.4.dist-info → scythe_ttp-0.14.0.dist-info}/top_level.txt +0 -0
scythe/core/headers.py
CHANGED
|
@@ -19,6 +19,51 @@ class HeaderExtractor:
|
|
|
19
19
|
def __init__(self):
|
|
20
20
|
self.logger = logging.getLogger("HeaderExtractor")
|
|
21
21
|
|
|
22
|
+
@staticmethod
def _normalize_url(url: str) -> str:
    """Return *url* with an explicit scheme so ``requests`` can handle it.

    Surrounding whitespace is removed before the scheme check, so the value
    that is returned is the same one that was inspected.

    Args:
        url: Target given by the caller, e.g. ``"localhost:8080"`` or
            ``"https://example.com"``. Non-string values are passed
            through untouched.

    Returns:
        The stripped URL unchanged when it already starts with ``http://``
        or ``https://`` (case-insensitive); ``http:`` + URL for
        scheme-relative input (``//host/path``); ``http://`` + URL
        otherwise. Empty or whitespace-only input yields ``""``.
    """
    if not isinstance(url, str):
        return url

    cleaned = url.strip()
    if not cleaned:
        # Nothing to normalize; prefixing would only produce a bare "http://".
        return cleaned
    if cleaned.lower().startswith(("http://", "https://")):
        return cleaned
    if cleaned.startswith("//"):
        # Scheme-relative URL: supply only the missing scheme.
        return f"http:{cleaned}"
    return f"http://{cleaned}"
|
|
31
|
+
|
|
32
|
+
@staticmethod
def _is_static_asset(url: str, headers: Optional[Dict[str, Any]] = None) -> bool:
    """Heuristically decide whether a URL/log entry is a static asset.

    The decision is made, in order, from:

    1. the file extension of the URL *path* (query string and fragment are
       ignored, so ``/app.js?v=123`` is recognised and ``/api?file=a.js``
       is not);
    2. a ``/static/`` or ``/assets/`` segment in the path;
    3. the ``Content-Type`` header, looked up case-insensitively.

    Args:
        url: Response URL to classify. Non-string values are never static.
        headers: Optional response headers; only ``Content-Type`` is read.

    Returns:
        True when the entry looks like css/js/image/font/media content,
        False otherwise. Never raises: on any unexpected error the entry
        is treated as non-static.
    """
    from urllib.parse import urlsplit

    static_exts = (
        '.css', '.js', '.mjs', '.map', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico',
        '.woff', '.woff2', '.ttf', '.otf', '.eot', '.webp', '.mp4', '.webm', '.mp3', '.wav',
    )
    # Prefixes, so parameters such as "; charset=utf-8" do not matter.
    static_ctypes = (
        'text/css', 'application/javascript', 'application/x-javascript',
        'text/javascript', 'image/', 'font/', 'application/font-',
        'application/x-font-', 'audio/', 'video/',
    )

    try:
        if not isinstance(url, str):
            return False

        try:
            path = urlsplit(url).path.lower()
        except ValueError:
            # Malformed URL (e.g. a broken IPv6 literal): trim by hand instead.
            path = url.lower().split('#', 1)[0].split('?', 1)[0]

        if path.endswith(static_exts):
            return True
        if '/static/' in path or '/assets/' in path:
            return True

        if isinstance(headers, dict):
            for key, value in headers.items():
                if isinstance(key, str) and key.lower() == 'content-type':
                    # Only the first Content-Type entry is considered.
                    return str(value).strip().lower().startswith(static_ctypes)
    except Exception:
        # Be safe: if unsure, do not classify as static.
        return False
    return False
|
|
66
|
+
|
|
22
67
|
@staticmethod
|
|
23
68
|
def enable_logging_for_driver(chrome_options: Options) -> None:
|
|
24
69
|
"""
|
|
@@ -50,13 +95,14 @@ class HeaderExtractor:
|
|
|
50
95
|
Version string if header found, None otherwise
|
|
51
96
|
"""
|
|
52
97
|
try:
|
|
53
|
-
self.
|
|
98
|
+
norm_url = self._normalize_url(url)
|
|
99
|
+
self.logger.debug(f"Making {method} request to {norm_url} for header extraction")
|
|
54
100
|
|
|
55
101
|
# Use HEAD by default for efficiency, fallback to GET if needed
|
|
56
102
|
if method.upper() == "HEAD":
|
|
57
|
-
response = requests.head(
|
|
103
|
+
response = requests.head(norm_url, timeout=timeout, allow_redirects=True)
|
|
58
104
|
else:
|
|
59
|
-
response = requests.get(
|
|
105
|
+
response = requests.get(norm_url, timeout=timeout, allow_redirects=True)
|
|
60
106
|
|
|
61
107
|
# Check if request was successful
|
|
62
108
|
response.raise_for_status()
|
|
@@ -71,7 +117,8 @@ class HeaderExtractor:
|
|
|
71
117
|
return None
|
|
72
118
|
|
|
73
119
|
except requests.exceptions.RequestException as e:
|
|
74
|
-
|
|
120
|
+
hint = " (tip: include http:// or https://)" if isinstance(url, str) and not url.lower().startswith(("http://","https://")) else ""
|
|
121
|
+
self.logger.warning(f"Failed to make {method} request to {url}: {e}{hint}")
|
|
75
122
|
return None
|
|
76
123
|
except Exception as e:
|
|
77
124
|
self.logger.warning(f"Unexpected error during banner grab: {e}")
|
|
@@ -90,12 +137,13 @@ class HeaderExtractor:
|
|
|
90
137
|
Dictionary of all response headers
|
|
91
138
|
"""
|
|
92
139
|
try:
|
|
93
|
-
self.
|
|
140
|
+
norm_url = self._normalize_url(url)
|
|
141
|
+
self.logger.debug(f"Making {method} request to {norm_url} for all headers")
|
|
94
142
|
|
|
95
143
|
if method.upper() == "HEAD":
|
|
96
|
-
response = requests.head(
|
|
144
|
+
response = requests.head(norm_url, timeout=timeout, allow_redirects=True)
|
|
97
145
|
else:
|
|
98
|
-
response = requests.get(
|
|
146
|
+
response = requests.get(norm_url, timeout=timeout, allow_redirects=True)
|
|
99
147
|
|
|
100
148
|
response.raise_for_status()
|
|
101
149
|
|
|
@@ -103,7 +151,8 @@ class HeaderExtractor:
|
|
|
103
151
|
return {k: str(v) for k, v in response.headers.items()}
|
|
104
152
|
|
|
105
153
|
except requests.exceptions.RequestException as e:
|
|
106
|
-
|
|
154
|
+
hint = " (tip: include http:// or https://)" if isinstance(url, str) and not url.lower().startswith(("http://","https://")) else ""
|
|
155
|
+
self.logger.warning(f"Failed to get headers from {url}: {e}{hint}")
|
|
107
156
|
return {}
|
|
108
157
|
except Exception as e:
|
|
109
158
|
self.logger.warning(f"Unexpected error getting headers: {e}")
|
|
@@ -179,7 +228,11 @@ class HeaderExtractor:
|
|
|
179
228
|
self.logger.debug(f"Successfully extracted version '{version}' via banner grab")
|
|
180
229
|
return version
|
|
181
230
|
else:
|
|
182
|
-
self.logger.debug("Banner grab failed
|
|
231
|
+
self.logger.debug("Banner grab failed")
|
|
232
|
+
|
|
233
|
+
# In API mode (no driver), do not fall back to Selenium to avoid noisy warnings
|
|
234
|
+
if driver is None:
|
|
235
|
+
return None
|
|
183
236
|
|
|
184
237
|
# Fall back to Selenium performance logs
|
|
185
238
|
self.logger.debug("Using Selenium performance logs method")
|
|
@@ -223,6 +276,13 @@ class HeaderExtractor:
|
|
|
223
276
|
if target_url and target_url not in response_url:
|
|
224
277
|
continue
|
|
225
278
|
|
|
279
|
+
# Ignore static assets (css/js/images/fonts) to avoid false detections/noise
|
|
280
|
+
try:
|
|
281
|
+
if self._is_static_asset(response_url, headers):
|
|
282
|
+
continue
|
|
283
|
+
except Exception:
|
|
284
|
+
pass
|
|
285
|
+
|
|
226
286
|
# Look for the version header (case-insensitive)
|
|
227
287
|
version = self._find_version_header(headers)
|
|
228
288
|
if version:
|
scythe/journeys/__init__.py
CHANGED
|
@@ -7,7 +7,7 @@ or other custom actions like navigation, form filling, etc.
|
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
from .base import Journey, Step, Action
|
|
10
|
-
from .actions import NavigateAction, ClickAction, FillFormAction, WaitAction, TTPAction
|
|
10
|
+
from .actions import NavigateAction, ClickAction, FillFormAction, WaitAction, TTPAction, ApiRequestAction
|
|
11
11
|
from .executor import JourneyExecutor
|
|
12
12
|
|
|
13
13
|
__all__ = [
|
|
@@ -19,5 +19,6 @@ __all__ = [
|
|
|
19
19
|
'FillFormAction',
|
|
20
20
|
'WaitAction',
|
|
21
21
|
'TTPAction',
|
|
22
|
+
'ApiRequestAction',
|
|
22
23
|
'JourneyExecutor'
|
|
23
24
|
]
|
scythe/journeys/actions.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import time
|
|
2
|
+
import logging
|
|
2
3
|
from typing import Dict, Any, Optional
|
|
3
4
|
from selenium.webdriver.remote.webdriver import WebDriver
|
|
4
5
|
from selenium.webdriver.common.by import By
|
|
5
6
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
6
7
|
from selenium.webdriver.support import expected_conditions as EC
|
|
7
8
|
from selenium.common.exceptions import TimeoutException, NoSuchElementException
|
|
9
|
+
import requests
|
|
8
10
|
|
|
9
11
|
from .base import Action
|
|
10
12
|
from ..core.ttp import TTP
|
|
@@ -643,4 +645,236 @@ class AssertAction(Action):
|
|
|
643
645
|
by_method = selector_map.get(self.selector_type)
|
|
644
646
|
if by_method is None:
|
|
645
647
|
raise ValueError(f"Unsupported selector type: {self.selector_type}")
|
|
646
|
-
return by_method
|
|
648
|
+
return by_method
|
|
649
|
+
|
|
650
|
+
class ApiRequestAction(Action):
    """Action to perform a REST API request in Journey API mode.

    This action ignores the WebDriver and uses a requests.Session provided
    in the journey context under the key 'requests_session' (one is created
    and stored there when missing). Any 'auth_headers' from the context are
    merged into the request headers; headers given to the action win.

    Optionally, the JSON response is validated/parsed with a Pydantic model
    class (``model_validate`` when available, ``parse_obj`` otherwise).

    Context keys written: 'requests_session', 'last_response_headers',
    'last_response_url', 'rate_limit_resume_at', and the parsed model under
    ``response_model_context_key`` (default 'last_response_model').
    """

    # Header names whose values are replaced by "***" in stored results.
    _SENSITIVE_HEADERS = frozenset({
        "authorization", "proxy-authorization", "cookie",
        "set-cookie", "x-api-key", "x-auth-token",
    })
    # Only these methods are retried automatically after a 429.
    _RETRYABLE_METHODS = frozenset({'GET', 'HEAD', 'OPTIONS'})
    # Upper bound, in seconds, for any single rate-limit pause.
    _MAX_BACKOFF_S = 30
    # Non-JSON bodies are truncated to this many characters to keep logs light.
    _MAX_STORED_TEXT = 2000

    def __init__(self,
                 method: str,
                 url: str,
                 params: Optional[Dict[str, Any]] = None,
                 body_json: Optional[Dict[str, Any]] = None,
                 data: Optional[Dict[str, Any]] = None,
                 headers: Optional[Dict[str, str]] = None,
                 expected_status: Optional[int] = 200,
                 timeout: float = 10.0,
                 name: Optional[str] = None,
                 description: Optional[str] = None,
                 expected_result: bool = True,
                 response_model: Optional[Any] = None,
                 response_model_context_key: Optional[str] = None,
                 fail_on_validation_error: bool = False):
        """Configure the request.

        Args:
            method: HTTP method; stored upper-cased.
            url: Absolute http(s) URL, or a path joined onto the context's
                'target_url' at execution time.
            params: Query parameters.
            body_json: Payload sent via the ``json=`` argument.
            data: Payload sent via the ``data=`` argument.
            headers: Extra request headers; override context auth headers.
            expected_status: Status code that counts as success. When None,
                the response's ``ok`` attribute decides.
            timeout: Request timeout passed to the session.
            name: Action name; defaults to "API <METHOD> <url>".
            description: Action description; defaults to
                "Perform <METHOD> request to <url>".
            expected_result: Forwarded to the base ``Action``.
            response_model: Optional model class used to validate the JSON body.
            response_model_context_key: Context key for the parsed model.
            fail_on_validation_error: When True, a failed validation makes
                ``execute`` return False regardless of the status code.
        """
        self.method = method.upper()
        self.url = url
        self.params = params or {}
        self.body_json = body_json
        self.data = data
        self.headers = headers or {}
        self.expected_status = expected_status
        self.timeout = timeout
        self.response_model = response_model
        self.response_model_context_key = response_model_context_key
        self.fail_on_validation_error = fail_on_validation_error
        name = name or f"API {self.method} {url}"
        description = description or f"Perform {self.method} request to {url}"
        super().__init__(name, description, expected_result)

    @classmethod
    def _mask_headers(cls, headers: Optional[Dict[str, Any]]) -> Dict[str, Any]:
        """Return a copy of *headers* with sensitive values replaced by '***'."""
        masked = {}
        for key, value in (headers or {}).items():
            if key is None:
                continue
            masked[key] = "***" if str(key).lower() in cls._SENSITIVE_HEADERS else value
        return masked

    @staticmethod
    def _header(headers: Optional[Dict[str, Any]], name: str) -> Any:
        """Case-insensitive header lookup; returns None when absent."""
        wanted = (name or '').lower()
        for key, value in (headers or {}).items():
            if isinstance(key, str) and key.lower() == wanted:
                return value
        return None

    @staticmethod
    def _retry_after_seconds(value: Any) -> int:
        """Convert a Retry-After value (delta-seconds or HTTP-date) to seconds.

        Returns 1 when the header is missing or cannot be interpreted.
        """
        if value is None:
            return 1
        text = str(value).strip()
        try:
            return int(text)
        except ValueError:
            pass
        try:
            from datetime import timezone
            from email.utils import parsedate_to_datetime

            when = parsedate_to_datetime(text)
            if when.tzinfo is None:
                # HTTP dates are expressed in GMT; do not read them as local time.
                when = when.replace(tzinfo=timezone.utc)
            return int(when.timestamp() - time.time())
        except (TypeError, ValueError, OverflowError, OSError):
            return 1

    def _resolve_url(self, context: Dict[str, Any]) -> str:
        """Return ``self.url`` if absolute, else join it onto the context's target_url."""
        from urllib.parse import urljoin
        from ..core.headers import HeaderExtractor

        schemes = ('http://', 'https://')
        # Require the full "scheme://" prefix: a relative path such as
        # "httpbin/get" merely starts with "http" and must still be joined.
        if isinstance(self.url, str) and self.url.lower().startswith(schemes):
            return self.url

        base_url = context.get('target_url') or ''
        # Ensure base_url has a scheme so urljoin works with relative paths.
        if isinstance(base_url, str) and base_url and not base_url.lower().startswith(schemes):
            base_url = HeaderExtractor._normalize_url(base_url)
        return urljoin(base_url, self.url)

    def _wait_for_pending_rate_limit(self, context: Dict[str, Any], logger: logging.Logger) -> None:
        """Sleep (capped) until a 'rate_limit_resume_at' set by an earlier action/step."""
        resume_at = context.get('rate_limit_resume_at')
        if not isinstance(resume_at, (int, float)):
            return
        wait_s = min(resume_at - time.time(), self._MAX_BACKOFF_S)
        if wait_s > 0:
            self.store_result('waited_ms_before_request', int(wait_s * 1000))
            logger.info(f"Delaying {wait_s:.2f}s due to prior rate limit (resume_at)")
            time.sleep(wait_s)

    def _apply_rate_limit(self, context: Dict[str, Any], status_code: Any,
                          response_headers: Dict[str, Any]) -> Optional[int]:
        """Publish a resume time from rate-limit headers.

        Returns the back-off in seconds when the response was a 429,
        otherwise None.
        """
        if status_code == 429:
            wait_s = self._retry_after_seconds(self._header(response_headers, 'Retry-After'))
            wait_s = max(1, min(wait_s, self._MAX_BACKOFF_S))
            context['rate_limit_resume_at'] = time.time() + wait_s
            self.store_result('rate_limit_wait_s', wait_s)
            return wait_s

        remaining = self._header(response_headers, 'X-RateLimit-Remaining')
        reset = self._header(response_headers, 'X-RateLimit-Reset')
        if remaining is None or reset is None or str(remaining).strip() != '0':
            return None
        try:
            # NOTE(review): the value is treated as seconds-until-reset; some
            # servers send an epoch timestamp instead, which then simply hits
            # the cap. Confirm against the target API.
            reset_s = int(str(reset).strip())
        except ValueError:
            return None
        if reset_s > 0:
            context['rate_limit_resume_at'] = time.time() + min(reset_s, self._MAX_BACKOFF_S)
        return None

    def _record_body(self, response: Any, context: Dict[str, Any]) -> bool:
        """Store the response body and run optional model validation.

        Returns True when a response_model was configured and no model
        could be produced from the body.
        """
        try:
            body = response.json()
        except Exception:
            # Any decoding failure means "not JSON": keep (truncated) text instead.
            text = getattr(response, 'text', '')
            if isinstance(text, str):
                text = text[:self._MAX_STORED_TEXT]
            self.store_result('response_text', text)
            if self.response_model is None:
                return False
            # A model was requested but there is nothing to validate.
            self.store_result('response_validation_error', "Response body is not valid JSON")
            return True

        self.store_result('response_json', body)
        if self.response_model is None:
            return False
        try:
            if hasattr(self.response_model, 'model_validate'):
                # Pydantic v2
                parsed_model = self.response_model.model_validate(body)
            else:
                # Pydantic v1 fallback
                parsed_model = self.response_model.parse_obj(body)
            self.store_result('response_model_instance', parsed_model)
            # Save into context for downstream actions.
            context[self.response_model_context_key or 'last_response_model'] = parsed_model
        except Exception as ve:
            self.store_result('response_validation_error', str(ve))
            return True
        return False

    def execute(self, driver: WebDriver, context: Dict[str, Any]) -> bool:
        """Send the request and report whether it met expectations.

        Args:
            driver: Unused; present to satisfy the Action interface.
            context: Shared journey context (read and updated, see class doc).

        Returns:
            True when the status code equals ``expected_status`` (or, when
            that is None, when ``response.ok`` is truthy) and validation did
            not fail while ``fail_on_validation_error`` is set. False
            otherwise, including when the request raised; the message is
            then stored under the 'error' result key.
        """
        logger = logging.getLogger("Journey.ApiRequestAction")

        session = context.get('requests_session')
        if session is None:
            session = requests.Session()
            context['requests_session'] = session

        # Auth headers from context first, then action headers (action overrides).
        final_headers = {}
        final_headers.update(context.get('auth_headers') or {})
        final_headers.update(self.headers or {})

        resolved_url = self._resolve_url(context)

        # Store request details early so they are available even on failure.
        self.store_result('request_method', self.method)
        self.store_result('url', resolved_url)
        if self.params:
            self.store_result('request_params', self.params)
        if self.body_json is not None:
            self.store_result('request_json', self.body_json)
        if self.data is not None:
            self.store_result('request_data', self.data)
        self.store_result('request_headers', self._mask_headers(final_headers))

        self._wait_for_pending_rate_limit(context, logger)

        # At most one retry, and only on 429 for retryable methods.
        for attempt in range(2):
            start_ts = time.time()
            try:
                response = session.request(
                    self.method,
                    resolved_url,
                    params=self.params or None,
                    json=self.body_json,
                    data=self.data,
                    headers=final_headers or None,
                    timeout=self.timeout,
                )
                self.store_result('duration_ms', int((time.time() - start_ts) * 1000))

                status_code = getattr(response, 'status_code', None)
                self.store_result('status_code', status_code)
                response_headers = dict(getattr(response, 'headers', {}) or {})
                self.store_result('response_headers', self._mask_headers(response_headers))
                # Publish unmasked headers for downstream version extraction
                # and rate-limit coordination.
                context['last_response_headers'] = response_headers
                context['last_response_url'] = resolved_url

                backoff = self._apply_rate_limit(context, status_code, response_headers)
                if backoff is not None and attempt == 0 and self.method in self._RETRYABLE_METHODS:
                    logger.info(f"Hit 429 Too Many Requests; backing off {backoff}s and retrying once")
                    time.sleep(backoff)
                    continue

                validation_failed = self._record_body(response, context)

                if self.expected_status is not None:
                    http_ok = (status_code == self.expected_status)
                else:
                    http_ok = bool(getattr(response, 'ok', False))

                # Decide from this call's outcome, not from previously stored results.
                if validation_failed and self.fail_on_validation_error:
                    return False
                return http_ok
            except Exception as e:
                self.store_result('duration_ms', int((time.time() - start_ts) * 1000))
                self.store_result('error', str(e))
                break

        # Reached only after an exception.
        return False
|
scythe/journeys/base.py
CHANGED
|
@@ -102,7 +102,7 @@ class Step:
|
|
|
102
102
|
"""Add an action to this step."""
|
|
103
103
|
self.actions.append(action)
|
|
104
104
|
|
|
105
|
-
def execute(self, driver: WebDriver, context: Dict[str, Any]) -> bool:
|
|
105
|
+
def execute(self, driver: WebDriver|None, context: Dict[str, Any]) -> bool:
|
|
106
106
|
"""
|
|
107
107
|
Execute all actions in this step.
|
|
108
108
|
|
|
@@ -111,7 +111,7 @@ class Step:
|
|
|
111
111
|
context: Shared context data
|
|
112
112
|
|
|
113
113
|
Returns:
|
|
114
|
-
True if step succeeded, False otherwise
|
|
114
|
+
True if a step succeeded, False otherwise
|
|
115
115
|
"""
|
|
116
116
|
logger = logging.getLogger(f"Journey.Step.{self.name}")
|
|
117
117
|
logger.info(f"Executing step: {self.name}")
|
|
@@ -136,12 +136,14 @@ class Step:
|
|
|
136
136
|
result = action.execute(driver, context)
|
|
137
137
|
|
|
138
138
|
# Store result
|
|
139
|
+
details = getattr(action, 'execution_data', {})
|
|
139
140
|
action_result = {
|
|
140
141
|
'action_name': action.name,
|
|
141
142
|
'action_description': action.description,
|
|
142
143
|
'expected': action.expected_result,
|
|
143
144
|
'actual': result,
|
|
144
|
-
'timestamp': time.time()
|
|
145
|
+
'timestamp': time.time(),
|
|
146
|
+
'details': details.copy() if isinstance(details, dict) else {}
|
|
145
147
|
}
|
|
146
148
|
self.execution_results.append(action_result)
|
|
147
149
|
|
|
@@ -156,6 +158,47 @@ class Step:
|
|
|
156
158
|
else:
|
|
157
159
|
if action.expected_result:
|
|
158
160
|
logger.error(f"✗ Action failed: {action.name}")
|
|
161
|
+
# Emit diagnostic details when available (e.g., for API requests)
|
|
162
|
+
try:
|
|
163
|
+
ad = action_result.get('details', {}) or {}
|
|
164
|
+
method = ad.get('request_method') or getattr(action, 'method', None)
|
|
165
|
+
url = ad.get('url') or getattr(action, 'url', None)
|
|
166
|
+
status = ad.get('status_code')
|
|
167
|
+
dur = ad.get('duration_ms')
|
|
168
|
+
if method or url or status is not None:
|
|
169
|
+
parts = []
|
|
170
|
+
if method:
|
|
171
|
+
parts.append(f"method={method}")
|
|
172
|
+
if url:
|
|
173
|
+
parts.append(f"url={url}")
|
|
174
|
+
if status is not None:
|
|
175
|
+
parts.append(f"status={status}")
|
|
176
|
+
if dur is not None:
|
|
177
|
+
parts.append(f"duration_ms={dur}")
|
|
178
|
+
logger.error(" Details: " + ", ".join(parts))
|
|
179
|
+
req_headers = ad.get('request_headers')
|
|
180
|
+
if req_headers:
|
|
181
|
+
logger.error(f" Request headers: {req_headers}")
|
|
182
|
+
req_params = ad.get('request_params')
|
|
183
|
+
if req_params:
|
|
184
|
+
logger.error(f" Request params: {req_params}")
|
|
185
|
+
req_json = ad.get('request_json')
|
|
186
|
+
if req_json is not None:
|
|
187
|
+
logger.error(f" Request JSON: {req_json}")
|
|
188
|
+
req_data = ad.get('request_data')
|
|
189
|
+
if req_data is not None:
|
|
190
|
+
logger.error(f" Request data: {req_data}")
|
|
191
|
+
resp_headers = ad.get('response_headers')
|
|
192
|
+
if resp_headers:
|
|
193
|
+
logger.error(f" Response headers: {resp_headers}")
|
|
194
|
+
if 'response_json' in ad:
|
|
195
|
+
logger.error(f" Response JSON: {ad.get('response_json')}")
|
|
196
|
+
elif 'response_text' in ad:
|
|
197
|
+
logger.error(f" Response text: {ad.get('response_text')}")
|
|
198
|
+
if ad.get('error'):
|
|
199
|
+
logger.error(f" Error: {ad.get('error')}")
|
|
200
|
+
except Exception:
|
|
201
|
+
pass
|
|
159
202
|
failure_count += 1
|
|
160
203
|
if not self.continue_on_failure:
|
|
161
204
|
return False
|
|
@@ -165,6 +208,35 @@ class Step:
|
|
|
165
208
|
|
|
166
209
|
except Exception as e:
|
|
167
210
|
logger.error(f"Exception in action {action.name}: {str(e)}")
|
|
211
|
+
# Emit any available diagnostics even on exceptions
|
|
212
|
+
try:
|
|
213
|
+
details = getattr(action, 'execution_data', {}) or {}
|
|
214
|
+
if details:
|
|
215
|
+
method = details.get('request_method') or getattr(action, 'method', None)
|
|
216
|
+
url = details.get('url') or getattr(action, 'url', None)
|
|
217
|
+
status = details.get('status_code')
|
|
218
|
+
dur = details.get('duration_ms')
|
|
219
|
+
parts = []
|
|
220
|
+
if method:
|
|
221
|
+
parts.append(f"method={method}")
|
|
222
|
+
if url:
|
|
223
|
+
parts.append(f"url={url}")
|
|
224
|
+
if status is not None:
|
|
225
|
+
parts.append(f"status={status}")
|
|
226
|
+
if dur is not None:
|
|
227
|
+
parts.append(f"duration_ms={dur}")
|
|
228
|
+
if parts:
|
|
229
|
+
logger.error(" Details: " + ", ".join(parts))
|
|
230
|
+
if details.get('request_headers'):
|
|
231
|
+
logger.error(f" Request headers: {details.get('request_headers')}")
|
|
232
|
+
if details.get('response_headers'):
|
|
233
|
+
logger.error(f" Response headers: {details.get('response_headers')}")
|
|
234
|
+
if 'response_json' in details:
|
|
235
|
+
logger.error(f" Response JSON: {details.get('response_json')}")
|
|
236
|
+
elif 'response_text' in details:
|
|
237
|
+
logger.error(f" Response text: {details.get('response_text')}")
|
|
238
|
+
except Exception:
|
|
239
|
+
pass
|
|
168
240
|
failure_count += 1
|
|
169
241
|
if not self.continue_on_failure:
|
|
170
242
|
return False
|
|
@@ -260,7 +332,7 @@ class Journey:
|
|
|
260
332
|
logger.error(f"Authentication failed: {str(e)}")
|
|
261
333
|
return False
|
|
262
334
|
|
|
263
|
-
def execute(self, driver: WebDriver, target_url: str) -> Dict[str, Any]:
|
|
335
|
+
def execute(self, driver: WebDriver|None, target_url: str) -> Dict[str, Any]:
|
|
264
336
|
"""
|
|
265
337
|
Execute the complete journey.
|
|
266
338
|
|
|
@@ -283,7 +355,9 @@ class Journey:
|
|
|
283
355
|
start_time = time.time()
|
|
284
356
|
|
|
285
357
|
# Set initial context
|
|
286
|
-
|
|
358
|
+
# Normalize target_url to include scheme when missing (e.g., 'localhost:8080' -> 'http://localhost:8080')
|
|
359
|
+
normalized_target_url = HeaderExtractor._normalize_url(target_url) if isinstance(target_url, str) else target_url
|
|
360
|
+
self.set_context('target_url', normalized_target_url)
|
|
287
361
|
self.set_context('journey_name', self.name)
|
|
288
362
|
self.set_context('start_time', start_time)
|
|
289
363
|
|
|
@@ -311,12 +385,45 @@ class Journey:
|
|
|
311
385
|
if self.requires_authentication():
|
|
312
386
|
auth_name = self.authentication.name if self.authentication else "Unknown"
|
|
313
387
|
logger.info(f"Authentication required: {auth_name}")
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
388
|
+
if driver is None:
|
|
389
|
+
# API mode: use header-based authentication if available
|
|
390
|
+
headers = {}
|
|
391
|
+
try:
|
|
392
|
+
if self.authentication and hasattr(self.authentication, 'get_auth_headers'):
|
|
393
|
+
headers = self.authentication.get_auth_headers() or {}
|
|
394
|
+
except Exception as e:
|
|
395
|
+
logger.error(f"Failed to get authentication headers: {e}")
|
|
396
|
+
headers = {}
|
|
397
|
+
cookies = {}
|
|
398
|
+
if headers:
|
|
399
|
+
# Merge into existing context headers
|
|
400
|
+
existing = self.get_context('auth_headers', {})
|
|
401
|
+
merged = {**existing, **headers}
|
|
402
|
+
self.set_context('auth_headers', merged)
|
|
403
|
+
logger.info("Authentication headers prepared for API mode")
|
|
404
|
+
# Try to merge cookies as well (hybrid auth)
|
|
405
|
+
try:
|
|
406
|
+
if self.authentication and hasattr(self.authentication, 'get_auth_cookies'):
|
|
407
|
+
cookies = self.authentication.get_auth_cookies() or {}
|
|
408
|
+
except Exception as e:
|
|
409
|
+
logger.error(f"Failed to get authentication cookies: {e}")
|
|
410
|
+
cookies = {}
|
|
411
|
+
if cookies:
|
|
412
|
+
existing_cookies = self.get_context('auth_cookies', {})
|
|
413
|
+
merged_cookies = {**existing_cookies, **cookies}
|
|
414
|
+
self.set_context('auth_cookies', merged_cookies)
|
|
415
|
+
logger.info("Authentication cookies prepared for API mode")
|
|
416
|
+
if not headers and not cookies:
|
|
417
|
+
logger.error("Authentication required but no headers/cookies available in API mode")
|
|
418
|
+
results['errors'].append("Authentication failed (no API auth data)")
|
|
419
|
+
return results
|
|
420
|
+
else:
|
|
421
|
+
auth_success = self.authenticate(driver, target_url)
|
|
422
|
+
if not auth_success:
|
|
423
|
+
logger.error("Authentication failed - aborting journey")
|
|
424
|
+
results['errors'].append("Authentication failed")
|
|
425
|
+
return results
|
|
426
|
+
logger.info("Authentication successful")
|
|
320
427
|
|
|
321
428
|
# Execute each step
|
|
322
429
|
for i, step in enumerate(self.steps, 1):
|
|
@@ -342,7 +449,7 @@ class Journey:
|
|
|
342
449
|
results['actions_failed'] += 1
|
|
343
450
|
|
|
344
451
|
# Extract target version header after step execution
|
|
345
|
-
target_version = header_extractor.
|
|
452
|
+
target_version = header_extractor.extract_target_version_hybrid(driver, target_url)
|
|
346
453
|
if target_version:
|
|
347
454
|
results['target_versions'].append(target_version)
|
|
348
455
|
logger.info(f"Target version detected: {target_version}")
|
|
@@ -358,6 +465,48 @@ class Journey:
|
|
|
358
465
|
'target_version': target_version
|
|
359
466
|
}
|
|
360
467
|
results['step_results'].append(step_result)
|
|
468
|
+
|
|
469
|
+
# If the previous step exhausted the rate limit, pause before starting the next one
|
|
470
|
+
try:
|
|
471
|
+
# Prefer an explicit resume time set by actions
|
|
472
|
+
resume_at = self.context.get('rate_limit_resume_at')
|
|
473
|
+
now = time.time()
|
|
474
|
+
if isinstance(resume_at, (int, float)) and resume_at > now:
|
|
475
|
+
wait_s = min(resume_at - now, 30)
|
|
476
|
+
if wait_s > 0:
|
|
477
|
+
logger.info(f"Rate limit backoff in effect; waiting {wait_s:.2f}s before next step")
|
|
478
|
+
time.sleep(wait_s)
|
|
479
|
+
else:
|
|
480
|
+
last_headers = (self.context.get('last_response_headers') or {})
|
|
481
|
+
if isinstance(last_headers, dict) and last_headers:
|
|
482
|
+
def _h(name: str):
|
|
483
|
+
name = (name or '').lower()
|
|
484
|
+
for k, v in last_headers.items():
|
|
485
|
+
if isinstance(k, str) and k.lower() == name:
|
|
486
|
+
return v
|
|
487
|
+
return None
|
|
488
|
+
retry_after = _h('retry-after')
|
|
489
|
+
if retry_after is not None:
|
|
490
|
+
try:
|
|
491
|
+
wait_s = int(str(retry_after).strip())
|
|
492
|
+
if wait_s > 0:
|
|
493
|
+
logger.info(f"Rate-limited by server (Retry-After={wait_s}s); pausing before next step")
|
|
494
|
+
time.sleep(min(wait_s, 30))
|
|
495
|
+
except Exception:
|
|
496
|
+
pass
|
|
497
|
+
else:
|
|
498
|
+
remaining = _h('x-ratelimit-remaining')
|
|
499
|
+
reset = _h('x-ratelimit-reset')
|
|
500
|
+
if remaining is not None and str(remaining).strip() == '0' and reset is not None:
|
|
501
|
+
try:
|
|
502
|
+
wait_s = int(str(reset).strip())
|
|
503
|
+
if wait_s > 0:
|
|
504
|
+
logger.info(f"Rate limit reached (remaining=0). Waiting {wait_s}s for reset before next step")
|
|
505
|
+
time.sleep(min(wait_s, 30))
|
|
506
|
+
except Exception:
|
|
507
|
+
pass
|
|
508
|
+
except Exception:
|
|
509
|
+
pass
|
|
361
510
|
|
|
362
511
|
except Exception as e:
|
|
363
512
|
logger.error(f"Exception in step {step.name}: {str(e)}")
|