cpd_sec-0.2.9-py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only and reflects the changes between those published versions.
- cpd/__init__.py +0 -0
- cpd/cli.py +315 -0
- cpd/engine.py +90 -0
- cpd/http_client.py +36 -0
- cpd/logic/__init__.py +0 -0
- cpd/logic/baseline.py +58 -0
- cpd/logic/poison.py +481 -0
- cpd/logic/validator.py +60 -0
- cpd/main.py +4 -0
- cpd/utils/__init__.py +0 -0
- cpd/utils/logger.py +73 -0
- cpd/utils/parser.py +63 -0
- cpd_sec-0.2.9.dist-info/METADATA +153 -0
- cpd_sec-0.2.9.dist-info/RECORD +16 -0
- cpd_sec-0.2.9.dist-info/WHEEL +4 -0
- cpd_sec-0.2.9.dist-info/entry_points.txt +4 -0
cpd/__init__.py
ADDED
File without changes
cpd/cli.py
ADDED
@@ -0,0 +1,315 @@
import click
import sys
import asyncio
from typing import TextIO
from cpd.utils.logger import setup_logger, logger
from cpd.engine import Engine

import os
import time
import json
import requests
from importlib.metadata import version, PackageNotFoundError

def check_for_updates(quiet=False):
    """
    Check for updates on PyPI with local caching to prevent frequent requests.
    """
    cache_dir = os.path.expanduser("~/.cpd")
    cache_file = os.path.join(cache_dir, "update_check.json")

    # 1. Ensure cache directory exists
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
        except OSError:
            return  # Fail silently if we can't write

    # 2. Check local cache (debounce 24h)
    now = time.time()
    try:
        if os.path.exists(cache_file):
            with open(cache_file, 'r') as f:
                data = json.load(f)
            last_checked = data.get('last_checked', 0)
            # If checked within the last 24 hours (86400 seconds), skip
            if now - last_checked < 86400:
                return
    except Exception:
        pass  # Ignore cache read errors

    # 3. Query PyPI
    try:
        # Get installed version
        try:
            current_version = version("cpd-sec")
        except PackageNotFoundError:
            current_version = "0.0.0"

        # Fetch latest from PyPI
        resp = requests.get("https://pypi.org/pypi/cpd-sec/json", timeout=2)
        if resp.status_code == 200:
            info = resp.json()
            latest_version = info['info']['version']

            # Plain string comparison; packaging.version would be more robust
            # for multi-digit or pre-release version strings.
            if latest_version != current_version and latest_version > current_version:
                msg = f"\n[+] A new version of CPD is available ({latest_version})! Run 'pip install --upgrade cpd-sec' to update.\n"
                if not quiet:
                    click.secho(msg, fg="green", bold=True)

            # 4. Update cache
            with open(cache_file, 'w') as f:
                json.dump({'last_checked': now, 'latest_seen': latest_version}, f)

    except Exception:
        # Fail silently on network errors, timeouts, or parse errors
        pass

def get_version():
    try:
        return version("cpd-sec")
    except PackageNotFoundError:
        return "unknown"

@click.group()
@click.version_option(version=get_version())
@click.option('--verbose', '-v', is_flag=True, help="Enable verbose logging.")
@click.option('--quiet', '-q', is_flag=True, help="Suppress informational output.")
@click.option('--log-level', '-l', help="Set log level (DEBUG, INFO, WARNING, ERROR, CRITICAL). Overrides -v and -q.")
def cli(verbose, quiet, log_level):
    """
    CachePoisonDetector (CPD) - A tool for detecting web cache poisoning vulnerabilities.
    """
    setup_logger(verbose, quiet, log_level)

    # Auto-check for updates on run (skip if quiet to avoid breaking pipelines)
    if not quiet:
        check_for_updates(quiet=True)

@cli.command()
def update():
    """
    Check for updates and show upgrade instructions.
    """
    logger.info("Checking for updates...")
    # Query PyPI directly here so the 24h cache used by check_for_updates() is bypassed.

    try:
        from importlib.metadata import version, PackageNotFoundError
        try:
            current = version("cpd-sec")
        except PackageNotFoundError:
            current = "unknown"

        logger.info(f"Current version: {current}")

        import requests
        resp = requests.get("https://pypi.org/pypi/cpd-sec/json", timeout=5)
        if resp.status_code == 200:
            latest = resp.json()['info']['version']
            if latest != current and latest > current:
                click.secho(f"[+] Update available: {latest}", fg="green", bold=True)
                click.secho("Run the following command to upgrade:", fg="white")
                click.secho("    pip install --upgrade cpd-sec", fg="cyan", bold=True)
            else:
                click.secho(f"[+] You are using the latest version ({current}).", fg="green")
        else:
            logger.error("Failed to fetch update info from PyPI.")
    except Exception as e:
        logger.error(f"Update check failed: {e}")


@cli.command()
@click.option('--url', '-u', help="Single URL to scan.")
@click.option('--file', '-f', type=click.File('r'), help="File containing URLs to scan.")
@click.option('--request-file', '-r', '-burp', type=click.File('r'), help="File containing raw HTTP request (Burp format).")
@click.option('--raw', help="Raw HTTP request string (use with caution).")
@click.option('--concurrency', '-c', default=50, help="Max concurrent requests.")
@click.option('--header', '-h', multiple=True, help="Custom header (e.g. 'Cookie: foo=bar'). Can be used multiple times.")
@click.option('--output', '-o', help="File to save JSON results to.")
def scan(url, file, request_file, raw, concurrency, header, output):
    """
    Scan one or more URLs for cache poisoning vulnerabilities.
    """
    from cpd.utils.parser import parse_raw_request

    # Parse headers
    custom_headers = {}
    if header:
        for h in header:
            if ':' in h:
                key, value = h.split(':', 1)
                custom_headers[key.strip()] = value.strip()
            else:
                logger.warning(f"Invalid header format: {h}. Expected 'Key: Value'")

    urls = []
    if url:
        urls.append(url)

    if file:
        for line in file:
            line = line.strip()
            if line:
                urls.append(line)

    # Check for stdin
    if not url and not file and not sys.stdin.isatty():
        for line in sys.stdin:
            line = line.strip()
            if line:
                urls.append(line)

    if not urls:
        # Handle raw request
        if request_file or raw:
            content = request_file.read() if request_file else raw
            try:
                parsed = parse_raw_request(content)
                logger.info(f"Loaded raw request: {parsed['method']} {parsed['url']}")
                urls.append(parsed['url'])

                # Merge headers (CLI -h values override headers from the raw request)
                combined = parsed['headers']
                combined.update(custom_headers)
                custom_headers = combined
            except Exception as e:
                logger.error(f"Failed to parse raw request: {e}")
                return

    if not urls:
        logger.error("No targets specified. Use --url, --file, --request-file, or pipe URLs via stdin.")
        return

    logger.info(f"Starting scan for {len(urls)} URLs with concurrency {concurrency}")

    engine = Engine(concurrency=concurrency, headers=custom_headers)
    findings = asyncio.run(engine.run(urls))

    if findings:
        import json
        logger.info(f"Total findings: {len(findings)}")
        print(json.dumps(findings, indent=2))

        if output:
            try:
                with open(output, 'w') as f:
                    json.dump(findings, f, indent=2)
                logger.info(f"Results saved to {output}")
            except IOError as e:
                logger.error(f"Failed to write results to {output}: {e}")
    else:
        logger.info("No vulnerabilities found.")

@cli.command()
@click.option('--url', '-u', required=True, help="Target URL to validate.")
@click.option('--header', '-H', required=True, help="Header to inject (e.g. 'X-Forwarded-Host: evil.com').")
@click.option('--method', '-m', default="GET", help="HTTP Method (default: GET).")
@click.option('--body', '-b', help="Request body.")
def validate(url, header, method, body):
    """
    Manually validate a potential vulnerability by running a step-by-step analysis.
    """
    import asyncio
    import time
    from cpd.http_client import HttpClient

    async def _run_validation():
        headers = {}
        if ':' in header:
            key, value = header.split(':', 1)
            headers[key.strip()] = value.strip()
        else:
            logger.error("Invalid header format. Expected 'Key: Value'")
            return

        async with HttpClient() as client:
            # 1. Baseline
            logger.info("[1/4] Fetching Baseline...")
            cb_base = f"cb={int(time.time())}_base"
            url_base = f"{url}?{cb_base}" if '?' not in url else f"{url}&{cb_base}"
            baseline = await client.request(method, url_base, data=body)
            if not baseline:
                logger.error("Failed to fetch baseline.")
                return
            logger.info(f"Baseline: Status {baseline['status']}, Length {len(baseline['body'])}")

            # 2. Poison Attempt
            logger.info(f"[2/4] Attempting Poison with {header}...")
            cb_poison = f"cb={int(time.time())}_poison"
            url_poison = f"{url}?{cb_poison}" if '?' not in url else f"{url}&{cb_poison}"
            poison = await client.request(method, url_poison, headers=headers, data=body)
            if not poison:
                logger.error("Failed to fetch poison request.")
                return

            logger.info(f"Poison Response: Status {poison['status']}, Length {len(poison['body'])}")

            # Check whether the poison attempt differed from the baseline.
            # Bodies are not compared strictly (timestamps may change); status and length suffice here.
            if poison['status'] != baseline['status']:
                logger.info(f"-> Poison caused status change: {baseline['status']} -> {poison['status']}")
            elif len(poison['body']) != len(baseline['body']):
                logger.info(f"-> Poison caused length change: {len(baseline['body'])} -> {len(poison['body'])}")
            else:
                logger.warning("-> Poison response identical to baseline (ignoring body content). Attack might have failed.")

            # 3. Verification (Clean Request)
            logger.info("[3/4] Verifying (Fetching clean URL with same cache key)...")
            # Reuse url_poison, which carries the cache buster we tried to poison
            verify = await client.request("GET", url_poison)
            if not verify:
                logger.error("Failed to fetch verify request.")
                return

            logger.info(f"Verify Response: Status {verify['status']}, Length {len(verify['body'])}")

            is_hit = False
            if verify['body'] == poison['body']:
                logger.info("-> Verify matches Poison: YES (Potential Cache Hit)")
                is_hit = True
            else:
                logger.info("-> Verify matches Poison: NO (Cache Miss or Dynamic)")

            if verify['body'] == baseline['body']:
                logger.info("-> Verify matches Baseline: YES")

            # 4. Fresh Baseline (Drift Check)
            logger.info("[4/4] Checking Fresh Baseline (for drift)...")
            cb_fresh = f"cb={int(time.time())}_fresh"
            url_fresh = f"{url}?{cb_fresh}" if '?' not in url else f"{url}&{cb_fresh}"
            fresh = await client.request(method, url_fresh, data=body)
            if not fresh:
                logger.error("Failed to fetch fresh baseline.")
                return

            logger.info(f"Fresh Response: Status {fresh['status']}, Length {len(fresh['body'])}")

            # Final Analysis
            print("\n--- Analysis ---")
            if not is_hit:
                print("RESULT: Safe. Verification request did not return the poisoned content.")
                return

            # Verification returned the poisoned content (Verify == Poison); rule out common false positives.
            if len(fresh['body']) == len(verify['body']):
                print("RESULT: False Positive (Benign).")
                print("Reason: The 'poisoned' content is identical in length to a fresh baseline.")
                print("The server likely ignored the malicious header, and the site returned standard dynamic content.")
                return

            if fresh['body'] == verify['body']:
                print("RESULT: False Positive (Drift).")
                print("Reason: Fresh baseline matches the 'poisoned' content. The site just changed naturally.")
                return

            print("RESULT: POTENTIAL VULNERABILITY!")
            print("Reason: Verification matched Poison, but Fresh Baseline differs.")
            print("The cache appears to be serving the poisoned response to clean requests.")

    asyncio.run(_run_validation())

if __name__ == "__main__":
    cli()
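For reference, a minimal sketch (not part of the package) of exercising the scan command in-process via click's test runner; the target URL and concurrency value are placeholders, and the output depends on what the engine finds.

# A minimal sketch, assuming cpd-sec is installed locally; only scan targets you are authorized to test.
from click.testing import CliRunner
from cpd.cli import cli

runner = CliRunner()
# Equivalent to: cpd scan -u https://example.com -c 10
result = runner.invoke(cli, ["scan", "-u", "https://example.com", "-c", "10"])
print(result.output)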
cpd/engine.py
ADDED
@@ -0,0 +1,90 @@
import asyncio
from typing import List, Dict
from cpd.http_client import HttpClient
from cpd.utils.logger import logger

class Engine:
    def __init__(self, concurrency: int = 50, timeout: int = 10, headers: Dict[str, str] = None):
        self.concurrency = concurrency
        self.timeout = timeout
        self.headers = headers or {}

    async def run(self, urls: List[str]):
        """
        Main execution loop.
        """
        # Worker pool pattern
        queue = asyncio.Queue()

        # Populate queue
        for url in urls:
            queue.put_nowait(url)

        # Create workers
        workers = []
        all_findings = []

        async def worker():
            while True:
                try:
                    url = queue.get_nowait()
                except asyncio.QueueEmpty:
                    break

                try:
                    result = await self._process_url(client, url)
                    if result:
                        all_findings.extend(result)
                except Exception as e:
                    logger.error(f"Error processing {url}: {e}")
                finally:
                    queue.task_done()

        async with HttpClient(timeout=self.timeout) as client:
            # Launch workers
            for _ in range(self.concurrency):
                workers.append(asyncio.create_task(worker()))

            # Wait for all workers to finish
            await asyncio.gather(*workers)

        return all_findings

    async def _process_url(self, client: HttpClient, url: str):
        from cpd.logic.baseline import BaselineAnalyzer

        # No semaphore needed; the worker count limits concurrency
        logger.info(f"Processing {url}")

        # 0. Cache Validation
        from cpd.logic.validator import CacheValidator
        validator = CacheValidator()
        is_cached, reason = await validator.analyze(client, url)

        if is_cached:
            logger.info(f"Cache detected on {url}: {reason}")
        else:
            logger.warning(f"Target {url} does not appear to be using a cache ({reason}). Findings might be invalid.")

        # 1. Baseline Analysis
        analyzer = BaselineAnalyzer(headers=self.headers)
        baseline = await analyzer.analyze(client, url)

        if not baseline:
            logger.error(f"Could not establish baseline for {url}")
            return

        logger.info(f"Baseline established for {url} - Stable: {baseline.is_stable}, Hash: {baseline.body_hash[:8]}")

        # 2. Poisoning Simulation
        from cpd.logic.poison import Poisoner
        poisoner = Poisoner(baseline, headers=self.headers)
        findings = await poisoner.run(client)
        if findings:
            logger.info(f"Scan finished for {url} - Findings: {len(findings)}")
            return findings
        else:
            logger.info(f"Scan finished for {url} - No vulnerabilities found")
            return []
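A minimal sketch (not part of the package) of driving the Engine directly, mirroring how scan() in cli.py uses it; the URL and custom header are placeholders.

# A minimal sketch: run the engine against a placeholder target.
import asyncio
from cpd.engine import Engine

engine = Engine(concurrency=10, headers={"User-Agent": "cpd-test"})
findings = asyncio.run(engine.run(["https://example.com/"]))
print(f"{len(findings)} finding(s)")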
cpd/http_client.py
ADDED
@@ -0,0 +1,36 @@
import aiohttp
import asyncio
from typing import Optional, Dict, Any
from cpd.utils.logger import logger

class HttpClient:
    def __init__(self, timeout: int = 10, proxy: Optional[str] = None):
        self.timeout = aiohttp.ClientTimeout(total=timeout)
        self.proxy = proxy
        self.session: Optional[aiohttp.ClientSession] = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession(timeout=self.timeout)
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()

    async def request(self, method: str, url: str, headers: Optional[Dict[str, str]] = None, **kwargs) -> Any:
        if not self.session:
            raise RuntimeError("Session not initialized. Use 'async with' context manager.")

        try:
            async with self.session.request(method, url, headers=headers, proxy=self.proxy, **kwargs) as response:
                # Read body immediately to release connection
                body = await response.read()
                return {
                    "status": response.status,
                    "headers": dict(response.headers),
                    "body": body,
                    "url": str(response.url)
                }
        except Exception as e:
            logger.debug(f"Request failed for {url}: {str(e)}")
            return None
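A minimal sketch (not part of the package) of using HttpClient on its own; the URL is a placeholder, and request() returns a dict on success or None on any failure.

# A minimal sketch: one GET through the async client.
import asyncio
from cpd.http_client import HttpClient

async def main():
    async with HttpClient(timeout=5) as client:
        resp = await client.request("GET", "https://example.com/")
        if resp:
            print(resp["status"], len(resp["body"]))

asyncio.run(main())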
cpd/logic/__init__.py
ADDED
File without changes
cpd/logic/baseline.py
ADDED
@@ -0,0 +1,58 @@
import hashlib
from typing import Dict, Optional
from dataclasses import dataclass
from cpd.http_client import HttpClient
from cpd.utils.logger import logger

@dataclass
class Baseline:
    url: str
    status: int
    headers: Dict[str, str]
    body_hash: str
    body: bytes = b""
    is_stable: bool = True

class BaselineAnalyzer:
    def __init__(self, iterations: int = 3, headers: Dict[str, str] = None):
        self.iterations = iterations
        self.headers = headers or {}

    async def analyze(self, client: HttpClient, url: str) -> Optional[Baseline]:
        """
        Fetch the URL multiple times to establish a baseline.
        """
        responses = []
        for i in range(self.iterations):
            resp = await client.request("GET", url, headers=self.headers)
            if not resp:
                logger.warning(f"Failed to fetch baseline for {url} (attempt {i+1})")
                continue
            responses.append(resp)

        if not responses:
            return None

        # Analyze stability
        first = responses[0]
        first_hash = self._calculate_hash(first['body'])

        is_stable = True
        for resp in responses[1:]:
            current_hash = self._calculate_hash(resp['body'])
            if current_hash != first_hash:
                is_stable = False
                logger.info(f"Baseline instability detected for {url}")
                break

        return Baseline(
            url=url,
            status=first['status'],
            headers=first['headers'],
            body_hash=first_hash,
            body=first['body'],
            is_stable=is_stable
        )

    def _calculate_hash(self, body: bytes) -> str:
        return hashlib.sha256(body).hexdigest()
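A minimal sketch (not part of the package) of establishing a baseline outside the engine; the URL is a placeholder.

# A minimal sketch: fetch a baseline and inspect its stability.
import asyncio
from cpd.http_client import HttpClient
from cpd.logic.baseline import BaselineAnalyzer

async def main():
    async with HttpClient() as client:
        baseline = await BaselineAnalyzer(iterations=3).analyze(client, "https://example.com/")
        if baseline:
            print(baseline.status, baseline.is_stable, baseline.body_hash[:8])

asyncio.run(main())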