hexproxy 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hexproxy/__init__.py +7 -0
- hexproxy/__main__.py +5 -0
- hexproxy/app.py +192 -0
- hexproxy/bodyview.py +435 -0
- hexproxy/certs.py +222 -0
- hexproxy/clipboard.py +89 -0
- hexproxy/extensions.py +739 -0
- hexproxy/mcp.py +2114 -0
- hexproxy/models.py +72 -0
- hexproxy/preferences.py +131 -0
- hexproxy/proxy.py +1178 -0
- hexproxy/store.py +1001 -0
- hexproxy/themes.py +274 -0
- hexproxy/tui.py +8796 -0
- hexproxy-0.2.2.dist-info/METADATA +556 -0
- hexproxy-0.2.2.dist-info/RECORD +20 -0
- hexproxy-0.2.2.dist-info/WHEEL +5 -0
- hexproxy-0.2.2.dist-info/entry_points.txt +2 -0
- hexproxy-0.2.2.dist-info/licenses/LICENSE +37 -0
- hexproxy-0.2.2.dist-info/top_level.txt +1 -0
hexproxy/__init__.py
ADDED
hexproxy/__main__.py
ADDED
hexproxy/app.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import asyncio
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import sys
|
|
7
|
+
import threading
|
|
8
|
+
|
|
9
|
+
from .certs import CertificateAuthority, default_certificate_dir
|
|
10
|
+
from .extensions import PluginManager
|
|
11
|
+
from .preferences import ApplicationPreferences
|
|
12
|
+
from .proxy import HttpProxyServer
|
|
13
|
+
from .store import TrafficStore
|
|
14
|
+
from .themes import ThemeManager
|
|
15
|
+
|
|
16
|
+
try:
|
|
17
|
+
from .tui import ProxyTUI
|
|
18
|
+
_TUI_IMPORT_ERROR: Exception | None = None
|
|
19
|
+
except ModuleNotFoundError as exc:
|
|
20
|
+
if exc.name not in {"curses", "_curses"}:
|
|
21
|
+
raise
|
|
22
|
+
ProxyTUI = None # type: ignore[assignment]
|
|
23
|
+
_TUI_IMPORT_ERROR = exc
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ProxyRuntime:
    """Hosts the proxy's asyncio event loop on a dedicated daemon thread.

    The curses TUI occupies the main thread, so the :class:`HttpProxyServer`
    runs its event loop here. Cross-thread interaction is limited to
    thread-safe primitives: ``run_coroutine`` submits work from the UI
    thread, ``stop`` signals the loop thread to shut the proxy down.
    """

    def __init__(self, proxy: HttpProxyServer) -> None:
        self.proxy = proxy
        # Daemon thread so a hung shutdown cannot keep the process alive.
        self._thread = threading.Thread(target=self._run, name="hexproxy-runtime", daemon=True)
        self._ready = threading.Event()  # set once proxy.start() finished (or failed)
        self._stopped = threading.Event()  # set after the loop is fully torn down
        self._shutdown_requested = threading.Event()  # polled by _runner to exit
        self._loop: asyncio.AbstractEventLoop | None = None
        self._error: Exception | None = None  # startup failure captured on the loop thread

    def start(self) -> None:
        """Start the loop thread and block until the proxy finished starting.

        Raises:
            RuntimeError: if startup failed; the original exception is chained.
        """
        self._thread.start()
        self._ready.wait()
        if self._error is not None:
            raise RuntimeError("failed to start proxy runtime") from self._error

    def stop(self) -> None:
        """Request shutdown and wait (bounded) for the loop thread to exit."""
        self._shutdown_requested.set()
        # Bounded join: the thread is a daemon, so a stuck stop cannot hang exit.
        self._thread.join(timeout=5)

    def run_coroutine(self, coro):
        """Run *coro* on the proxy loop from another thread; return its result.

        Blocks the calling thread until the coroutine completes and re-raises
        any exception the coroutine raised.
        """
        if self._loop is None:
            raise RuntimeError("proxy runtime loop is not available")
        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
        return future.result()

    def _run(self) -> None:
        """Thread target: create a fresh loop, run the proxy, then tear down."""
        self._loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self._loop)
        try:
            self._loop.run_until_complete(self._runner())
        except Exception as exc:
            # Record the failure and unblock start(), which will re-raise it.
            self._error = exc
            self._ready.set()
        finally:
            # Cancel any straggler tasks so the loop can close cleanly.
            pending = [task for task in asyncio.all_tasks(self._loop) if not task.done()]
            for task in pending:
                task.cancel()
            if pending:
                self._loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
            self._loop.run_until_complete(self._loop.shutdown_asyncgens())
            # NOTE(review): the ``timeout`` parameter of shutdown_default_executor
            # requires Python >= 3.12 — confirm against the package's minimum version.
            self._loop.run_until_complete(self._loop.shutdown_default_executor(timeout=1))
            self._loop.close()
            self._stopped.set()

    async def _runner(self) -> None:
        """Start the proxy, idle until shutdown is requested, then stop it."""
        try:
            await self.proxy.start()
        finally:
            # Unblock start() whether startup succeeded or raised.
            self._ready.set()
        # Lightweight poll; worst-case shutdown latency is ~100 ms.
        while not self._shutdown_requested.is_set():
            await asyncio.sleep(0.1)
        await self.proxy.stop()
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def build_parser() -> argparse.ArgumentParser:
    """Construct the command-line parser for the hexproxy entry point."""
    parser = argparse.ArgumentParser(description="HTTP interception proxy with a terminal UI.")
    parser.add_argument("--listen-host", default="127.0.0.1", help="Host interface to bind the proxy to.")
    parser.add_argument("--listen-port", default=8080, type=int, help="Port to bind the proxy to.")
    parser.add_argument("--project", type=Path, help="Project file used to load and autosave captured traffic.")
    # Repeatable flag: each occurrence appends another plugin directory.
    parser.add_argument("--plugin-dir", type=Path, action="append", default=[], help="Directory that contains HexProxy extension plugins.")
    parser.add_argument("--cert-dir", type=Path, default=default_certificate_dir(), help="Directory used to store the generated local CA and leaf certificates.")
    parser.add_argument("--config-file", type=Path, help="Global configuration file used for persistent application preferences.")
    return parser
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: wire up storage, plugins, proxy and TUI, then run.

    Returns a process exit code: 0 on normal exit, 1 when the TUI cannot be
    imported or the proxy fails to start.
    """
    args = build_parser().parse_args(argv)
    # ProxyTUI is None only when the curses import failed at module load.
    if ProxyTUI is None:
        if sys.platform.startswith("win"):
            print(
                "hexproxy: terminal UI is unavailable. Install the Windows TUI dependency with "
                "`pip install windows-curses` and try again.",
                file=sys.stderr,
            )
        else:
            print(f"hexproxy: failed to import curses support: {_TUI_IMPORT_ERROR}", file=sys.stderr)
        return 1
    store = TrafficStore()
    preferences = ApplicationPreferences(args.config_file)
    # A corrupt config file is non-fatal: warn and continue with defaults.
    try:
        preferences.load()
    except Exception as exc:
        print(f"hexproxy: failed to load config: {exc}", file=sys.stderr)
    plugin_manager = PluginManager()
    # The local "plugins" directory is always scanned in addition to --plugin-dir.
    plugin_dirs = [Path("plugins"), *args.plugin_dir]
    plugin_manager.load_from_dirs(plugin_dirs)
    theme_manager = ThemeManager()
    theme_manager.load()
    certificate_authority = CertificateAuthority(args.cert_dir)
    if args.project is not None:
        if args.project.exists():
            store.load(args.project)
        else:
            # New project file: register the path and create it immediately.
            store.set_project_path(args.project)
            store.save()
    proxy = HttpProxyServer(
        store=store,
        listen_host=args.listen_host,
        listen_port=args.listen_port,
        plugins=plugin_manager,
        certificate_authority=certificate_authority,
    )
    plugin_manager.bind_runtime(
        store=store,
        preferences=preferences,
        theme_manager=theme_manager,
    )
    runtime = ProxyRuntime(proxy)
    try:
        runtime.start()
    except Exception as exc:
        print(f"hexproxy: {exc}", file=sys.stderr)
        return 1

    tui = ProxyTUI(
        store=store,
        listen_host=proxy.listen_host,
        listen_port=proxy.listen_port,
        certificate_authority=certificate_authority,
        plugin_manager=plugin_manager,
        theme_manager=theme_manager,
        # Repeater requests are replayed on the proxy's loop thread.
        repeater_sender=lambda raw_request: runtime.run_coroutine(proxy.replay_request(raw_request)),
        initial_keybindings=preferences.keybindings(),
        keybinding_saver=lambda bindings: (preferences.set_keybindings(bindings), preferences.save()),
        initial_theme_name=preferences.theme_name(),
        theme_saver=lambda theme_name: (preferences.set_theme_name(theme_name), preferences.save()),
    )
    if proxy.startup_notice:
        # NOTE(review): reaches into a private TUI method — consider a public API.
        tui._set_status(proxy.startup_notice)
    try:
        tui.run()
    except KeyboardInterrupt:
        pass
    finally:
        # Best-effort teardown: a second Ctrl-C must not abort persistence.
        try:
            runtime.stop()
        except KeyboardInterrupt:
            pass
        try:
            preferences.set_keybindings(tui.custom_keybindings())
            preferences.set_theme_name(tui.theme_name())
            preferences.save()
            if args.project is not None:
                store.save()
        except KeyboardInterrupt:
            pass
    return 0
|
hexproxy/bodyview.py
ADDED
|
@@ -0,0 +1,435 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
import gzip
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
import zlib
|
|
8
|
+
from urllib.parse import parse_qsl
|
|
9
|
+
from xml.dom import minidom
|
|
10
|
+
|
|
11
|
+
from .models import HeaderList
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
import brotli # type: ignore
|
|
15
|
+
except ImportError:
|
|
16
|
+
brotli = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(slots=True)
class BodyDocument:
    """Prepared rendering of an HTTP message body for display in the body view."""

    media_type: str  # lowercased media type; "-" for empty bodies
    kind: str  # classification: "empty", "json", "xml", "html", "form", "text", "binary", ...
    display_name: str  # human-readable label for the detected kind
    raw_text: str  # decoded text, or a hexdump when the payload is binary
    pretty_text: str | None  # pretty-printed rendering, if one could be produced
    pretty_available: bool  # True when pretty_text exists and differs from raw_text
    is_binary: bool  # True when raw_text is a hexdump rather than real text
    encoding_summary: str  # e.g. "chunked decoded, gzip decoded" or "identity"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def build_body_document(headers: HeaderList, body: bytes) -> BodyDocument:
    """Decode, classify and pretty-print an HTTP body into a BodyDocument."""
    if not body:
        return BodyDocument(
            media_type="-",
            kind="empty",
            display_name="Empty",
            raw_text="No body.",
            pretty_text=None,
            pretty_available=False,
            is_binary=False,
            encoding_summary="identity",
        )

    mtype = _extract_media_type(headers)
    declared_charset = _extract_charset(headers)
    content_encodings = _extract_content_encodings(headers)
    decoded, summary, decoded_ok = _normalize_body(
        body, _extract_transfer_encodings(headers), content_encodings
    )
    # When decoding failed, sniff on an empty payload so only the media type counts.
    kind = _detect_kind(mtype, decoded if decoded_ok else b"")
    label = _display_name(kind, mtype)

    if content_encodings and not decoded_ok:
        # Still-compressed payload: show the original wire bytes as a hexdump.
        return BodyDocument(
            media_type=mtype or "application/octet-stream",
            kind="binary",
            display_name=f"{label} (encoded)",
            raw_text=_hexdump(body),
            pretty_text=None,
            pretty_available=False,
            is_binary=True,
            encoding_summary=summary,
        )

    if kind == "binary":
        return BodyDocument(
            media_type=mtype or "application/octet-stream",
            kind=kind,
            display_name=label,
            raw_text=_hexdump(decoded),
            pretty_text=None,
            pretty_available=False,
            is_binary=True,
            encoding_summary=summary,
        )

    text = _decode_body(decoded, declared_charset)
    pretty = _pretty_text(kind, text)
    return BodyDocument(
        media_type=mtype or "text/plain",
        kind=kind,
        display_name=label,
        raw_text=text,
        pretty_text=pretty,
        pretty_available=pretty is not None and pretty != text,
        is_binary=False,
        encoding_summary=summary,
    )
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def normalize_http_body(headers: HeaderList, body: bytes) -> tuple[bytes, str, bool]:
    """Strip transfer/content encodings from *body* per *headers*.

    Returns ``(normalized_bytes, encoding_summary, fully_decoded)``.
    """
    return _normalize_body(
        body,
        _extract_transfer_encodings(headers),
        _extract_content_encodings(headers),
    )
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _extract_media_type(headers: HeaderList) -> str:
|
|
99
|
+
for name, value in headers:
|
|
100
|
+
if name.lower() != "content-type":
|
|
101
|
+
continue
|
|
102
|
+
return value.split(";", 1)[0].strip().lower()
|
|
103
|
+
return ""
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _extract_charset(headers: HeaderList) -> str | None:
|
|
107
|
+
for name, value in headers:
|
|
108
|
+
if name.lower() != "content-type":
|
|
109
|
+
continue
|
|
110
|
+
for part in value.split(";")[1:]:
|
|
111
|
+
key, _, raw_value = part.partition("=")
|
|
112
|
+
if key.strip().lower() == "charset" and raw_value.strip():
|
|
113
|
+
return raw_value.strip().strip('"').strip("'")
|
|
114
|
+
return None
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _extract_transfer_encodings(headers: HeaderList) -> list[str]:
|
|
118
|
+
for name, value in headers:
|
|
119
|
+
if name.lower() != "transfer-encoding":
|
|
120
|
+
continue
|
|
121
|
+
return [item.strip().lower() for item in value.split(",") if item.strip()]
|
|
122
|
+
return []
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _extract_content_encodings(headers: HeaderList) -> list[str]:
|
|
126
|
+
for name, value in headers:
|
|
127
|
+
if name.lower() != "content-encoding":
|
|
128
|
+
continue
|
|
129
|
+
return [item.strip().lower() for item in value.split(",") if item.strip()]
|
|
130
|
+
return []
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _detect_kind(media_type: str, body: bytes) -> str:
|
|
134
|
+
if media_type in {"application/json", "text/json"} or media_type.endswith("+json"):
|
|
135
|
+
return "json"
|
|
136
|
+
if media_type in {"application/xml", "text/xml"} or media_type.endswith("+xml"):
|
|
137
|
+
return "xml"
|
|
138
|
+
if media_type == "text/html":
|
|
139
|
+
return "html"
|
|
140
|
+
if media_type == "application/x-www-form-urlencoded":
|
|
141
|
+
return "form"
|
|
142
|
+
if media_type in {"application/javascript", "text/javascript"}:
|
|
143
|
+
return "javascript"
|
|
144
|
+
if media_type == "text/css":
|
|
145
|
+
return "css"
|
|
146
|
+
if media_type.startswith("text/"):
|
|
147
|
+
return "text"
|
|
148
|
+
|
|
149
|
+
sample = body[:512].lstrip()
|
|
150
|
+
if sample.startswith((b"{", b"[")):
|
|
151
|
+
return "json"
|
|
152
|
+
if sample.startswith((b"<?xml", b"<")):
|
|
153
|
+
lowered = sample.lower()
|
|
154
|
+
if lowered.startswith((b"<!doctype html", b"<html")):
|
|
155
|
+
return "html"
|
|
156
|
+
return "xml"
|
|
157
|
+
if _looks_like_text(body):
|
|
158
|
+
return "text"
|
|
159
|
+
return "binary"
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _display_name(kind: str, media_type: str) -> str:
|
|
163
|
+
mapping = {
|
|
164
|
+
"empty": "Empty",
|
|
165
|
+
"json": "JSON",
|
|
166
|
+
"xml": "XML",
|
|
167
|
+
"html": "HTML",
|
|
168
|
+
"form": "Form URL Encoded",
|
|
169
|
+
"javascript": "JavaScript",
|
|
170
|
+
"css": "CSS",
|
|
171
|
+
"text": "Text",
|
|
172
|
+
"binary": "Binary",
|
|
173
|
+
}
|
|
174
|
+
return mapping.get(kind, media_type or "Unknown")
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _decode_body(body: bytes, charset: str | None) -> str:
|
|
178
|
+
if charset:
|
|
179
|
+
try:
|
|
180
|
+
return body.decode(charset, errors="replace")
|
|
181
|
+
except LookupError:
|
|
182
|
+
pass
|
|
183
|
+
try:
|
|
184
|
+
return body.decode("utf-8")
|
|
185
|
+
except UnicodeDecodeError:
|
|
186
|
+
return body.decode("iso-8859-1", errors="replace")
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _normalize_body(body: bytes, transfer_encodings: list[str], content_encodings: list[str]) -> tuple[bytes, str, bool]:
    """Undo chunked transfer coding and content encodings.

    Returns ``(bytes, summary, fully_decoded)`` where *summary* lists what was
    (or could not be) decoded, e.g. ``"chunked decoded, gzip decoded"``.
    """
    result = body
    summary: list[str] = []
    fully_decoded = True

    if "chunked" in transfer_encodings:
        try:
            result = _decode_chunked_body(result)
        except Exception:
            summary.append("chunked undecoded")
            fully_decoded = False
        else:
            summary.append("chunked decoded")

    # Encodings are applied in header order, so undo them in reverse.
    for encoding in reversed(content_encodings):
        try:
            result = _decode_content_encoding(result, encoding)
        except Exception:
            summary.append(f"{encoding} unsupported")
            fully_decoded = False
            break
        else:
            summary.append(f"{encoding} decoded")

    if not summary:
        summary.append("identity")
    return result, ", ".join(summary), fully_decoded
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _decode_content_encoding(body: bytes, encoding: str) -> bytes:
|
|
217
|
+
if encoding in {"gzip", "x-gzip"}:
|
|
218
|
+
return gzip.decompress(body)
|
|
219
|
+
if encoding == "deflate":
|
|
220
|
+
try:
|
|
221
|
+
return zlib.decompress(body)
|
|
222
|
+
except zlib.error:
|
|
223
|
+
return zlib.decompress(body, -zlib.MAX_WBITS)
|
|
224
|
+
if encoding == "br":
|
|
225
|
+
if brotli is None:
|
|
226
|
+
raise ValueError("brotli dependency is not installed")
|
|
227
|
+
return brotli.decompress(body)
|
|
228
|
+
if encoding in {"identity", ""}:
|
|
229
|
+
return body
|
|
230
|
+
raise ValueError(f"unsupported content encoding: {encoding}")
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _decode_chunked_body(body: bytes) -> bytes:
|
|
234
|
+
decoded = bytearray()
|
|
235
|
+
index = 0
|
|
236
|
+
total = len(body)
|
|
237
|
+
|
|
238
|
+
while True:
|
|
239
|
+
line_end = body.find(b"\r\n", index)
|
|
240
|
+
if line_end < 0:
|
|
241
|
+
raise ValueError("invalid chunked body: missing chunk size delimiter")
|
|
242
|
+
size_line = body[index:line_end]
|
|
243
|
+
chunk_size = int(size_line.split(b";", 1)[0].strip(), 16)
|
|
244
|
+
index = line_end + 2
|
|
245
|
+
if chunk_size == 0:
|
|
246
|
+
trailer_end = body.find(b"\r\n", index)
|
|
247
|
+
if trailer_end < 0:
|
|
248
|
+
raise ValueError("invalid chunked body: missing chunk trailer terminator")
|
|
249
|
+
break
|
|
250
|
+
if index + chunk_size + 2 > total:
|
|
251
|
+
raise ValueError("invalid chunked body: truncated chunk")
|
|
252
|
+
decoded.extend(body[index : index + chunk_size])
|
|
253
|
+
index += chunk_size
|
|
254
|
+
if body[index : index + 2] != b"\r\n":
|
|
255
|
+
raise ValueError("invalid chunked body: missing chunk terminator")
|
|
256
|
+
index += 2
|
|
257
|
+
return bytes(decoded)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def _pretty_text(kind: str, text: str) -> str | None:
|
|
261
|
+
try:
|
|
262
|
+
if kind == "json":
|
|
263
|
+
return json.dumps(json.loads(text), indent=2, ensure_ascii=False)
|
|
264
|
+
if kind == "xml":
|
|
265
|
+
parsed = minidom.parseString(text.encode("utf-8"))
|
|
266
|
+
return parsed.toprettyxml(indent=" ")
|
|
267
|
+
if kind == "html":
|
|
268
|
+
return _pretty_html(text)
|
|
269
|
+
if kind == "javascript":
|
|
270
|
+
return _pretty_javascript(text)
|
|
271
|
+
if kind == "css":
|
|
272
|
+
return _pretty_css(text)
|
|
273
|
+
if kind == "form":
|
|
274
|
+
pairs = parse_qsl(text, keep_blank_values=True)
|
|
275
|
+
if not pairs:
|
|
276
|
+
return None
|
|
277
|
+
return "\n".join(f"{key} = {value}" for key, value in pairs)
|
|
278
|
+
except Exception:
|
|
279
|
+
return None
|
|
280
|
+
return None
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _looks_like_text(body: bytes) -> bool:
|
|
284
|
+
sample = body[:512]
|
|
285
|
+
if not sample:
|
|
286
|
+
return True
|
|
287
|
+
allowed = 0
|
|
288
|
+
for byte in sample:
|
|
289
|
+
if byte in {9, 10, 13} or 32 <= byte <= 126:
|
|
290
|
+
allowed += 1
|
|
291
|
+
return (allowed / len(sample)) >= 0.85
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def _pretty_html(text: str) -> str | None:
    """Re-indent HTML one token per line; return None when nothing changed.

    Tokenizes into comments, declarations, tags and text runs, then emits one
    token per line with indentation tracking open/close tags. Text inside
    <script>/<style> elements is delegated to the embedded formatters.
    """
    # Token classes, in priority order: comment, <!...> declaration,
    # any open/close tag, then runs of non-tag text.
    tokens = re.findall(r"<!--.*?-->|<![^>]*>|</?[^>]+>|[^<]+", text, flags=re.DOTALL)
    if len(tokens) <= 1:
        return None

    lines: list[str] = []
    indent = 0
    # Set to "script"/"style" while inside such an element, so the next text
    # token is pretty-printed as JS/CSS instead of collapsed.
    current_embedded_kind: str | None = None
    # HTML void elements never get a closing tag, so they must not indent.
    void_tags = {
        "area",
        "base",
        "br",
        "col",
        "embed",
        "hr",
        "img",
        "input",
        "link",
        "meta",
        "param",
        "source",
        "track",
        "wbr",
    }

    for token in tokens:
        stripped = token.strip()
        if not stripped:
            continue

        if stripped.startswith("</"):
            # Closing tag: dedent first, and leave any embedded block.
            indent = max(0, indent - 1)
            current_embedded_kind = None
            lines.append(f"{' ' * indent}{stripped}")
            continue

        if stripped.startswith("<"):
            lines.append(f"{' ' * indent}{stripped}")
            tag_name = _html_tag_name(stripped)
            if tag_name in {"script", "style"} and not stripped.endswith("/>"):
                current_embedded_kind = tag_name
            # Indent only for real, non-void, non-self-closing open tags.
            if tag_name and not stripped.endswith("/>") and tag_name not in void_tags and not stripped.startswith("<!"):
                indent += 1
            continue

        # Text token inside <script>/<style>: use the embedded formatter.
        embedded_pretty = _pretty_embedded_block(current_embedded_kind, stripped)
        if embedded_pretty is not None:
            for embedded_line in embedded_pretty.splitlines():
                if embedded_line.strip():
                    lines.append(f"{' ' * indent}{embedded_line}")
            continue

        # Ordinary text: collapse internal whitespace to single spaces.
        collapsed = " ".join(part for part in stripped.split())
        if collapsed:
            lines.append(f"{' ' * indent}{collapsed}")

    pretty = "\n".join(lines)
    # Only report a pretty version if it actually differs from the input.
    if pretty == text:
        return None
    return pretty
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def _html_tag_name(tag: str) -> str:
|
|
357
|
+
match = re.match(r"</?\s*([a-zA-Z0-9:_-]+)", tag)
|
|
358
|
+
if match is None:
|
|
359
|
+
return ""
|
|
360
|
+
return match.group(1).lower()
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def _pretty_embedded_block(kind: str | None, text: str) -> str | None:
    """Format text found inside <script>/<style> tags; None for anything else."""
    formatters = {"script": _pretty_javascript, "style": _pretty_css}
    formatter = formatters.get(kind) if kind else None
    return formatter(text) if formatter else None
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def _pretty_javascript(text: str) -> str | None:
    """Reflow JavaScript by brace/semicolon structure; None when unchanged."""
    reflowed = _pretty_braced_text(text, keep_space_before_brace=True)
    return None if reflowed == text else reflowed
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
def _pretty_css(text: str) -> str | None:
    """Reflow CSS: one declaration per line, braces normalized; None when unchanged."""
    reflowed = _pretty_braced_text(text, keep_space_before_brace=False)
    # Normalize brace and semicolon placement on top of the generic reflow.
    reflowed = re.sub(r"\s*\{\s*", " {\n", reflowed)
    reflowed = re.sub(r";\s*", ";\n", reflowed)
    reflowed = re.sub(r"\n\s*\}", "\n}", reflowed)
    reflowed = _normalize_indentation(reflowed)
    return None if reflowed == text else reflowed
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def _pretty_braced_text(text: str, keep_space_before_brace: bool) -> str:
    """Reflow brace-structured source (JS/CSS) with one-space-per-level indent.

    Collapses all whitespace, then walks the text character by character,
    breaking lines after "{", "}" and ";" while tracking nesting depth.
    *keep_space_before_brace* keeps exactly one space before "{" (JS style);
    when False the brace hugs the preceding token (CSS handles spacing later).
    Returns the input unchanged if it is empty after collapsing.
    """
    compact = re.sub(r"\s+", " ", text.strip())
    if not compact:
        return text

    pieces: list[str] = []
    indent = 0
    index = 0
    while index < len(compact):
        character = compact[index]
        if character == "{":
            # Emit " {" only when there is a preceding token without trailing space.
            prefix = " {" if keep_space_before_brace and pieces and not pieces[-1].endswith((" ", "\n")) else "{"
            if keep_space_before_brace and pieces and pieces[-1].endswith(" "):
                pieces[-1] = pieces[-1].rstrip()
            pieces.append(prefix)
            indent += 1
            pieces.append("\n" + " " * indent)
        elif character == "}":
            indent = max(0, indent - 1)
            pieces.append("\n" + " " * indent + "}")
            # Stay on the same line before ";", ",", ")" or another "}".
            if index + 1 < len(compact) and compact[index + 1] not in ";,)}":
                pieces.append("\n" + " " * indent)
        elif character == ";":
            pieces.append(";")
            # No trailing newline after the final statement.
            if index + 1 < len(compact):
                pieces.append("\n" + " " * indent)
        else:
            pieces.append(character)
        index += 1

    return _normalize_indentation("".join(pieces))
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def _normalize_indentation(text: str) -> str:
|
|
423
|
+
lines = [line.rstrip() for line in text.splitlines()]
|
|
424
|
+
compact_lines = [line for line in lines if line.strip()]
|
|
425
|
+
return "\n".join(compact_lines)
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
def _hexdump(body: bytes, chunk_size: int = 16) -> str:
|
|
429
|
+
lines: list[str] = []
|
|
430
|
+
for offset in range(0, len(body), chunk_size):
|
|
431
|
+
chunk = body[offset : offset + chunk_size]
|
|
432
|
+
hex_part = " ".join(f"{byte:02x}" for byte in chunk)
|
|
433
|
+
ascii_part = "".join(chr(byte) if 32 <= byte <= 126 else "." for byte in chunk)
|
|
434
|
+
lines.append(f"{offset:08x} {hex_part:<47} {ascii_part}")
|
|
435
|
+
return "\n".join(lines)
|