hexproxy 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hexproxy/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ from importlib.metadata import version, PackageNotFoundError
2
+
3
+ try:
4
+ __version__ = version("hexproxy")
5
+ except PackageNotFoundError:
6
+ # Used when running from a development checkout without the package installed
7
+ __version__ = "0.0.0"
hexproxy/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ from .app import main
2
+
3
+
4
# Entry point for ``python -m hexproxy``: exit the process with main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())
hexproxy/app.py ADDED
@@ -0,0 +1,192 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import asyncio
5
+ from pathlib import Path
6
+ import sys
7
+ import threading
8
+
9
+ from .certs import CertificateAuthority, default_certificate_dir
10
+ from .extensions import PluginManager
11
+ from .preferences import ApplicationPreferences
12
+ from .proxy import HttpProxyServer
13
+ from .store import TrafficStore
14
+ from .themes import ThemeManager
15
+
16
# Import the terminal UI if possible. A missing curses module is tolerated here
# and remembered in _TUI_IMPORT_ERROR so that main() can report it; any other
# missing module is re-raised unchanged.
try:
    from .tui import ProxyTUI
    _TUI_IMPORT_ERROR: Exception | None = None
except ModuleNotFoundError as exc:
    if exc.name not in {"curses", "_curses"}:
        raise
    ProxyTUI = None  # type: ignore[assignment]
    _TUI_IMPORT_ERROR = exc
24
+
25
+
26
class ProxyRuntime:
    """Run a proxy server on a private asyncio event loop in a daemon thread.

    The runtime owns a background thread that creates its own event loop,
    starts ``proxy`` on it, idles until a shutdown is requested and then stops
    the proxy and tears the loop down.
    """

    def __init__(self, proxy: HttpProxyServer) -> None:
        """Prepare (but do not start) the background thread for ``proxy``."""
        self.proxy = proxy
        self._thread = threading.Thread(target=self._run, name="hexproxy-runtime", daemon=True)
        self._ready = threading.Event()
        self._stopped = threading.Event()
        self._shutdown_requested = threading.Event()
        self._loop: asyncio.AbstractEventLoop | None = None
        self._error: Exception | None = None

    def start(self) -> None:
        """Start the background thread and block until the proxy has started.

        Raises:
            RuntimeError: If the proxy failed to start; the original exception
                is attached as ``__cause__``.
        """
        self._thread.start()
        self._ready.wait()
        if self._error is not None:
            raise RuntimeError("failed to start proxy runtime") from self._error

    def stop(self) -> None:
        """Request shutdown and wait up to five seconds for the thread to exit."""
        self._shutdown_requested.set()
        self._thread.join(timeout=5)

    def run_coroutine(self, coro):
        """Run ``coro`` on the runtime loop and return its result.

        Blocks the calling thread until the coroutine has finished.

        Raises:
            RuntimeError: If the runtime loop has not been created yet.
        """
        if self._loop is None:
            # Close the coroutine so it does not trigger a "never awaited" warning.
            close = getattr(coro, "close", None)
            if callable(close):
                close()
            raise RuntimeError("proxy runtime loop is not available")
        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
        return future.result()

    def _run(self) -> None:
        """Thread target: drive the event loop for the lifetime of the proxy."""
        try:
            loop = asyncio.new_event_loop()
        except Exception as exc:
            # Without a loop nothing can run; unblock start() with the error.
            self._error = exc
            self._ready.set()
            self._stopped.set()
            return

        self._loop = loop
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(self._runner())
        except Exception as exc:
            self._error = exc
            self._ready.set()
        finally:
            try:
                self._drain_loop(loop)
            finally:
                # Always close the loop and signal completion, even when the
                # cleanup itself fails.
                loop.close()
                self._stopped.set()

    @staticmethod
    def _drain_loop(loop: asyncio.AbstractEventLoop) -> None:
        """Cancel leftover tasks and shut down generators and the executor."""
        pending = [task for task in asyncio.all_tasks(loop) if not task.done()]
        for task in pending:
            task.cancel()
        if pending:
            loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
        loop.run_until_complete(loop.shutdown_asyncgens())
        try:
            shutdown = loop.shutdown_default_executor(timeout=1)
        except TypeError:
            # The ``timeout`` parameter only exists on Python 3.12 and newer.
            shutdown = loop.shutdown_default_executor()
        loop.run_until_complete(shutdown)

    async def _runner(self) -> None:
        """Start the proxy, wait for the shutdown request, then stop the proxy."""
        try:
            await self.proxy.start()
        except Exception as exc:
            # Record the failure *before* waking start(); otherwise the waiting
            # thread can observe ``_ready`` while ``_error`` is still unset.
            self._error = exc
            raise
        finally:
            self._ready.set()
        while not self._shutdown_requested.is_set():
            await asyncio.sleep(0.1)
        await self.proxy.stop()
79
+
80
+
81
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the hexproxy executable.

    Returns:
        A parser exposing ``--listen-host``, ``--listen-port``, ``--project``,
        ``--plugin-dir`` (repeatable), ``--cert-dir`` and ``--config-file``.
    """
    cli = argparse.ArgumentParser(description="HTTP interception proxy with a terminal UI.")
    option = cli.add_argument

    # Network binding.
    option("--listen-host", default="127.0.0.1", help="Host interface to bind the proxy to.")
    option("--listen-port", default=8080, type=int, help="Port to bind the proxy to.")

    # Filesystem locations.
    option("--project", type=Path, help="Project file used to load and autosave captured traffic.")
    option(
        "--plugin-dir",
        type=Path,
        action="append",
        default=[],
        help="Directory that contains HexProxy extension plugins.",
    )
    option(
        "--cert-dir",
        type=Path,
        default=default_certificate_dir(),
        help="Directory used to store the generated local CA and leaf certificates.",
    )
    option(
        "--config-file",
        type=Path,
        help="Global configuration file used for persistent application preferences.",
    )
    return cli
109
+
110
+
111
def main(argv: list[str] | None = None) -> int:
    """Parse arguments, start the proxy runtime and run the terminal UI.

    Args:
        argv: Arguments handed to the parser; ``None`` lets argparse use
            ``sys.argv``.

    Returns:
        ``1`` when the terminal UI cannot be imported or the proxy runtime
        fails to start, otherwise ``0`` after the UI has exited.
    """
    args = build_parser().parse_args(argv)
    if ProxyTUI is None:
        if sys.platform.startswith("win"):
            print(
                "hexproxy: terminal UI is unavailable. Install the Windows TUI dependency with "
                "`pip install windows-curses` and try again.",
                file=sys.stderr,
            )
        else:
            print(f"hexproxy: failed to import curses support: {_TUI_IMPORT_ERROR}", file=sys.stderr)
        return 1
    store = TrafficStore()
    preferences = ApplicationPreferences(args.config_file)
    try:
        preferences.load()
    except Exception as exc:
        # A broken config file is reported but does not prevent startup.
        print(f"hexproxy: failed to load config: {exc}", file=sys.stderr)
    plugin_manager = PluginManager()
    plugin_dirs = [Path("plugins"), *args.plugin_dir]
    plugin_manager.load_from_dirs(plugin_dirs)
    theme_manager = ThemeManager()
    theme_manager.load()
    certificate_authority = CertificateAuthority(args.cert_dir)
    if args.project is not None:
        if args.project.exists():
            store.load(args.project)
        else:
            # Create the project file right away so later autosaves have a target.
            store.set_project_path(args.project)
            store.save()
    proxy = HttpProxyServer(
        store=store,
        listen_host=args.listen_host,
        listen_port=args.listen_port,
        plugins=plugin_manager,
        certificate_authority=certificate_authority,
    )
    plugin_manager.bind_runtime(
        store=store,
        preferences=preferences,
        theme_manager=theme_manager,
    )
    runtime = ProxyRuntime(proxy)
    try:
        runtime.start()
    except Exception as exc:
        print(f"hexproxy: {exc}", file=sys.stderr)
        return 1

    tui = ProxyTUI(
        store=store,
        listen_host=proxy.listen_host,
        listen_port=proxy.listen_port,
        certificate_authority=certificate_authority,
        plugin_manager=plugin_manager,
        theme_manager=theme_manager,
        repeater_sender=lambda raw_request: runtime.run_coroutine(proxy.replay_request(raw_request)),
        initial_keybindings=preferences.keybindings(),
        keybinding_saver=lambda bindings: (preferences.set_keybindings(bindings), preferences.save()),
        initial_theme_name=preferences.theme_name(),
        theme_saver=lambda theme_name: (preferences.set_theme_name(theme_name), preferences.save()),
    )
    if proxy.startup_notice:
        tui._set_status(proxy.startup_notice)
    try:
        tui.run()
    except KeyboardInterrupt:
        pass
    finally:
        try:
            runtime.stop()
        except KeyboardInterrupt:
            pass
        try:
            try:
                preferences.set_keybindings(tui.custom_keybindings())
                preferences.set_theme_name(tui.theme_name())
                preferences.save()
            finally:
                # Captured traffic must be written even when the preferences
                # could not be saved (error or Ctrl-C); the original failure,
                # if any, still propagates afterwards.
                if args.project is not None:
                    store.save()
        except KeyboardInterrupt:
            pass
    return 0
hexproxy/bodyview.py ADDED
@@ -0,0 +1,435 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ import gzip
5
+ import json
6
+ import re
7
+ import zlib
8
+ from urllib.parse import parse_qsl
9
+ from xml.dom import minidom
10
+
11
+ from .models import HeaderList
12
+
13
+ try:
14
+ import brotli # type: ignore
15
+ except ImportError:
16
+ brotli = None
17
+
18
+
19
+ @dataclass(slots=True)
20
+ class BodyDocument:
21
+ media_type: str
22
+ kind: str
23
+ display_name: str
24
+ raw_text: str
25
+ pretty_text: str | None
26
+ pretty_available: bool
27
+ is_binary: bool
28
+ encoding_summary: str
29
+
30
+
31
def build_body_document(headers: HeaderList, body: bytes) -> BodyDocument:
    """Turn a raw HTTP body into a ``BodyDocument`` ready for display.

    Args:
        headers: Header name/value pairs of the message the body belongs to.
        body: The body bytes as captured.

    Returns:
        An "empty" document for a falsy body, a hexdump-backed binary document
        when the body is binary or its content encoding could not be undone,
        and otherwise a text document with an optional pretty rendering.
    """
    if not body:
        return BodyDocument(
            media_type="-",
            kind="empty",
            display_name="Empty",
            raw_text="No body.",
            pretty_text=None,
            pretty_available=False,
            is_binary=False,
            encoding_summary="identity",
        )

    media_type = _extract_media_type(headers)
    charset = _extract_charset(headers)
    transfer_encodings = _extract_transfer_encodings(headers)
    content_encodings = _extract_content_encodings(headers)
    normalized_body, encoding_summary, fully_decoded = _normalize_body(
        body, transfer_encodings, content_encodings
    )
    # Content sniffing only makes sense on a completely decoded payload.
    sniffable = normalized_body if fully_decoded else b""
    kind = _detect_kind(media_type, sniffable)
    display_name = _display_name(kind, media_type)

    def hexdump_document(payload: bytes, kind_label: str, title: str) -> BodyDocument:
        # Both binary outcomes share every field except these three.
        return BodyDocument(
            media_type=media_type or "application/octet-stream",
            kind=kind_label,
            display_name=title,
            raw_text=_hexdump(payload),
            pretty_text=None,
            pretty_available=False,
            is_binary=True,
            encoding_summary=encoding_summary,
        )

    if content_encodings and not fully_decoded:
        # Still-encoded content: dump the body exactly as it was received.
        return hexdump_document(body, "binary", f"{display_name} (encoded)")

    if kind == "binary":
        return hexdump_document(normalized_body, kind, display_name)

    text = _decode_body(normalized_body, charset)
    pretty_text = _pretty_text(kind, text)
    has_pretty = pretty_text is not None and pretty_text != text
    return BodyDocument(
        media_type=media_type or "text/plain",
        kind=kind,
        display_name=display_name,
        raw_text=text,
        pretty_text=pretty_text,
        pretty_available=has_pretty,
        is_binary=False,
        encoding_summary=encoding_summary,
    )
90
+
91
+
92
def normalize_http_body(headers: HeaderList, body: bytes) -> tuple[bytes, str, bool]:
    """Undo the transfer and content encodings that ``headers`` announce.

    Returns:
        The ``(body, encoding_summary, fully_decoded)`` tuple produced by
        ``_normalize_body``.
    """
    return _normalize_body(
        body,
        _extract_transfer_encodings(headers),
        _extract_content_encodings(headers),
    )
96
+
97
+
98
+ def _extract_media_type(headers: HeaderList) -> str:
99
+ for name, value in headers:
100
+ if name.lower() != "content-type":
101
+ continue
102
+ return value.split(";", 1)[0].strip().lower()
103
+ return ""
104
+
105
+
106
+ def _extract_charset(headers: HeaderList) -> str | None:
107
+ for name, value in headers:
108
+ if name.lower() != "content-type":
109
+ continue
110
+ for part in value.split(";")[1:]:
111
+ key, _, raw_value = part.partition("=")
112
+ if key.strip().lower() == "charset" and raw_value.strip():
113
+ return raw_value.strip().strip('"').strip("'")
114
+ return None
115
+
116
+
117
+ def _extract_transfer_encodings(headers: HeaderList) -> list[str]:
118
+ for name, value in headers:
119
+ if name.lower() != "transfer-encoding":
120
+ continue
121
+ return [item.strip().lower() for item in value.split(",") if item.strip()]
122
+ return []
123
+
124
+
125
+ def _extract_content_encodings(headers: HeaderList) -> list[str]:
126
+ for name, value in headers:
127
+ if name.lower() != "content-encoding":
128
+ continue
129
+ return [item.strip().lower() for item in value.split(",") if item.strip()]
130
+ return []
131
+
132
+
133
+ def _detect_kind(media_type: str, body: bytes) -> str:
134
+ if media_type in {"application/json", "text/json"} or media_type.endswith("+json"):
135
+ return "json"
136
+ if media_type in {"application/xml", "text/xml"} or media_type.endswith("+xml"):
137
+ return "xml"
138
+ if media_type == "text/html":
139
+ return "html"
140
+ if media_type == "application/x-www-form-urlencoded":
141
+ return "form"
142
+ if media_type in {"application/javascript", "text/javascript"}:
143
+ return "javascript"
144
+ if media_type == "text/css":
145
+ return "css"
146
+ if media_type.startswith("text/"):
147
+ return "text"
148
+
149
+ sample = body[:512].lstrip()
150
+ if sample.startswith((b"{", b"[")):
151
+ return "json"
152
+ if sample.startswith((b"<?xml", b"<")):
153
+ lowered = sample.lower()
154
+ if lowered.startswith((b"<!doctype html", b"<html")):
155
+ return "html"
156
+ return "xml"
157
+ if _looks_like_text(body):
158
+ return "text"
159
+ return "binary"
160
+
161
+
162
+ def _display_name(kind: str, media_type: str) -> str:
163
+ mapping = {
164
+ "empty": "Empty",
165
+ "json": "JSON",
166
+ "xml": "XML",
167
+ "html": "HTML",
168
+ "form": "Form URL Encoded",
169
+ "javascript": "JavaScript",
170
+ "css": "CSS",
171
+ "text": "Text",
172
+ "binary": "Binary",
173
+ }
174
+ return mapping.get(kind, media_type or "Unknown")
175
+
176
+
177
+ def _decode_body(body: bytes, charset: str | None) -> str:
178
+ if charset:
179
+ try:
180
+ return body.decode(charset, errors="replace")
181
+ except LookupError:
182
+ pass
183
+ try:
184
+ return body.decode("utf-8")
185
+ except UnicodeDecodeError:
186
+ return body.decode("iso-8859-1", errors="replace")
187
+
188
+
189
+ def _normalize_body(body: bytes, transfer_encodings: list[str], content_encodings: list[str]) -> tuple[bytes, str, bool]:
190
+ normalized = body
191
+ notes: list[str] = []
192
+ fully_decoded = True
193
+
194
+ if "chunked" in transfer_encodings:
195
+ try:
196
+ normalized = _decode_chunked_body(normalized)
197
+ notes.append("chunked decoded")
198
+ except Exception:
199
+ notes.append("chunked undecoded")
200
+ fully_decoded = False
201
+
202
+ for encoding in reversed(content_encodings):
203
+ try:
204
+ normalized = _decode_content_encoding(normalized, encoding)
205
+ notes.append(f"{encoding} decoded")
206
+ except Exception:
207
+ notes.append(f"{encoding} unsupported")
208
+ fully_decoded = False
209
+ break
210
+
211
+ if not notes:
212
+ notes.append("identity")
213
+ return normalized, ", ".join(notes), fully_decoded
214
+
215
+
216
+ def _decode_content_encoding(body: bytes, encoding: str) -> bytes:
217
+ if encoding in {"gzip", "x-gzip"}:
218
+ return gzip.decompress(body)
219
+ if encoding == "deflate":
220
+ try:
221
+ return zlib.decompress(body)
222
+ except zlib.error:
223
+ return zlib.decompress(body, -zlib.MAX_WBITS)
224
+ if encoding == "br":
225
+ if brotli is None:
226
+ raise ValueError("brotli dependency is not installed")
227
+ return brotli.decompress(body)
228
+ if encoding in {"identity", ""}:
229
+ return body
230
+ raise ValueError(f"unsupported content encoding: {encoding}")
231
+
232
+
233
+ def _decode_chunked_body(body: bytes) -> bytes:
234
+ decoded = bytearray()
235
+ index = 0
236
+ total = len(body)
237
+
238
+ while True:
239
+ line_end = body.find(b"\r\n", index)
240
+ if line_end < 0:
241
+ raise ValueError("invalid chunked body: missing chunk size delimiter")
242
+ size_line = body[index:line_end]
243
+ chunk_size = int(size_line.split(b";", 1)[0].strip(), 16)
244
+ index = line_end + 2
245
+ if chunk_size == 0:
246
+ trailer_end = body.find(b"\r\n", index)
247
+ if trailer_end < 0:
248
+ raise ValueError("invalid chunked body: missing chunk trailer terminator")
249
+ break
250
+ if index + chunk_size + 2 > total:
251
+ raise ValueError("invalid chunked body: truncated chunk")
252
+ decoded.extend(body[index : index + chunk_size])
253
+ index += chunk_size
254
+ if body[index : index + 2] != b"\r\n":
255
+ raise ValueError("invalid chunked body: missing chunk terminator")
256
+ index += 2
257
+ return bytes(decoded)
258
+
259
+
260
+ def _pretty_text(kind: str, text: str) -> str | None:
261
+ try:
262
+ if kind == "json":
263
+ return json.dumps(json.loads(text), indent=2, ensure_ascii=False)
264
+ if kind == "xml":
265
+ parsed = minidom.parseString(text.encode("utf-8"))
266
+ return parsed.toprettyxml(indent=" ")
267
+ if kind == "html":
268
+ return _pretty_html(text)
269
+ if kind == "javascript":
270
+ return _pretty_javascript(text)
271
+ if kind == "css":
272
+ return _pretty_css(text)
273
+ if kind == "form":
274
+ pairs = parse_qsl(text, keep_blank_values=True)
275
+ if not pairs:
276
+ return None
277
+ return "\n".join(f"{key} = {value}" for key, value in pairs)
278
+ except Exception:
279
+ return None
280
+ return None
281
+
282
+
283
+ def _looks_like_text(body: bytes) -> bool:
284
+ sample = body[:512]
285
+ if not sample:
286
+ return True
287
+ allowed = 0
288
+ for byte in sample:
289
+ if byte in {9, 10, 13} or 32 <= byte <= 126:
290
+ allowed += 1
291
+ return (allowed / len(sample)) >= 0.85
292
+
293
+
294
def _pretty_html(text: str) -> str | None:
    """Re-indent HTML one token per line.

    The markup is split by a regular expression into comments, ``<!...>``
    declarations, tags and text runs. Closing tags decrease the indentation,
    opening non-void tags increase it, and text that follows a ``<script>``
    or ``<style>`` tag is passed to the embedded formatter.

    Returns:
        The re-indented markup, or ``None`` when at most one token was found
        or the result equals the input.
    """
    tokens = re.findall(r"<!--.*?-->|<![^>]*>|</?[^>]+>|[^<]+", text, flags=re.DOTALL)
    if len(tokens) <= 1:
        return None

    void_tags = {
        "area",
        "base",
        "br",
        "col",
        "embed",
        "hr",
        "img",
        "input",
        "link",
        "meta",
        "param",
        "source",
        "track",
        "wbr",
    }
    output: list[str] = []
    depth = 0
    embedded_kind: str | None = None

    for raw_token in tokens:
        piece = raw_token.strip()
        if not piece:
            continue

        if piece.startswith("</"):
            # Closing tag: dedent first so it lines up with its opener.
            depth = max(0, depth - 1)
            embedded_kind = None
            output.append(f"{' ' * depth}{piece}")
        elif piece.startswith("<"):
            output.append(f"{' ' * depth}{piece}")
            self_closing = piece.endswith("/>")
            tag_name = _html_tag_name(piece)
            if tag_name in {"script", "style"} and not self_closing:
                embedded_kind = tag_name
            opens_scope = (
                bool(tag_name)
                and not self_closing
                and tag_name not in void_tags
                and not piece.startswith("<!")
            )
            if opens_scope:
                depth += 1
        else:
            embedded_pretty = _pretty_embedded_block(embedded_kind, piece)
            if embedded_pretty is not None:
                output.extend(
                    f"{' ' * depth}{embedded_line}"
                    for embedded_line in embedded_pretty.splitlines()
                    if embedded_line.strip()
                )
            else:
                collapsed = " ".join(piece.split())
                if collapsed:
                    output.append(f"{' ' * depth}{collapsed}")

    pretty = "\n".join(output)
    return None if pretty == text else pretty
354
+
355
+
356
+ def _html_tag_name(tag: str) -> str:
357
+ match = re.match(r"</?\s*([a-zA-Z0-9:_-]+)", tag)
358
+ if match is None:
359
+ return ""
360
+ return match.group(1).lower()
361
+
362
+
363
+ def _pretty_embedded_block(kind: str | None, text: str) -> str | None:
364
+ if kind == "script":
365
+ return _pretty_javascript(text)
366
+ if kind == "style":
367
+ return _pretty_css(text)
368
+ return None
369
+
370
+
371
def _pretty_javascript(text: str) -> str | None:
    """Re-flow JavaScript source on braces and semicolons.

    Returns ``None`` when the formatted text equals the input.
    """
    formatted = _pretty_braced_text(text, keep_space_before_brace=True)
    return None if formatted == text else formatted
376
+
377
+
378
def _pretty_css(text: str) -> str | None:
    """Re-flow CSS source: one declaration per line, braces normalised.

    Returns ``None`` when the formatted text equals the input.
    """
    formatted = _pretty_braced_text(text, keep_space_before_brace=False)
    # Applied in this order: opening braces, declaration ends, closing braces.
    rewrites = (
        (r"\s*\{\s*", " {\n"),
        (r";\s*", ";\n"),
        (r"\n\s*\}", "\n}"),
    )
    for pattern, replacement in rewrites:
        formatted = re.sub(pattern, replacement, formatted)
    formatted = _normalize_indentation(formatted)
    return None if formatted == text else formatted
387
+
388
+
389
def _pretty_braced_text(text: str, keep_space_before_brace: bool) -> str:
    """Re-flow brace-delimited source text onto multiple lines.

    Whitespace runs are collapsed to single spaces, then the text is scanned
    character by character: ``{`` opens a new indentation level, ``}`` closes
    one and is placed on its own line, and ``;`` ends the current line. Every
    other character is copied unchanged, so braces and semicolons inside
    string literals or comments are treated like any other.

    Args:
        text: Source text to format.
        keep_space_before_brace: Controls whether `` {`` instead of ``{`` is
            emitted when the preceding piece does not end in whitespace.

    Returns:
        The re-flowed text with blank lines and trailing whitespace removed,
        or ``text`` unchanged when it contains only whitespace.
    """
    # Collapse all whitespace first so the original layout cannot leak through.
    compact = re.sub(r"\s+", " ", text.strip())
    if not compact:
        return text

    pieces: list[str] = []
    indent = 0
    index = 0
    while index < len(compact):
        character = compact[index]
        if character == "{":
            # NOTE(review): when the previous piece ends in a space the prefix is
            # a bare "{" and that space is then stripped below, so an existing
            # space before the brace is removed - confirm this is intended.
            prefix = " {" if keep_space_before_brace and pieces and not pieces[-1].endswith((" ", "\n")) else "{"
            if keep_space_before_brace and pieces and pieces[-1].endswith(" "):
                pieces[-1] = pieces[-1].rstrip()
            pieces.append(prefix)
            indent += 1
            pieces.append("\n" + " " * indent)
        elif character == "}":
            # Never let unbalanced closing braces push the indent below zero.
            indent = max(0, indent - 1)
            pieces.append("\n" + " " * indent + "}")
            # Keep a following ";", ",", ")" or "}" attached to this brace.
            if index + 1 < len(compact) and compact[index + 1] not in ";,)}":
                pieces.append("\n" + " " * indent)
        elif character == ";":
            pieces.append(";")
            # Start a new line unless this is the final character.
            if index + 1 < len(compact):
                pieces.append("\n" + " " * indent)
        else:
            pieces.append(character)
        index += 1

    return _normalize_indentation("".join(pieces))
420
+
421
+
422
+ def _normalize_indentation(text: str) -> str:
423
+ lines = [line.rstrip() for line in text.splitlines()]
424
+ compact_lines = [line for line in lines if line.strip()]
425
+ return "\n".join(compact_lines)
426
+
427
+
428
+ def _hexdump(body: bytes, chunk_size: int = 16) -> str:
429
+ lines: list[str] = []
430
+ for offset in range(0, len(body), chunk_size):
431
+ chunk = body[offset : offset + chunk_size]
432
+ hex_part = " ".join(f"{byte:02x}" for byte in chunk)
433
+ ascii_part = "".join(chr(byte) if 32 <= byte <= 126 else "." for byte in chunk)
434
+ lines.append(f"{offset:08x} {hex_part:<47} {ascii_part}")
435
+ return "\n".join(lines)