runspec-webops-core 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runspec_webops_core-0.1.0/.gitignore +61 -0
- runspec_webops_core-0.1.0/CHANGELOG.md +34 -0
- runspec_webops_core-0.1.0/PKG-INFO +13 -0
- runspec_webops_core-0.1.0/README.md +41 -0
- runspec_webops_core-0.1.0/pyproject.toml +36 -0
- runspec_webops_core-0.1.0/runspec_webops_core/__init__.py +67 -0
- runspec_webops_core-0.1.0/runspec_webops_core/dns.py +65 -0
- runspec_webops_core-0.1.0/runspec_webops_core/errors.py +19 -0
- runspec_webops_core-0.1.0/runspec_webops_core/http.py +224 -0
- runspec_webops_core-0.1.0/runspec_webops_core/openapi.py +317 -0
- runspec_webops_core-0.1.0/runspec_webops_core/tls.py +270 -0
- runspec_webops_core-0.1.0/tests/__init__.py +0 -0
- runspec_webops_core-0.1.0/tests/test_dns.py +31 -0
- runspec_webops_core-0.1.0/tests/test_http.py +100 -0
- runspec_webops_core-0.1.0/tests/test_openapi.py +126 -0
- runspec_webops_core-0.1.0/tests/test_tls.py +104 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.pyo
|
|
5
|
+
*.pyd
|
|
6
|
+
.Python
|
|
7
|
+
*.egg
|
|
8
|
+
*.egg-info/
|
|
9
|
+
dist/
|
|
10
|
+
build/
|
|
11
|
+
.eggs/
|
|
12
|
+
.venv/
|
|
13
|
+
venv/
|
|
14
|
+
env/
|
|
15
|
+
.env
|
|
16
|
+
pip-wheel-metadata/
|
|
17
|
+
.pytest_cache/
|
|
18
|
+
.mypy_cache/
|
|
19
|
+
.ruff_cache/
|
|
20
|
+
htmlcov/
|
|
21
|
+
.coverage
|
|
22
|
+
coverage.xml
|
|
23
|
+
*.cover
|
|
24
|
+
|
|
25
|
+
# Node
|
|
26
|
+
node_modules/
|
|
27
|
+
dist/
|
|
28
|
+
*.js.map
|
|
29
|
+
.npm
|
|
30
|
+
|
|
31
|
+
# Go
|
|
32
|
+
*.exe
|
|
33
|
+
*.test
|
|
34
|
+
*.out
|
|
35
|
+
vendor/
|
|
36
|
+
|
|
37
|
+
# IDE
|
|
38
|
+
.idea/
|
|
39
|
+
.vscode/
|
|
40
|
+
*.iml
|
|
41
|
+
*.iws
|
|
42
|
+
*.ipr
|
|
43
|
+
.DS_Store
|
|
44
|
+
Thumbs.db
|
|
45
|
+
|
|
46
|
+
# Docs
|
|
47
|
+
site/
|
|
48
|
+
|
|
49
|
+
# Misc
|
|
50
|
+
*.log
|
|
51
|
+
*.tmp
|
|
52
|
+
|
|
53
|
+
# External reference repos (cloned locally, not committed)
|
|
54
|
+
chainlit-docs/
|
|
55
|
+
.chainlit/
|
|
56
|
+
|
|
57
|
+
# Claude Code local config (machine-specific)
|
|
58
|
+
.claude/launch.json
|
|
59
|
+
|
|
60
|
+
# Stray committed test venv (removed from tracking)
|
|
61
|
+
.venv-test/
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# runspec-webops-core Changelog
|
|
2
|
+
|
|
3
|
+
## [0.1.0] — 2026-06-22
|
|
4
|
+
|
|
5
|
+
Initial release.
|
|
6
|
+
|
|
7
|
+
Pure-Python HTTPS/TLS + web-endpoint helpers — the importable logic core behind
|
|
8
|
+
the cross-platform web runnables in `runspec-linux` and `runspec-windows`. Stdlib
|
|
9
|
+
only (`ssl` / `socket` / `http.client`); **no `runspec` dependency, no
|
|
10
|
+
`runspec.toml`, and no entry points**, so it surfaces zero runnables and stays
|
|
11
|
+
invisible to `runspec local` / `runspec serve` discovery.
|
|
12
|
+
|
|
13
|
+
Public API (every function returns plain data and raises on failure — see
|
|
14
|
+
`runspec_webops_core.errors`):
|
|
15
|
+
|
|
16
|
+
- **TLS** — `connect_peer(host, port, timeout)` opens an inspection connection
|
|
17
|
+
(verification disabled, so it still reports on expired/self-signed endpoints)
|
|
18
|
+
and a second verifying connection for the trust verdict; `leaf_report`,
|
|
19
|
+
`chain_report`, `days_until`, `cert_status` are the pure reporting helpers.
|
|
20
|
+
The certificate chain is read via `SSLSocket.get_unverified_chain()` (Python
|
|
21
|
+
3.13+), the underlying `_sslobj` method on 3.10–3.12, or the leaf-only
|
|
22
|
+
fallback when neither is available.
|
|
23
|
+
- **HTTP** — `probe` (single request, no redirect following), `classify`,
|
|
24
|
+
`fetch_headers` (with a security-header report), `trace_redirects` (walks the
|
|
25
|
+
redirect chain with loop detection).
|
|
26
|
+
- **DNS** — `resolve(host, want_reverse=...)` for forward (A/AAAA + canonical
|
|
27
|
+
name) and reverse (PTR) lookups via `socket`.
|
|
28
|
+
- **OpenAPI/Swagger** — `load_document` (fetch a URL or read a file),
|
|
29
|
+
`parse_document`, `summarize_spec` (bounded overview: info, servers,
|
|
30
|
+
operations) and `describe_operation` (one operation's params, request body and
|
|
31
|
+
responses, with `$ref`s resolved to bounded schema summaries). Understands
|
|
32
|
+
OpenAPI 3.x and Swagger 2.0. JSON parses with the stdlib; YAML needs the
|
|
33
|
+
optional `yaml` extra (`pip install runspec-webops-core[yaml]`). `fetch_body`
|
|
34
|
+
is the redirect-following, size-capped GET behind it.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: runspec-webops-core
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Pure-Python HTTPS/TLS certificate + web-endpoint helpers — the importable core behind the runspec web runnables (no runspec dependency, no runnables)
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Provides-Extra: dev
|
|
7
|
+
Requires-Dist: mypy; extra == 'dev'
|
|
8
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
9
|
+
Requires-Dist: pyyaml>=6.0; extra == 'dev'
|
|
10
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
11
|
+
Requires-Dist: types-pyyaml; extra == 'dev'
|
|
12
|
+
Provides-Extra: yaml
|
|
13
|
+
Requires-Dist: pyyaml>=6.0; extra == 'yaml'
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# runspec-webops-core
|
|
2
|
+
|
|
3
|
+
Pure-Python HTTPS/TLS certificate + web-endpoint helpers — the importable logic
|
|
4
|
+
core behind the cross-platform web runnables in
|
|
5
|
+
[`runspec-linux`](../runspec-linux) and [`runspec-windows`](../runspec-windows).
|
|
6
|
+
|
|
7
|
+
Stdlib only (`ssl` / `socket` / `http.client`) — **no `runspec` dependency, no
|
|
8
|
+
`runspec.toml`, and no entry points**, so installing it exposes the helper
|
|
9
|
+
functions for import without surfacing any runnables (it is invisible to
|
|
10
|
+
`runspec local` / `runspec serve` discovery).
|
|
11
|
+
|
|
12
|
+
```python
|
|
13
|
+
from runspec_webops_core import connect_peer, leaf_report, probe, resolve
|
|
14
|
+
|
|
15
|
+
peer = connect_peer("example.com", 443, 10)
|
|
16
|
+
print(leaf_report(peer, warn_days=30)) # days-to-expiry, issuer, SANs, trust
|
|
17
|
+
print(probe("https://example.com", 10)) # status, timing, redirect target
|
|
18
|
+
print(resolve("example.com", want_reverse=True))
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
| Function | What it does |
|
|
22
|
+
|---|---|
|
|
23
|
+
| `connect_peer` / `leaf_report` / `chain_report` | TLS certificate inspection — expiry, issuer, SANs, fingerprint, full chain, trust result |
|
|
24
|
+
| `probe` / `classify` | Single HTTP(S) request — status, response time, redirect target |
|
|
25
|
+
| `fetch_headers` | Response headers + a security-header report (HSTS, CSP, …) |
|
|
26
|
+
| `trace_redirects` | Walk the redirect chain, hop by hop, with loop detection |
|
|
27
|
+
| `resolve` | Forward (A/AAAA) and reverse (PTR) DNS lookups |
|
|
28
|
+
| `load_document` / `summarize_spec` / `describe_operation` | Read an OpenAPI/Swagger spec (URL or file) and condense its shape — operations, params, request/response schemas — to bounded JSON |
|
|
29
|
+
|
|
30
|
+
OpenAPI/Swagger specs in JSON parse with the stdlib; YAML needs the optional
|
|
31
|
+
`yaml` extra (`pip install runspec-webops-core[yaml]`).
|
|
32
|
+
|
|
33
|
+
Each function returns plain data and *raises* on failure (`ConnectError`,
|
|
34
|
+
`ResolveError` — both subclasses of `WebopsCoreError`; `SpecError` is also exported, for the OpenAPI helpers).
|
|
35
|
+
|
|
36
|
+
## Corporate wrapping
|
|
37
|
+
|
|
38
|
+
Depend on `runspec-webops-core` alone, import the helpers, and ship your own
|
|
39
|
+
private package with its own `runspec.toml` that bakes in corporate defaults
|
|
40
|
+
(proxies, allowed hosts, redaction) as plain params — only your wrapped
|
|
41
|
+
runnables surface in the venv, the generic published code stays clean.
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "runspec-webops-core"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
requires-python = ">=3.10"
|
|
9
|
+
description = "Pure-Python HTTPS/TLS certificate + web-endpoint helpers — the importable core behind the runspec web runnables (no runspec dependency, no runnables)"
|
|
10
|
+
dependencies = []
|
|
11
|
+
|
|
12
|
+
[project.optional-dependencies]
|
|
13
|
+
# Optional: parse YAML OpenAPI/Swagger specs. JSON specs need no extra.
|
|
14
|
+
yaml = [
|
|
15
|
+
"pyyaml>=6.0",
|
|
16
|
+
]
|
|
17
|
+
dev = [
|
|
18
|
+
"ruff",
|
|
19
|
+
"mypy",
|
|
20
|
+
"pytest>=8.0",
|
|
21
|
+
"pyyaml>=6.0",
|
|
22
|
+
"types-pyyaml",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[tool.pytest.ini_options]
|
|
26
|
+
testpaths = ["tests"]
|
|
27
|
+
|
|
28
|
+
[tool.mypy]
|
|
29
|
+
python_version = "3.10"
|
|
30
|
+
|
|
31
|
+
[tool.ruff]
|
|
32
|
+
line-length = 200
|
|
33
|
+
target-version = "py310"
|
|
34
|
+
|
|
35
|
+
[tool.ruff.lint]
|
|
36
|
+
select = ["E", "F", "I", "UP", "B", "SIM"]
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""runspec-webops-core — pure-Python HTTPS/TLS + web-endpoint helpers.
|
|
2
|
+
|
|
3
|
+
This package has **no dependency on runspec** and ships **no runspec.toml and no
|
|
4
|
+
entry points**, so installing it exposes the helper functions for import without
|
|
5
|
+
surfacing any runnables (it is invisible to ``runspec local`` / ``runspec serve``
|
|
6
|
+
discovery). ``runspec-linux`` and ``runspec-windows`` both depend on it and wrap
|
|
7
|
+
each helper in a cross-platform runnable; a private (e.g. Nexus-hosted) package
|
|
8
|
+
can instead import these helpers directly and ship its own runnables.
|
|
9
|
+
|
|
10
|
+
Everything is stdlib only (``ssl`` / ``socket`` / ``http.client``). Each function
|
|
11
|
+
returns plain data and *raises* on failure (see ``runspec_webops_core.errors``).
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from runspec_webops_core.dns import resolve
|
|
15
|
+
from runspec_webops_core.errors import ConnectError, ResolveError, WebopsCoreError
|
|
16
|
+
from runspec_webops_core.http import (
|
|
17
|
+
classify,
|
|
18
|
+
fetch_body,
|
|
19
|
+
fetch_headers,
|
|
20
|
+
probe,
|
|
21
|
+
trace_redirects,
|
|
22
|
+
)
|
|
23
|
+
from runspec_webops_core.openapi import (
|
|
24
|
+
SpecError,
|
|
25
|
+
describe_operation,
|
|
26
|
+
load_document,
|
|
27
|
+
parse_document,
|
|
28
|
+
summarize_spec,
|
|
29
|
+
)
|
|
30
|
+
from runspec_webops_core.tls import (
|
|
31
|
+
CertInfo,
|
|
32
|
+
TlsPeer,
|
|
33
|
+
cert_status,
|
|
34
|
+
chain_report,
|
|
35
|
+
connect_peer,
|
|
36
|
+
days_until,
|
|
37
|
+
leaf_report,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
__all__ = [
|
|
41
|
+
# errors
|
|
42
|
+
"WebopsCoreError",
|
|
43
|
+
"ConnectError",
|
|
44
|
+
"ResolveError",
|
|
45
|
+
"SpecError",
|
|
46
|
+
# tls
|
|
47
|
+
"CertInfo",
|
|
48
|
+
"TlsPeer",
|
|
49
|
+
"connect_peer",
|
|
50
|
+
"leaf_report",
|
|
51
|
+
"chain_report",
|
|
52
|
+
"days_until",
|
|
53
|
+
"cert_status",
|
|
54
|
+
# http
|
|
55
|
+
"probe",
|
|
56
|
+
"classify",
|
|
57
|
+
"fetch_headers",
|
|
58
|
+
"fetch_body",
|
|
59
|
+
"trace_redirects",
|
|
60
|
+
# dns
|
|
61
|
+
"resolve",
|
|
62
|
+
# openapi
|
|
63
|
+
"load_document",
|
|
64
|
+
"parse_document",
|
|
65
|
+
"summarize_spec",
|
|
66
|
+
"describe_operation",
|
|
67
|
+
]
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""DNS resolution — forward (A/AAAA) and reverse (PTR) via stdlib ``socket``."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import socket
|
|
6
|
+
|
|
7
|
+
from runspec_webops_core.errors import ResolveError
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _is_ip(value: str) -> bool:
|
|
11
|
+
for family in (socket.AF_INET, socket.AF_INET6):
|
|
12
|
+
try:
|
|
13
|
+
socket.inet_pton(family, value)
|
|
14
|
+
return True
|
|
15
|
+
except OSError:
|
|
16
|
+
continue
|
|
17
|
+
return False
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _reverse(ip: str) -> list[str]:
    """Look up the PTR names of ``ip``.

    The primary host name comes first, followed by any aliases the resolver
    reports. A failed lookup (``OSError``) is treated as "no reverse record"
    and yields ``[]`` rather than an exception.
    """
    try:
        primary, alternates, _addresses = socket.gethostbyaddr(ip)
    except OSError:
        return []
    ptr_names = [primary]
    ptr_names.extend(alternates)
    return ptr_names
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def resolve(host: str, want_reverse: bool = False) -> dict:
    """Resolve ``host`` to addresses (for a name) or PTR names (for an IP).

    Args:
        host: A hostname or a literal IPv4/IPv6 address.
        want_reverse: For a hostname, also reverse-resolve every address found.
            A literal IP is always reverse-resolved, whatever this flag says.

    Returns:
        For a literal IP: ``{"host", "is_ip": True, "reverse": {ip: [names]}}``.
        For a name: ``{"host", "is_ip": False, "addresses", "canonical_name"}``,
        where each address is ``{"ip", "family"}`` (``"ipv4"`` / ``"ipv6"``) and
        ``canonical_name`` is ``None`` when the resolver reports none. A
        ``"reverse"`` mapping (ip -> names) is added when ``want_reverse`` is set.

    Raises:
        ResolveError: when the name cannot be resolved, including names that
            cannot be encoded for the lookup at all.
    """
    if _is_ip(host):
        return {"host": host, "is_ip": True, "reverse": {host: _reverse(host)}}

    try:
        infos = socket.getaddrinfo(host, None, proto=socket.IPPROTO_TCP, flags=socket.AI_CANONNAME)
    except (socket.gaierror, UnicodeError) as e:
        # UnicodeError: the name is IDNA-encoded before the lookup, and an empty
        # or over-long label is rejected there rather than with gaierror.
        raise ResolveError(f"could not resolve {host!r}: {e}") from e

    addresses: list[dict] = []
    seen: set[tuple[int, str]] = set()
    canonical = ""
    for family, _type, _proto, canonname, sockaddr in infos:
        if canonname:
            canonical = canonname
        ip = str(sockaddr[0])
        key = (int(family), ip)
        if key in seen:
            # The same address can be listed more than once; report it once.
            continue
        seen.add(key)
        addresses.append({"ip": ip, "family": "ipv6" if family == socket.AF_INET6 else "ipv4"})

    result: dict = {
        "host": host,
        "is_ip": False,
        "addresses": addresses,
        "canonical_name": canonical or None,
    }
    if want_reverse:
        result["reverse"] = {entry["ip"]: _reverse(entry["ip"]) for entry in addresses}
    return result
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Exceptions raised by the webops helpers.
|
|
2
|
+
|
|
3
|
+
The wrappers in ``runspec-linux`` / ``runspec-windows`` catch these to reproduce
|
|
4
|
+
the CLI/agent behaviour (a connect failure exits differently from a bad result).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class WebopsCoreError(Exception):
    """Root of the runspec-webops-core exception hierarchy.

    Catch this to handle any failure raised by the package's helpers.
    """
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ConnectError(WebopsCoreError):
    """Raised when a TLS/HTTP connection cannot be established.

    Typical causes are a refused connection, a timeout or a reset.
    """
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ResolveError(WebopsCoreError):
    """Raised when a DNS name cannot be resolved."""
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"""HTTP(S) endpoint probing — stdlib ``http.client`` / ``ssl`` only.
|
|
2
|
+
|
|
3
|
+
A single request is issued and its status / timing / a few headers are captured;
|
|
4
|
+
the body is drained, not buffered. ``classify`` and the security-header analysis
|
|
5
|
+
are pure. Redirects are *not* followed by :func:`probe` — :func:`trace_redirects`
|
|
6
|
+
walks them explicitly so each hop is visible.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import http.client
|
|
12
|
+
import socket
|
|
13
|
+
import ssl
|
|
14
|
+
import time
|
|
15
|
+
from urllib.parse import urljoin, urlsplit
|
|
16
|
+
|
|
17
|
+
from runspec_webops_core.errors import ConnectError
|
|
18
|
+
|
|
19
|
+
_USER_AGENT = "runspec-webops"
|
|
20
|
+
|
|
21
|
+
# Response security headers worth surfacing — present value or flagged as missing.
|
|
22
|
+
SECURITY_HEADERS: dict[str, str] = {
|
|
23
|
+
"strict-transport-security": "strict_transport_security",
|
|
24
|
+
"content-security-policy": "content_security_policy",
|
|
25
|
+
"x-frame-options": "x_frame_options",
|
|
26
|
+
"x-content-type-options": "x_content_type_options",
|
|
27
|
+
"referrer-policy": "referrer_policy",
|
|
28
|
+
"permissions-policy": "permissions_policy",
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def classify(status: int) -> str:
    """Map an HTTP status code onto a coarse bucket name. Pure.

    2xx -> ``"ok"``, 3xx -> ``"redirect"``, 4xx -> ``"client-error"``,
    5xx -> ``"server-error"``; anything else is ``"unknown"``.
    """
    buckets = (
        (200, "ok"),
        (300, "redirect"),
        (400, "client-error"),
        (500, "server-error"),
    )
    for lower_bound, label in buckets:
        if lower_bound <= status < lower_bound + 100:
            return label
    return "unknown"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _request(url: str, timeout: float, method: str, insecure: bool) -> tuple[int, str, list[tuple[str, str]], float]:
    """Issue one request and return ``(status, reason, headers, elapsed_ms)``.

    Redirects are not followed. The response body is read and discarded in
    fixed-size chunks, so ``elapsed_ms`` still covers the whole exchange while
    the body is never held in memory at once.

    Args:
        url: Absolute ``http`` or ``https`` URL; a fragment, if any, is not sent.
        timeout: Socket timeout in seconds, handed to ``http.client``.
        method: HTTP method to send (for example ``"GET"`` or ``"HEAD"``).
        insecure: For ``https``, skip certificate and hostname verification.

    Raises:
        ConnectError: for an unusable URL (unsupported scheme, missing host,
            malformed URL or port) and for any transport-level failure.
    """
    try:
        parts = urlsplit(url)
    except ValueError as e:
        raise ConnectError(f"invalid URL {url!r}: {e}") from e
    if parts.scheme not in ("http", "https"):
        raise ConnectError(f"unsupported URL scheme: {parts.scheme!r} (expected http or https)")
    if not parts.hostname:
        raise ConnectError(f"no host in URL: {url!r}")

    is_https = parts.scheme == "https"
    try:
        # ``.port`` is parsed lazily and raises ValueError for a non-numeric or
        # out-of-range port, so it is read under its own guard.
        port = parts.port or (443 if is_https else 80)
    except ValueError as e:
        raise ConnectError(f"invalid port in URL {url!r}: {e}") from e
    path = parts.path or "/"
    if parts.query:
        path = f"{path}?{parts.query}"

    if is_https:
        ctx = ssl.create_default_context()
        if insecure:
            # check_hostname must be switched off before verify_mode can be CERT_NONE.
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
        conn: http.client.HTTPConnection = http.client.HTTPSConnection(parts.hostname, port, timeout=timeout, context=ctx)
    else:
        conn = http.client.HTTPConnection(parts.hostname, port, timeout=timeout)

    drain_chunk = 65536
    start = time.monotonic()
    try:
        conn.request(method, path, headers={"User-Agent": _USER_AGENT, "Accept": "*/*"})
        resp = conn.getresponse()
        # Drain in bounded chunks: the timing includes the full body, but a
        # large response is discarded piece by piece instead of buffered whole.
        while resp.read(drain_chunk):
            pass
        elapsed = round((time.monotonic() - start) * 1000, 1)
        return resp.status, resp.reason or "", resp.getheaders(), elapsed
    except (OSError, http.client.HTTPException) as e:
        # OSError also covers TimeoutError, ssl.SSLError and socket.gaierror.
        raise ConnectError(f"request to {url} failed: {e}") from e
    finally:
        conn.close()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def fetch_body(url: str, timeout: float = 15.0, insecure: bool = False, max_bytes: int = 10_000_000, max_redirects: int = 5) -> tuple[int, str]:
    """GET a URL and return ``(status, body_text)``, following redirects.

    Used to retrieve documents (e.g. an OpenAPI spec). At most ``max_bytes + 1``
    bytes of the final response are read, and redirect bodies are not read at
    all, so a runaway endpoint can't exhaust memory. A 3xx response without a
    ``Location`` header is treated as the final response and returned with its
    body.

    Args:
        url: Absolute ``http`` or ``https`` URL to start from.
        timeout: Socket timeout in seconds for each request.
        insecure: For ``https``, skip certificate and hostname verification.
        max_bytes: Largest accepted body size in bytes.
        max_redirects: How many redirects may be followed.

    Returns:
        The final status code and the body decoded with the charset declared
        in ``Content-Type`` (UTF-8 when none is declared or it is unusable);
        undecodable bytes are replaced.

    Raises:
        ConnectError: on an unusable URL, a transport failure, a redirect loop,
            too many redirects, or a body larger than ``max_bytes``.
    """
    current = url
    seen: set[str] = set()
    for _ in range(max_redirects + 1):
        try:
            parts = urlsplit(current)
        except ValueError as e:
            raise ConnectError(f"invalid URL {current!r}: {e}") from e
        if parts.scheme not in ("http", "https"):
            raise ConnectError(f"unsupported URL scheme: {parts.scheme!r} (expected http or https)")
        if not parts.hostname:
            raise ConnectError(f"no host in URL: {current!r}")
        if current in seen:
            raise ConnectError(f"redirect loop while fetching {url}")
        seen.add(current)

        is_https = parts.scheme == "https"
        try:
            # ``.port`` raises ValueError for a non-numeric or out-of-range port.
            port = parts.port or (443 if is_https else 80)
        except ValueError as e:
            raise ConnectError(f"invalid port in URL {current!r}: {e}") from e
        path = parts.path or "/"
        if parts.query:
            path = f"{path}?{parts.query}"

        if is_https:
            ctx = ssl.create_default_context()
            if insecure:
                ctx.check_hostname = False
                ctx.verify_mode = ssl.CERT_NONE
            conn: http.client.HTTPConnection = http.client.HTTPSConnection(parts.hostname, port, timeout=timeout, context=ctx)
        else:
            conn = http.client.HTTPConnection(parts.hostname, port, timeout=timeout)

        try:
            conn.request("GET", path, headers={"User-Agent": _USER_AGENT, "Accept": "application/json, application/yaml;q=0.9, */*;q=0.8"})
            resp = conn.getresponse()
            location = resp.getheader("location") if 300 <= resp.status < 400 else None
            if location:
                # The connection is closed in ``finally``, so the redirect body
                # is abandoned unread instead of being pulled into memory.
                current = urljoin(current, location)
                continue
            raw = resp.read(max_bytes + 1)
            if len(raw) > max_bytes:
                raise ConnectError(f"response from {current} exceeds {max_bytes} bytes")
            # get_content_charset() handles parameter case and quoting.
            charset = resp.headers.get_content_charset() or "utf-8"
            try:
                text = raw.decode(charset, errors="replace")
            except (LookupError, ValueError):
                # Unknown or non-text codec name sent by the server.
                text = raw.decode("utf-8", errors="replace")
            return resp.status, text
        except (OSError, TimeoutError, ssl.SSLError, http.client.HTTPException, socket.gaierror) as e:
            raise ConnectError(f"request to {current} failed: {e}") from e
        finally:
            conn.close()

    raise ConnectError(f"too many redirects while fetching {url}")
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _header(headers: list[tuple[str, str]], name: str) -> str | None:
|
|
144
|
+
target = name.lower()
|
|
145
|
+
for key, value in headers:
|
|
146
|
+
if key.lower() == target:
|
|
147
|
+
return value
|
|
148
|
+
return None
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def probe(url: str, timeout: float = 10.0, method: str = "GET", insecure: bool = False) -> dict:
    """Send one request to ``url`` and summarise the response.

    The result holds the URL, status code and text, elapsed milliseconds, the
    ``server`` and ``location`` header values (``None`` when absent) and the
    status bucket from :func:`classify`. Redirects are not followed.
    """
    status, reason, headers, elapsed = _request(url, timeout, method, insecure)
    report: dict = {
        "url": url,
        "status": status,
        "status_text": reason,
        "time_ms": elapsed,
    }
    for field in ("server", "location"):
        report[field] = _header(headers, field)
    report["class"] = classify(status)
    return report
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def fetch_headers(url: str, timeout: float = 10.0, method: str = "HEAD", insecure: bool = False) -> dict:
    """Fetch response headers and report on the security headers (present/missing).

    Args:
        url: Absolute ``http`` or ``https`` URL.
        timeout: Socket timeout in seconds.
        method: HTTP method to send; ``HEAD`` by default.
        insecure: For ``https``, skip certificate and hostname verification.

    Returns:
        A dict with the request outcome (``url``, ``status``, ``status_text``,
        ``time_ms``, ``class``), ``headers`` keyed by lower-cased name,
        ``security`` (friendly name -> value or ``None`` for every entry of
        ``SECURITY_HEADERS``) and ``missing_security_headers`` (the raw names
        of those that were absent).
    """
    status, reason, headers, elapsed = _request(url, timeout, method, insecure)

    header_map: dict[str, str] = {}
    for key, value in headers:
        lowered = key.lower()
        # A field sent more than once (e.g. several CSP or Set-Cookie lines) is
        # comma-joined, so the last line no longer overwrites the earlier ones.
        header_map[lowered] = f"{header_map[lowered]}, {value}" if lowered in header_map else value

    security: dict[str, str | None] = {friendly: header_map.get(raw) for raw, friendly in SECURITY_HEADERS.items()}
    missing: list[str] = [raw for raw in SECURITY_HEADERS if raw not in header_map]

    return {
        "url": url,
        "status": status,
        "status_text": reason,
        "time_ms": elapsed,
        "class": classify(status),
        "headers": header_map,
        "security": security,
        "missing_security_headers": missing,
    }
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def trace_redirects(url: str, timeout: float = 10.0, max_hops: int = 10, insecure: bool = False) -> dict:
    """Walk the redirect chain, recording each hop until a non-3xx or ``max_hops``.

    Args:
        url: Absolute ``http`` or ``https`` URL to start from.
        timeout: Socket timeout in seconds for each request.
        max_hops: How many redirects may be followed; at most ``max_hops + 1``
            requests are made. A negative value makes no request at all.
        insecure: For ``https``, skip certificate and hostname verification.

    Returns:
        ``{"url", "hops", "final_url", "final_status", "redirect_count"}``.
        Each hop is ``{"url", "status", "status_text", "location", "time_ms"}``.
        A revisited URL ends the walk with a marker hop whose ``status`` is
        ``None`` and whose ``status_text`` is ``"redirect loop"``. When the hop
        budget runs out, the last hop's ``status_text`` gets a
        ``" (max hops reached)"`` suffix. ``final_status`` is the status of the
        last request actually made (``0`` if none was).
    """
    hops: list[dict] = []
    seen: set[str] = set()
    current = url
    final_status = 0
    for _ in range(max_hops + 1):
        if current in seen:
            hops.append({"url": current, "status": None, "status_text": "redirect loop", "location": None, "time_ms": 0.0})
            break
        seen.add(current)
        status, reason, headers, elapsed = _request(current, timeout, "GET", insecure)
        location = _header(headers, "location")
        hops.append(
            {
                "url": current,
                "status": status,
                "status_text": reason,
                "location": location,
                "time_ms": elapsed,
            }
        )
        final_status = status
        if not (300 <= status < 400) or not location:
            break
        # Location may be relative; resolve it against the URL just fetched.
        current = urljoin(current, location)
    else:
        # Loop exhausted without a terminal response. With a negative
        # ``max_hops`` the loop never ran, so there is no hop to annotate.
        if hops:
            hops[-1]["status_text"] = (hops[-1]["status_text"] or "") + " (max hops reached)"

    return {
        "url": url,
        "hops": hops,
        "final_url": hops[-1]["url"] if hops else url,
        "final_status": final_status,
        "redirect_count": max(len(hops) - 1, 0),
    }
|