pagedigest 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,87 @@
1
+ Metadata-Version: 2.4
2
+ Name: pagedigest
3
+ Version: 0.1.0
4
+ Summary: Minimal pagedigest consumer reference implementation
5
+ License-Expression: MIT
6
+ Project-URL: Homepage, https://pagedigest.org
7
+ Project-URL: Repository, https://github.com/maxwellsantoro/pagedigest
8
+ Requires-Python: >=3.9
9
+ Description-Content-Type: text/markdown
10
+ Requires-Dist: requests>=2.31.0
11
+ Provides-Extra: dev
12
+ Requires-Dist: jsonschema>=4.23.0; extra == "dev"
13
+ Requires-Dist: ruff>=0.8.0; extra == "dev"
14
+
15
+ # Python Consumer (Minimal Reference)
16
+
17
+ ## Install (from repo)
18
+
19
+ ```bash
20
+ cd implementations/python-consumer
21
+ uv sync
22
+ uv pip install -e .
23
+ ```
24
+
25
+ Requires Python ≥3.9. Runtime dependency: `requests`.
26
+
27
+ ## API
28
+
29
+ - `fetch` — fetch and validate manifest; graceful fallback on errors
30
+ - `diff` — compare manifest to cached `site_rev` / per-URL `rev`
31
+ - `audit` — identity-encoding digest check (streams the body with a size cap)
32
+ - `check_site` — `fetch` + `diff` + optional sampled audit plan
33
+ - `verify_live` — fetch a manifest and sample live identity-encoded responses for digest verification
34
+ - `identity_digest` — stream-hash a `stream=True` response body, capped at `max_bytes`, for custom audit pipelines
35
+ - `validate_manifest`, `resolve_url_key`, `manifest_url` — validation and URL helpers
36
+ - `format_state_header`, `parse_state_header` — strict optional `PageDigest-State` helpers
37
+
38
+ ## CLI
39
+
40
+ ```bash
41
+ pagedigest verify-live https://example.com --sample-size 25
42
+ ```
43
+
44
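+ `verify-live` exits `2` on digest mismatches so it can act as a deployment gate, `1` when an argument is invalid or the check itself reports an error, and `0` otherwise.
45
+ Redirects, network errors, and body-size caps are reported as inconclusive; inconclusive results do not affect the exit status.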
46
+
47
+ ## Example
48
+
49
+ ```python
50
+ from pagedigest import check_site
51
+
52
+ decision = check_site(
53
+ "https://example.com",
54
+ cached_site_rev=12,
55
+ cached_revs={"/": 3, "/about": 1},
56
+ sample_audit_rate=0.01,
57
+ )
58
+ ```
59
+
60
+ After a successful manifest check, an integration may make its observed state
61
+ visible on subsequent page requests:
62
+
63
+ ```python
64
+ from pagedigest import format_state_header
65
+
66
+ headers = {
67
+ "PageDigest-State": format_state_header(
68
+ decision["manifest"]["site_rev"],
69
+ "/.well-known/pagedigest.json",
70
+ )
71
+ }
72
+ ```
73
+
74
+ This is a corroborating observation signal, not authentication. See
75
+ [SPEC.md §5.4](../../SPEC.md#54-optional-cooperation-request-header).
76
+
77
+ Conformance fixtures: `tests/test_vectors.py` exercises `../../test-vectors/`.
78
+
79
+ ## Persistent cache example
80
+
81
+ ```bash
82
+ uv run python examples/cache_persistence.py https://example.com ./pagedigest-cache.json
83
+ ```
84
+
85
+ The example stores `site_rev`, per-URL `rev`, `ETag`, and `Last-Modified`
86
+ between runs. It prints page fetch decisions so crawler/indexer integrations can
87
+ replace the `print` calls with their own fetch pipeline.
@@ -0,0 +1,73 @@
1
+ # Python Consumer (Minimal Reference)
2
+
3
+ ## Install (from repo)
4
+
5
+ ```bash
6
+ cd implementations/python-consumer
7
+ uv sync
8
+ uv pip install -e .
9
+ ```
10
+
11
+ Requires Python ≥3.9. Runtime dependency: `requests`.
12
+
13
+ ## API
14
+
15
+ - `fetch` — fetch and validate manifest; graceful fallback on errors
16
+ - `diff` — compare manifest to cached `site_rev` / per-URL `rev`
17
+ - `audit` — identity-encoding digest check (streams the body with a size cap)
18
+ - `check_site` — `fetch` + `diff` + optional sampled audit plan
19
+ - `verify_live` — fetch a manifest and sample live identity-encoded responses for digest verification
20
+ - `identity_digest` — stream-hash a `stream=True` response body, capped at `max_bytes`, for custom audit pipelines
21
+ - `validate_manifest`, `resolve_url_key`, `manifest_url` — validation and URL helpers
22
+ - `format_state_header`, `parse_state_header` — strict optional `PageDigest-State` helpers
23
+
24
+ ## CLI
25
+
26
+ ```bash
27
+ pagedigest verify-live https://example.com --sample-size 25
28
+ ```
29
+
30
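+ `verify-live` exits `2` on digest mismatches so it can act as a deployment gate, `1` when an argument is invalid or the check itself reports an error, and `0` otherwise.
31
+ Redirects, network errors, and body-size caps are reported as inconclusive; inconclusive results do not affect the exit status.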
32
+
33
+ ## Example
34
+
35
+ ```python
36
+ from pagedigest import check_site
37
+
38
+ decision = check_site(
39
+ "https://example.com",
40
+ cached_site_rev=12,
41
+ cached_revs={"/": 3, "/about": 1},
42
+ sample_audit_rate=0.01,
43
+ )
44
+ ```
45
+
46
+ After a successful manifest check, an integration may make its observed state
47
+ visible on subsequent page requests:
48
+
49
+ ```python
50
+ from pagedigest import format_state_header
51
+
52
+ headers = {
53
+ "PageDigest-State": format_state_header(
54
+ decision["manifest"]["site_rev"],
55
+ "/.well-known/pagedigest.json",
56
+ )
57
+ }
58
+ ```
59
+
60
+ This is a corroborating observation signal, not authentication. See
61
+ [SPEC.md §5.4](../../SPEC.md#54-optional-cooperation-request-header).
62
+
63
+ Conformance fixtures: `tests/test_vectors.py` exercises `../../test-vectors/`.
64
+
65
+ ## Persistent cache example
66
+
67
+ ```bash
68
+ uv run python examples/cache_persistence.py https://example.com ./pagedigest-cache.json
69
+ ```
70
+
71
+ The example stores `site_rev`, per-URL `rev`, `ETag`, and `Last-Modified`
72
+ between runs. It prints page fetch decisions so crawler/indexer integrations can
73
+ replace the `print` calls with their own fetch pipeline.
@@ -0,0 +1,31 @@
1
+ from .core import (
2
+ audit,
3
+ check_site,
4
+ diff,
5
+ fetch,
6
+ fetch_manifest_url,
7
+ format_state_header,
8
+ identity_digest,
9
+ live_manifest_url,
10
+ manifest_url,
11
+ parse_state_header,
12
+ resolve_url_key,
13
+ validate_manifest,
14
+ verify_live,
15
+ )
16
+
17
+ __all__ = [
18
+ "fetch",
19
+ "fetch_manifest_url",
20
+ "format_state_header",
21
+ "diff",
22
+ "audit",
23
+ "check_site",
24
+ "identity_digest",
25
+ "live_manifest_url",
26
+ "manifest_url",
27
+ "parse_state_header",
28
+ "resolve_url_key",
29
+ "validate_manifest",
30
+ "verify_live",
31
+ ]
@@ -0,0 +1,113 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import sys
5
+ from collections.abc import Sequence
6
+ from typing import TextIO
7
+
8
+ from .core import DEFAULT_MAX_AUDIT_BYTES, DEFAULT_MAX_MANIFEST_BYTES, verify_live
9
+
10
+
11
def _add_verify_live_args(parser: argparse.ArgumentParser) -> None:
    """Register the ``verify-live`` command-line arguments on *parser*.

    Adds the positional ``base_url``, the optional ``--manifest-url``
    override, and five integer-valued options. The same helper serves both
    the standalone parser and the ``verify-live`` subcommand parser, so the
    two entry points accept identical arguments.

    Args:
        parser: The argparse parser (or subparser) to extend in place.
    """
    parser.add_argument("base_url", help="Site base URL, e.g. https://example.com")
    parser.add_argument(
        "--manifest-url",
        help="Override manifest URL (defaults to /.well-known/pagedigest.json)",
    )

    # Every remaining option is an integer with a default, so they are
    # declared as (flag, default, help) rows and registered in order.
    integer_options = (
        ("--sample-size", 25, "Number of digest entries to sample"),
        ("--seed", 42, "Random seed for deterministic sampling"),
        ("--timeout", 15, "HTTP timeout seconds"),
        (
            "--max-bytes",
            DEFAULT_MAX_AUDIT_BYTES,
            "Abort identity fetches larger than this many bytes",
        ),
        (
            "--manifest-max-bytes",
            DEFAULT_MAX_MANIFEST_BYTES,
            "Abort manifest fetches larger than this many bytes",
        ),
    )
    for flag, default_value, description in integer_options:
        parser.add_argument(flag, type=int, default=default_value, help=description)
42
+
43
+
44
def _run_verify_live(args: argparse.Namespace, stdout: TextIO, stderr: TextIO) -> int:
    """Validate parsed arguments, run ``verify_live``, and print a report.

    Args:
        args: Parsed namespace carrying ``base_url``, ``manifest_url``,
            ``sample_size``, ``seed``, ``timeout``, ``max_bytes`` and
            ``manifest_max_bytes``.
        stdout: Stream that receives the manifest line, the summary counters
            and one line per non-matching sampled entry.
        stderr: Stream that receives argument and verification errors.

    Returns:
        ``1`` when an argument is out of range or the verification result is
        not ``ok``; ``2`` when at least one digest mismatch was counted;
        ``0`` otherwise.
    """
    # Range checks run in a fixed order; only the first violation is reported.
    problem = None
    if args.sample_size < 0:
        problem = "sample-size must be non-negative"
    elif args.timeout <= 0:
        problem = "timeout must be positive"
    elif args.max_bytes <= 0:
        problem = "max-bytes must be positive"
    elif args.manifest_max_bytes <= 0:
        problem = "manifest-max-bytes must be positive"
    if problem is not None:
        print(problem, file=stderr)
        return 1

    report = verify_live(
        args.base_url,
        manifest_url_override=args.manifest_url,
        sample_size=args.sample_size,
        seed=args.seed,
        timeout=args.timeout,
        max_bytes=args.max_bytes,
        manifest_max_bytes=args.manifest_max_bytes,
    )

    # The manifest URL is echoed even when the run failed.
    print(f"manifest: {report.get('manifest_url')}", file=stdout)
    if not report.get("ok"):
        reason = report.get("error") or "unknown-error"
        http_status = report.get("status_code")
        if http_status is not None:
            reason = f"{reason}:{http_status}"
        print(f"error: {reason}", file=stderr)
        return 1

    summary_fields = (
        ("sampled", "sampled"),
        ("match", "match_count"),
        ("mismatch", "mismatch_count"),
        ("inconclusive", "inconclusive_count"),
    )
    for label, key in summary_fields:
        print(f"{label}: {report[key]}", file=stdout)

    # Matching entries are summarised by the counters above; list only the rest.
    for entry in report["results"]:
        if entry["status"] == "match":
            continue
        print(f"- {entry['status']}: {entry['url']} ({entry['detail']})", file=stdout)

    # Only mismatches produce the distinct exit status 2.
    return 2 if report["mismatch_count"] > 0 else 0
88
+
89
+
90
def verify_live_main(
    argv: Sequence[str] | None = None,
    stdout: TextIO | None = None,
    stderr: TextIO | None = None,
) -> int:
    """Entry point for a standalone ``verify-live`` command (no subcommand).

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.
        stdout: Report stream; falls back to ``sys.stdout`` when falsy.
        stderr: Error stream; falls back to ``sys.stderr`` when falsy.

    Returns:
        The exit status produced by ``_run_verify_live``.
    """
    cli_parser = argparse.ArgumentParser(
        description="Verify pagedigest digest values over the wire"
    )
    _add_verify_live_args(cli_parser)

    parsed = cli_parser.parse_args(argv)
    report_stream = stdout or sys.stdout
    error_stream = stderr or sys.stderr
    return _run_verify_live(parsed, report_stream, error_stream)
96
+
97
+
98
def main(
    argv: Sequence[str] | None = None,
    stdout: TextIO | None = None,
    stderr: TextIO | None = None,
) -> int:
    """Entry point for the ``pagedigest`` command and its subcommands.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.
        stdout: Report stream; falls back to ``sys.stdout`` when falsy.
        stderr: Error/help stream; falls back to ``sys.stderr`` when falsy.

    Returns:
        The ``verify-live`` exit status when that subcommand is selected;
        otherwise ``1`` after printing the help text to the error stream.
    """
    root_parser = argparse.ArgumentParser(prog="pagedigest")
    command_parsers = root_parser.add_subparsers(dest="command")
    _add_verify_live_args(
        command_parsers.add_parser(
            "verify-live", help="Verify manifest digests against live responses"
        )
    )

    parsed = root_parser.parse_args(argv)
    if parsed.command != "verify-live":
        # No subcommand was given: show usage and signal failure.
        root_parser.print_help(stderr or sys.stderr)
        return 1

    return _run_verify_live(parsed, stdout or sys.stdout, stderr or sys.stderr)
110
+
111
+
112
+ if __name__ == "__main__":
113
+ raise SystemExit(main())