@ictechgy/context-guard 0.4.9 → 0.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/CHANGELOG.md +28 -0
  2. package/README.ko.md +59 -31
  3. package/README.md +85 -36
  4. package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
  5. package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
  6. package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
  7. package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
  8. package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
  9. package/docs/benchmark-workflow-examples.md +3 -0
  10. package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
  11. package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
  12. package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
  13. package/docs/distribution.md +10 -7
  14. package/docs/experimental-benchmark-fixtures.md +30 -6
  15. package/package.json +4 -6
  16. package/packaging/homebrew/context-guard.rb.template +1 -1
  17. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  18. package/plugins/context-guard/README.ko.md +20 -14
  19. package/plugins/context-guard/README.md +26 -17
  20. package/plugins/context-guard/bin/context-guard +147 -25
  21. package/plugins/context-guard/bin/context-guard-artifact +884 -79
  22. package/plugins/context-guard/bin/context-guard-audit +33 -2
  23. package/plugins/context-guard/bin/context-guard-bench +1542 -31
  24. package/plugins/context-guard/bin/context-guard-cache-score +665 -0
  25. package/plugins/context-guard/bin/context-guard-compress +146 -1
  26. package/plugins/context-guard/bin/context-guard-cost +790 -6
  27. package/plugins/context-guard/bin/context-guard-experiments +463 -26
  28. package/plugins/context-guard/bin/context-guard-failed-nudge +9 -2
  29. package/plugins/context-guard/bin/context-guard-filter +163 -7
  30. package/plugins/context-guard/bin/context-guard-guard-read +3 -0
  31. package/plugins/context-guard/bin/context-guard-pack +892 -49
  32. package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
  33. package/plugins/context-guard/bin/context-guard-sanitize-output +76 -12
  34. package/plugins/context-guard/bin/context-guard-setup +165 -31
  35. package/plugins/context-guard/bin/context-guard-statusline +490 -283
  36. package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
  37. package/plugins/context-guard/bin/context-guard-tool-prune +480 -53
  38. package/plugins/context-guard/bin/context-guard-trim-output +288 -41
  39. package/plugins/context-guard/brief/README.md +5 -5
  40. package/plugins/context-guard/lib/context_guard_commands.py +230 -0
  41. package/plugins/context-guard/skills/setup/SKILL.md +1 -0
  42. package/context-guard-kit/README.md +0 -91
  43. package/context-guard-kit/benchmark_runner.py +0 -2401
  44. package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
  45. package/context-guard-kit/context_compress.py +0 -695
  46. package/context-guard-kit/context_escrow.py +0 -935
  47. package/context-guard-kit/context_filter.py +0 -637
  48. package/context-guard-kit/context_guard_cli.py +0 -325
  49. package/context-guard-kit/context_guard_diet.py +0 -1711
  50. package/context-guard-kit/context_pack.py +0 -2713
  51. package/context-guard-kit/cost_guard.py +0 -2349
  52. package/context-guard-kit/experimental_registry.py +0 -4348
  53. package/context-guard-kit/failed_attempt_nudge.py +0 -567
  54. package/context-guard-kit/guard_large_read.py +0 -690
  55. package/context-guard-kit/hook_secret_patterns.py +0 -43
  56. package/context-guard-kit/read_symbol.py +0 -483
  57. package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
  58. package/context-guard-kit/sanitize_output.py +0 -725
  59. package/context-guard-kit/settings.example.json +0 -67
  60. package/context-guard-kit/setup_wizard.py +0 -2515
  61. package/context-guard-kit/statusline.sh +0 -362
  62. package/context-guard-kit/statusline_merged.sh +0 -157
  63. package/context-guard-kit/tool_schema_pruner.py +0 -837
  64. package/context-guard-kit/trim_command_output.py +0 -1449
@@ -1,4348 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Default-off ContextGuard experimental feature registry.
3
-
4
- The registry is intentionally passive: it records explicit project-local opt-in
5
- state for experimental lanes, but it does not activate runtime behavior by
6
- itself. Individual helpers must still require their own explicit experimental
7
- flags before changing stable behavior.
8
- """
9
- from __future__ import annotations
10
-
11
- import argparse
12
- from dataclasses import asdict, dataclass
13
- from datetime import datetime, timezone
14
- import http.client
15
- from http.server import BaseHTTPRequestHandler, HTTPServer
16
- import hashlib
17
- import ipaddress
18
- import json
19
- import math
20
- import os
21
- import re
22
- import secrets
23
- import shlex
24
- import socket
25
- from socketserver import TCPServer
26
- from pathlib import Path
27
- import stat
28
- import sys
29
- from typing import Any, NoReturn
30
- import unicodedata
31
- from urllib.parse import urlparse
32
-
33
# --- Tool identity and registry configuration -------------------------------
TOOL_NAME = "context-guard-experiments"
CONFIG_SCHEMA_VERSION = "contextguard.experiments.v1"
DEFAULT_CONFIG = Path(".context-guard", "experiments.json")
MAX_CONFIG_BYTES = 64_000

# --- Context-diff compaction limits and artifact locations ------------------
MAX_CONTEXT_DIFF_INPUT_BYTES = 256_000
MAX_CONTEXT_DIFF_REPLACEMENT_BYTES = 128_000
MAX_CONTEXT_DIFF_ARTIFACT_METADATA_BYTES = 64_000
DEFAULT_CONTEXT_DIFF_ARTIFACT_DIR = Path(".context-guard", "artifacts")
LEGACY_CONTEXT_DIFF_ARTIFACT_DIR = Path(".claude-token-optimizer", "artifacts")

# --- Visual OCR and learned-compression limits ------------------------------
MAX_VISUAL_OCR_TEXT_BYTES = 64_000
MAX_LEARNED_COMPRESSION_INPUT_BYTES = 128_000
MAX_LEARNED_COMPRESSION_REPLACEMENT_BYTES = 64_000
MAX_LEARNED_COMPRESSION_ARTIFACT_METADATA_BYTES = 64_000

# --- Self-hosted metrics ledger ---------------------------------------------
MAX_SELF_HOSTED_METRICS_INPUT_BYTES = 64_000
SELF_HOSTED_METRICS_SCHEMA_VERSION = "contextguard.bench.self-hosted-metrics.v1"
SELF_HOSTED_METRICS_KEY = "self_hosted_metrics"
SELF_HOSTED_METRICS_CLAIM_BOUNDARY = "self_hosted_metrics_only_not_hosted_api_token_or_cost_savings"
BENCH_RUN_EVIDENCE_SCHEMA_VERSION = "contextguard.bench.run-evidence.v1"
MAX_SELF_HOSTED_LABEL_CHARS = 120
# Seven days expressed in milliseconds.
MAX_SELF_HOSTED_LATENCY_MS = 7 * 24 * 60 * 60 * 1000
MAX_SELF_HOSTED_MEMORY_MB = 10_000_000
MAX_SELF_HOSTED_ENERGY_WH = 1_000_000
MAX_SELF_HOSTED_LOCAL_COST_USD = 1_000_000
MAX_SELF_HOSTED_TOKENS_PER_SECOND = 10_000_000
TOKEN_PROXY_BYTES_PER_TOKEN = 4
MAX_SELF_HOSTED_JSON_DEPTH = 100
MAX_SELF_HOSTED_JSON_NODES = 10_000

# --- Local proxy: schema identifiers ----------------------------------------
LOCAL_PROXY_SCHEMA_VERSION = "contextguard.experiments.local-proxy-plan.v1"
LOCAL_PROXY_GATE_SCHEMA_VERSION = "contextguard.experiments.local-proxy-gate.v1"
LOCAL_PROXY_FORWARD_SCHEMA_VERSION = "contextguard.experiments.local-proxy-forward.v1"
LOCAL_PROXY_DIAGNOSTIC_SCHEMA_VERSION = "contextguard.experiments.local-proxy-forward-diagnostic.v1"
LOCAL_PROXY_READY_SCHEMA_VERSION = "contextguard.experiments.local-proxy-ready.v1"
LOCAL_PROXY_EXTERNAL_DESIGN_SCHEMA_VERSION = "contextguard.experiments.local-proxy-external-forwarding-design.v1"

# --- Local proxy: defaults and bounds ---------------------------------------
LOCAL_PROXY_DEFAULT_BIND_HOST = "127.0.0.1"
LOCAL_PROXY_DEFAULT_BIND_PORT = 0
LOCAL_PROXY_DEFAULT_TARGET_HOST = "127.0.0.1"
LOCAL_PROXY_DEFAULT_TARGET_PORT = 0
LOCAL_PROXY_LOCALHOST_NAMES = {"localhost"}
LOCAL_PROXY_TRUE_VALUES = {"1", "on", "true", "yes", "y"}
LOCAL_PROXY_FALSE_VALUES = {"", "0", "false", "n", "no", "off"}
LOCAL_PROXY_DEFAULT_MAX_REQUEST_BYTES = 64 * 1024
LOCAL_PROXY_DEFAULT_MAX_RESPONSE_BYTES = 256 * 1024
LOCAL_PROXY_MAX_FORWARD_BYTES = 2 * 1024 * 1024
LOCAL_PROXY_DEFAULT_TIMEOUT_SECONDS = 5.0
LOCAL_PROXY_MAX_TIMEOUT_SECONDS = 30.0

# --- Local proxy: external-forwarding design policy -------------------------
LOCAL_PROXY_EXTERNAL_ALLOWED_SCHEMES = {"https"}
LOCAL_PROXY_EXTERNAL_CREDENTIAL_REDACTION_POLICY = "strip-sensitive-headers"
LOCAL_PROXY_EXTERNAL_PROVIDER_EVIDENCE_BOUNDARY = "diagnostic-only-provider-measured-required"

# --- Local proxy: header name sets (lowercase) ------------------------------
LOCAL_PROXY_SENSITIVE_HEADER_NAMES = {
    "authorization",
    "proxy-authorization",
    "x-api-key",
    "api-key",
    "x-anthropic-api-key",
    "x-openai-api-key",
    "openai-api-key",
    "cookie",
    "set-cookie",
}
LOCAL_PROXY_HOP_BY_HOP_HEADERS = {
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailer",
    "transfer-encoding",
    "upgrade",
}

# --- Filesystem safety ------------------------------------------------------
# First path components that may be symlinks, mapped to the only target
# each one is allowed to point at.
ALLOWED_FIRST_COMPONENT_SYMLINKS = {
    "tmp": Path("/private/tmp"),
    "var": Path("/private/var"),
}

# Platform capability probes; the sets are absent on some platforms, hence
# the empty-set fallback.
_DIR_FD_CAPABLE = getattr(os, "supports_dir_fd", set())
_FOLLOW_SYMLINKS_CAPABLE = getattr(os, "supports_follow_symlinks", set())
DIR_FD_OPEN_SUPPORTED = os.open in _DIR_FD_CAPABLE
DIR_FD_MKDIR_SUPPORTED = os.mkdir in _DIR_FD_CAPABLE
DIR_FD_STAT_NOFOLLOW_SUPPORTED = os.stat in _DIR_FD_CAPABLE and os.stat in _FOLLOW_SYMLINKS_CAPABLE
NO_FOLLOW_SUPPORTED = hasattr(os, "O_NOFOLLOW")
113
-
114
-
115
@dataclass(frozen=True)
class Experiment:
    """Immutable metadata record describing one experimental lane."""

    id: str
    name: str
    summary: str
    stability: str
    default_enabled: bool
    risk_level: str
    claim_boundary: str
    gate_requirements: tuple[str, ...]
    runtime_status: str = "metadata-only"
    commands: tuple[str, ...] = ()
    opt_in_flags: tuple[str, ...] = ()
    config_effect: str = (
        "Registry enablement records project-local intent only; helpers still require explicit experimental flags."
    )
    evidence_contract: str = "Evidence is local metadata only unless a later story adds a measured runtime gate."

    def to_json(self, *, enabled: bool = False) -> dict[str, Any]:
        """Return the record as a JSON-ready dict with an ``enabled`` flag.

        Tuple-valued fields are emitted as lists, and ``enabled`` is coerced
        to ``bool`` and appended after the dataclass fields.
        """
        payload = asdict(self)
        # Updating existing keys keeps the field order produced by asdict().
        payload.update(
            {field_name: list(getattr(self, field_name)) for field_name in ("gate_requirements", "commands", "opt_in_flags")}
        )
        payload["enabled"] = bool(enabled)
        return payload
139
-
140
-
141
# Each experimental lane is declared as its own named record and then
# collected, in the same order, into the public EXPERIMENTS tuple.

_OUTPUT_RECEIPT_TRIM = Experiment(
    id="output-receipt-trim",
    name="Receipt-backed output trimming",
    summary="Opt-in digest output with local artifact receipts and exact re-expand instructions.",
    stability="experimental",
    default_enabled=False,
    risk_level="low",
    claim_boundary="Local output-size reduction only; no hosted API token/cost savings claim without provider-measured matched tasks.",
    gate_requirements=("explicit opt-in", "local artifact receipt", "exact re-expand command"),
    runtime_status="available-explicit-flags",
    commands=(
        "context-guard-trim-output --digest markdown --artifact-receipt -- <command>",
        "context-guard-trim-output --digest json --artifact-receipt -- <command>",
    ),
    opt_in_flags=("--digest markdown|json", "--artifact-receipt"),
    config_effect=(
        "Registry enablement records project-local intent only; output trimming still runs only when the helper is "
        "invoked with --digest markdown|json plus --artifact-receipt."
    ),
    evidence_contract=(
        "Stores the exact sanitized full output as a local context-guard-artifact receipt and emits an exact "
        "re-expand command before omitted details are relied on."
    ),
)

_PROTECTED_ZONE_POLICY = Experiment(
    id="protected-zone-policy",
    name="Protected-zone transform policy",
    summary="Metadata policy that denies semantic rewrites for code, diffs, identifiers, hashes, paths, and other exact evidence.",
    stability="experimental",
    default_enabled=False,
    risk_level="low",
    claim_boundary="Policy metadata only; it does not prove provider cache or token savings.",
    gate_requirements=("explicit opt-in", "protected-zone detection", "exact retrieval fallback"),
    runtime_status="available-explicit-flags",
    commands=(
        "context-guard-compress --json --protected-policy",
        "context-guard cost compile --json",
        "context-guard-cost compile --json",
    ),
    opt_in_flags=("--protected-policy", "protected=true manifest sections for cost compile"),
    config_effect=(
        "Registry enablement records project-local intent only; protected-zone policy metadata still appears only "
        "when explicit helper flags or protected manifest sections are used."
    ),
    evidence_contract=(
        "Denies semantic/paraphrase rewrites for protected classes and requires structural transforms plus exact "
        "artifact retrieval guidance for protected evidence."
    ),
)

_CONTEXT_DIFF_COMPACTION = Experiment(
    id="context-diff-compaction",
    name="Reviewable context-diff compaction",
    summary="Explicit receipt-backed runtime for caller-supplied compact diff replacements with stable exact handles.",
    stability="experimental",
    default_enabled=False,
    risk_level="medium",
    claim_boundary="Smaller local diffs are proxy evidence only; hosted savings require provider-measured matched tasks.",
    gate_requirements=("explicit opt-in", "human-reviewable diff", "local receipt", "exact re-expand handle"),
    runtime_status="available-explicit-runtime",
    commands=(
        "context-guard experiments plan context-diff-compaction",
        "context-guard experiments emit context-diff-compaction --receipt-id <id> --reexpand-command <cmd>",
    ),
    opt_in_flags=(
        "plan context-diff-compaction",
        "emit context-diff-compaction",
        "--receipt-id",
        "--reexpand-command",
        "--replacement-text|--replacement-file",
    ),
    config_effect=(
        "Registry enablement records project-local intent only; context-diff replacement emits only through the "
        "explicit emit command with exact retrieval metadata and caller-supplied compact text."
    ),
    evidence_contract=(
        "Emitted replacements require human-reviewable hunks, caller-supplied compact text, and exact local "
        "artifact content that matches the input diff plus re-expand metadata; smaller local diffs remain proxy "
        "evidence only."
    ),
)

_VISUAL_CROP_OCR = Experiment(
    id="visual-crop-ocr",
    name="Visual crop/OCR evidence pack",
    summary="Explicit local runtime for caller-supplied visual crop/OCR evidence packs.",
    stability="experimental",
    default_enabled=False,
    risk_level="medium",
    claim_boundary="Image/OCR byte reductions are proxy evidence until provider image/text token fields are measured.",
    gate_requirements=("explicit opt-in", "original evidence preserved", "confidence/error notes", "missed-context guardrail"),
    runtime_status="available-explicit-runtime",
    commands=(
        "context-guard experiments plan visual-crop-ocr",
        "context-guard experiments emit visual-crop-ocr",
    ),
    opt_in_flags=(
        "plan visual-crop-ocr",
        "emit visual-crop-ocr",
        "--full-evidence-receipt",
        "--crop-bounds",
        "--image-size",
        "--ocr-text|--ocr-text-file",
        "--ocr-confidence",
        "--ocr-error-note",
        "--missed-context-note",
    ),
    config_effect=(
        "Registry enablement records project-local intent only; visual crop/OCR evidence packs emit only through "
        "the explicit emit command and do not run OCR, crop images, call providers, write files, or change stable behavior."
    ),
    evidence_contract=(
        "Emitted evidence packs require the full visual evidence receipt plus caller-supplied crop/OCR evidence, "
        "OCR confidence/error notes when OCR is present, and missed-context guardrails before human review."
    ),
)

_LEARNED_COMPRESSION = Experiment(
    id="learned-compression",
    name="Learned/synthetic compression candidate gate",
    summary="Explicit local runtime for caller-supplied compact prose candidates with verified exact fallback.",
    stability="experimental",
    default_enabled=False,
    risk_level="high",
    claim_boundary="Semantic compression cannot claim savings or correctness without matched-task quality and provider token evidence.",
    gate_requirements=("explicit opt-in", "sanitized unprotected prose only", "protected-zone denial", "exact fallback or receipt"),
    runtime_status="available-explicit-runtime",
    commands=(
        "context-guard experiments plan learned-compression",
        "context-guard experiments emit learned-compression --exact-fallback-receipt <id> --reexpand-command <cmd>",
    ),
    opt_in_flags=(
        "plan learned-compression",
        "emit learned-compression",
        "--sanitized",
        "--trusted-source",
        "--exact-fallback-receipt",
        "--reexpand-command",
        "--replacement-text|--replacement-file",
    ),
    config_effect=(
        "Registry enablement records project-local intent only; learned-compression candidates emit only through "
        "the explicit emit command and do not run learned compressors, embeddings, rerankers, model calls, subprocesses, or external services."
    ),
    evidence_contract=(
        "Emitted candidates require caller-asserted sanitized trusted prose, verified exact local fallback content, "
        "a smaller caller-supplied prose candidate, and denial of protected or prompt-like signals."
    ),
)

_SELF_HOSTED_METRICS_LEDGER = Experiment(
    id="self-hosted-metrics-ledger",
    name="Self-hosted metrics ledger",
    summary="Explicit local ledger runtime for self-hosted/local metrics sidecars kept separate from hosted API claims.",
    stability="experimental",
    default_enabled=False,
    risk_level="low",
    claim_boundary="Self-hosted memory/latency metrics must stay separate from hosted API token/cost claims.",
    gate_requirements=("explicit opt-in", "separate ledger fields", "shifted-cost accounting"),
    runtime_status="available-explicit-runtime",
    commands=(
        "context-guard experiments plan self-hosted-metrics-ledger",
        "context-guard experiments record self-hosted-metrics-ledger --ledger-jsonl <path>",
    ),
    opt_in_flags=(
        "plan self-hosted-metrics-ledger",
        "record self-hosted-metrics-ledger",
        "--ledger-jsonl",
        "--input",
        "--latency-ms",
        "--peak-memory-mb",
        "--quality-score",
        "--energy-wh",
        "--local-cost-usd",
        "--tokens-per-second",
        "--model-server",
        "--optimization",
    ),
    config_effect=(
        "Registry enablement records project-local intent only; self-hosted metrics still write a ledger only "
        "when the explicit record command is invoked with --ledger-jsonl."
    ),
    evidence_contract=(
        "The explicit record command writes context-guard-bench JSONL ledger sidecars; self-hosted metrics "
        "remain separate from hosted API token/cost savings."
    ),
)

_LOCAL_PROXY = Experiment(
    id="local-proxy",
    name="Local proxy runtime gate",
    summary="Explicit local gate-record runtime for localhost-only proxy experiments with no hidden forwarding or API-key persistence.",
    stability="experimental",
    default_enabled=False,
    risk_level="high",
    claim_boundary="Proxy metrics are diagnostic only; no hosted savings claim without provider-measured evidence.",
    gate_requirements=("explicit opt-in", "localhost-only default", "no API-key persistence", "no hidden external forwarding"),
    runtime_status="available-explicit-runtime",
    commands=(
        "context-guard experiments plan local-proxy",
        "context-guard experiments plan local-proxy-external-forwarding",
        "context-guard experiments record local-proxy-runtime-gate --ledger-jsonl <path>",
        "context-guard experiments serve local-proxy --bind-host 127.0.0.1 --bind-port <port> --target-host 127.0.0.1 --target-port <port> --runtime-gate-ack --forwarding-gate-ack --once",
        "context-guard experiments serve local-proxy --diagnostic-ledger-jsonl <path> ...",
    ),
    opt_in_flags=(
        "plan local-proxy",
        "plan local-proxy-external-forwarding",
        "record local-proxy-runtime-gate",
        "serve local-proxy",
        "--bind-host",
        "--bind-port",
        "--target-host",
        "--target-port",
        "--upstream-url",
        "--ledger-jsonl",
        "--runtime-gate-ack",
        "--forwarding-gate-ack",
        "--once",
        "--max-request-bytes",
        "--max-response-bytes",
        "--diagnostic-ledger-jsonl",
        "--external-forwarding-intent",
        "--external-forwarding-design-ack",
        "--allow-host",
        "--allow-scheme",
        "--threat-model-note",
        "--credential-redaction-policy",
        "--provider-evidence-boundary",
        "--persist-api-key",
    ),
    config_effect=(
        "Registry enablement records project-local intent only; local proxy record/serve runtimes run only through "
        "explicit commands. Serve binds and forwards only literal loopback addresses, blocks credential material, "
        "and never persists API keys or calls non-local services; external-forwarding planning is design-only."
    ),
    evidence_contract=(
        "Gate rows require localhost-only bind/target metadata and explicit runtime gate acknowledgement. Serve "
        "evidence requires loopback-only bind/target IPs, explicit forwarding acknowledgement, no credential "
        "forwarding or persistence, bounded bytes/timeouts, and optional diagnostic ledger rows that remain "
        "shifted-cost evidence only. External-forwarding design plans require threat model notes, explicit "
        "allowlists, credential redaction policy, and provider-evidence boundaries before any future runtime."
    ),
)

# Ordered catalogue of every known experiment; all are default-off.
EXPERIMENTS: tuple[Experiment, ...] = (
    _OUTPUT_RECEIPT_TRIM,
    _PROTECTED_ZONE_POLICY,
    _CONTEXT_DIFF_COMPACTION,
    _VISUAL_CROP_OCR,
    _LEARNED_COMPRESSION,
    _SELF_HOSTED_METRICS_LEDGER,
    _LOCAL_PROXY,
)

# Lookup table keyed by experiment id, in catalogue order.
REGISTRY = {entry.id: entry for entry in EXPERIMENTS}
384
-
385
-
386
class RegistryError(RuntimeError):
    """Error raised by this module's helpers, e.g. the no-follow path and file routines."""
388
-
389
-
390
def fail(message: str, code: int = 2) -> NoReturn:
    """Print ``message`` to stderr prefixed with the tool name, then exit.

    Always raises ``SystemExit`` carrying ``code`` (2 by default).
    """
    diagnostic = f"{TOOL_NAME}: {message}"
    print(diagnostic, file=sys.stderr)
    raise SystemExit(code)
393
-
394
-
395
def os_error_detail(exc: OSError) -> str:
    """Return a short description of ``exc``.

    Uses ``exc.strerror`` when it is non-empty, otherwise the exception
    class name, and appends ``(errno N)`` when an errno is set.
    """
    description = exc.strerror if exc.strerror else exc.__class__.__name__
    if exc.errno is None:
        return description
    return f"{description} (errno {exc.errno})"
400
-
401
-
402
def _no_follow_flag(*, label: str) -> int:
    """Return ``os.O_NOFOLLOW``, or raise ``RegistryError`` when the platform lacks it.

    ``label`` names the path being opened and is used only in the error text.
    """
    if NO_FOLLOW_SUPPORTED:
        return os.O_NOFOLLOW
    raise RegistryError(f"{label} requires O_NOFOLLOW support")
406
-
407
-
408
def _directory_open_flags(*, follow_final: bool = False, label: str) -> int:
    """Build the ``os.open`` flags used to open a directory read-only.

    ``O_CLOEXEC`` and ``O_DIRECTORY`` are added when the platform defines
    them. Unless ``follow_final`` is true, the no-follow flag is added as
    well, which raises ``RegistryError`` when it is unavailable.
    """
    flags = os.O_RDONLY | getattr(os, "O_CLOEXEC", 0) | getattr(os, "O_DIRECTORY", 0)
    if follow_final:
        return flags
    return flags | _no_follow_flag(label=label)
417
-
418
-
419
def _file_open_flags(*, label: str, write: bool = False) -> int:
    """Build no-follow ``os.open`` flags for a file.

    With ``write`` true the base is write-only/create/truncate, otherwise
    read-only. ``O_CLOEXEC``, ``O_NONBLOCK`` and ``O_NOCTTY`` are added
    when the platform defines them. Raises ``RegistryError`` when the
    no-follow flag is unavailable.
    """
    if write:
        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
    else:
        flags = os.O_RDONLY
    flags |= _no_follow_flag(label=label)
    for optional_flag in ("O_CLOEXEC", "O_NONBLOCK", "O_NOCTTY"):
        flags |= getattr(os, optional_flag, 0)
    return flags
429
-
430
-
431
def _temp_file_open_flags(*, label: str) -> int:
    """Build no-follow ``os.open`` flags for exclusively creating a file.

    The base is write-only/create/exclusive, so the open fails if the name
    already exists. ``O_CLOEXEC`` and ``O_NOCTTY`` are added when the
    platform defines them. Raises ``RegistryError`` when the no-follow
    flag is unavailable.
    """
    flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | _no_follow_flag(label=label)
    for optional_flag in ("O_CLOEXEC", "O_NOCTTY"):
        flags |= getattr(os, optional_flag, 0)
    return flags
439
-
440
-
441
def _append_file_open_flags(*, label: str) -> int:
    """Build no-follow ``os.open`` flags for appending to a file.

    The base is write-only/create/append. ``O_CLOEXEC``, ``O_NONBLOCK``
    and ``O_NOCTTY`` are added when the platform defines them. Raises
    ``RegistryError`` when the no-follow flag is unavailable.
    """
    flags = os.O_WRONLY | os.O_CREAT | os.O_APPEND | _no_follow_flag(label=label)
    for optional_flag in ("O_CLOEXEC", "O_NONBLOCK", "O_NOCTTY"):
        flags |= getattr(os, optional_flag, 0)
    return flags
451
-
452
-
453
- def _leaf_name(path: Path, *, label: str) -> str:
454
- name = path.name
455
- if name in {"", ".", ".."}:
456
- raise RegistryError(f"{label} must name a regular file")
457
- return name
458
-
459
-
460
- def _normalized_link_target(anchor: Path, raw_target: str) -> Path:
461
- target = Path(raw_target)
462
- if target.is_absolute():
463
- return Path(os.path.normpath(str(target)))
464
- return Path(os.path.normpath(str(anchor / target)))
465
-
466
-
467
def normalize_allowed_first_absolute_symlink(path: Path) -> Path:
    """Rewrite an allow-listed first-component symlink to its expected target.

    When ``path`` is absolute, its first component is a key of
    ``ALLOWED_FIRST_COMPONENT_SYMLINKS``, and that component is a symlink
    whose normalized target equals the allow-listed value, the component
    is replaced by that value. In every other case, including an
    ``OSError`` while inspecting the link, ``path`` is returned unchanged.
    """
    parts = path.parts
    if not path.is_absolute() or len(parts) < 2:
        return path
    first = parts[1]
    expected = ALLOWED_FIRST_COMPONENT_SYMLINKS.get(first)
    if expected is None:
        return path
    root = Path(path.anchor)
    link = root / first
    try:
        points_at_expected = link.is_symlink() and _normalized_link_target(root, os.readlink(link)) == expected
    except OSError:
        return path
    if points_at_expected:
        return expected.joinpath(*parts[2:])
    return path
484
-
485
-
486
def normalize_local_path(path: Path) -> Path:
    """Return ``path`` as an absolute, lexically normalized path.

    Expands ``~``, anchors relative paths at the current working
    directory, applies ``os.path.normpath``, and finally rewrites an
    allow-listed first-component symlink.
    """
    expanded = path.expanduser()
    absolute = expanded if expanded.is_absolute() else Path.cwd() / expanded
    return normalize_allowed_first_absolute_symlink(Path(os.path.normpath(str(absolute))))
491
-
492
-
493
def normalize_project_path(root: Path, candidate: Path, *, label: str) -> Path:
    """Normalize ``candidate`` and require it to lie inside ``root``.

    ``~`` is expanded, relative candidates are anchored at ``root``, and
    the result is lexically normalized with an allow-listed
    first-component symlink rewritten. Raises ``RegistryError`` when the
    normalized path is not ``root`` or a descendant of it.
    """
    expanded = candidate.expanduser()
    absolute = expanded if expanded.is_absolute() else root / expanded
    normalized = normalize_allowed_first_absolute_symlink(Path(os.path.normpath(str(absolute))))
    try:
        # relative_to() is used purely as a containment check.
        normalized.relative_to(root)
    except ValueError as exc:
        raise RegistryError(f"{label} must stay inside project root: {normalized}") from exc
    else:
        return normalized
503
-
504
-
505
def open_directory_no_follow(path: Path, *, label: str, create: bool = False, missing_ok: bool = False) -> int | None:
    """Open ``path`` as a directory without following symlinked components.

    The path is walked one component at a time. Each component is opened
    relative to the previous directory descriptor with the no-follow
    directory flags. Only the filesystem anchor of an absolute path is
    opened with ``follow_final=True``.

    Args:
        path: Directory to open. An allow-listed first-component symlink
            is rewritten before the walk.
        label: Human-readable name of the path, used in error messages.
        create: Create missing components with mode ``0o755``.
        missing_ok: Return ``None`` when a component is missing. This is
            checked before ``create``.

    Returns:
        A directory file descriptor owned by the caller, or ``None`` when
        ``missing_ok`` is set and a component does not exist.

    Raises:
        RegistryError: When the platform lacks the required ``dir_fd`` or
            no-follow support, the path contains ``..``, a component
            cannot be opened or created, or a component is not a
            directory.
    """
    path = normalize_allowed_first_absolute_symlink(path)
    if not DIR_FD_OPEN_SUPPORTED:
        raise RegistryError(f"{label} requires dir_fd open support")
    if create and not DIR_FD_MKDIR_SUPPORTED:
        raise RegistryError(f"{label} requires dir_fd mkdir support")
    flags = _directory_open_flags(label=label)
    if path.is_absolute():
        anchor = path.anchor or os.sep
        parts = path.parts[1:]
        try:
            current_fd = os.open(anchor, _directory_open_flags(follow_final=True, label=label))
        except OSError as exc:
            raise RegistryError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
    else:
        parts = path.parts
        try:
            current_fd = os.open(".", flags)
        except OSError as exc:
            raise RegistryError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
    try:
        for part in parts:
            if part in {"", "."}:
                continue
            if part == "..":
                raise RegistryError(f"{label} must not contain parent traversal")
            next_fd = -1
            try:
                next_fd = os.open(part, flags, dir_fd=current_fd)
            except FileNotFoundError:
                if missing_ok:
                    # The ``finally`` clause below closes current_fd with the
                    # same best-effort handling as every other close here, so
                    # a failing close cannot escape as a raw OSError.
                    return None
                if not create:
                    raise RegistryError(f"could not inspect {label}: missing directory component") from None
                try:
                    os.mkdir(part, mode=0o755, dir_fd=current_fd)
                except FileExistsError:
                    # Another creator won the race; the re-open below validates it.
                    pass
                except OSError as exc:
                    raise RegistryError(f"could not create {label}: {os_error_detail(exc)}") from exc
                try:
                    next_fd = os.open(part, flags, dir_fd=current_fd)
                except OSError as exc:
                    raise RegistryError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
            except OSError as exc:
                raise RegistryError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
            try:
                if not stat.S_ISDIR(os.fstat(next_fd).st_mode):
                    raise RegistryError(f"{label} must not traverse non-directory components")
            except Exception:
                # Do not leak the freshly opened descriptor on any failure.
                if next_fd >= 0:
                    try:
                        os.close(next_fd)
                    except OSError:
                        pass
                raise
            try:
                os.close(current_fd)
            except OSError:
                pass
            current_fd = next_fd
        # Hand ownership to the caller so the ``finally`` clause leaves it open.
        owned_fd = current_fd
        current_fd = -1
        return owned_fd
    finally:
        if current_fd >= 0:
            try:
                os.close(current_fd)
            except OSError:
                pass
577
-
578
-
579
def _precheck_regular_leaf(parent_fd: int, leaf_name: str, *, label: str, missing_ok: bool = False) -> bool:
    """Check, without following symlinks, that ``leaf_name`` is a regular file.

    The entry is stat'ed relative to ``parent_fd``.

    Returns:
        ``True`` when the entry exists and is a regular file; ``False``
        when it is missing and ``missing_ok`` is set.

    Raises:
        RegistryError: When no-follow ``dir_fd`` stat is unsupported, the
            entry is missing and ``missing_ok`` is false, the stat fails,
            or the entry is not a regular file.
    """
    if not DIR_FD_STAT_NOFOLLOW_SUPPORTED:
        raise RegistryError(f"{label} requires dir_fd stat support")
    try:
        leaf_stat = os.stat(leaf_name, dir_fd=parent_fd, follow_symlinks=False)
    except FileNotFoundError:
        if not missing_ok:
            raise RegistryError(f"could not inspect {label}: missing file") from None
        return False
    except OSError as exc:
        raise RegistryError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
    if stat.S_ISREG(leaf_stat.st_mode):
        return True
    raise RegistryError(f"{label} must be a regular file")
593
-
594
-
595
def read_bounded_regular_file(path: Path, *, max_bytes: int, label: str, missing_ok: bool = False) -> tuple[bytes, bool] | None:
    """Read at most ``max_bytes`` from a regular file without following symlinks.

    Returns:
        ``(data, truncated)``, where ``data`` holds at most ``max_bytes``
        bytes and ``truncated`` is true when the file had more. ``None``
        is returned when ``missing_ok`` is set and the parent directory or
        the file does not exist.

    Raises:
        RegistryError: When the path cannot be opened safely, is not a
            regular file, or reading fails.
    """
    path = normalize_local_path(path)
    parent_fd = open_directory_no_follow(path.parent, label=f"{label} parent", missing_ok=missing_ok)
    if parent_fd is None:
        return None
    fd = -1
    try:
        leaf = _leaf_name(path, label=label)
        if not _precheck_regular_leaf(parent_fd, leaf, label=label, missing_ok=missing_ok):
            return None
        fd = os.open(leaf, _file_open_flags(label=label), dir_fd=parent_fd)
        # Re-check on the open descriptor in case the entry changed after the precheck.
        if not stat.S_ISREG(os.fstat(fd).st_mode):
            raise RegistryError(f"{label} must be a regular file")
        # Read one byte past the limit so truncation can be detected.
        budget = max_bytes + 1
        collected = bytearray()
        while len(collected) < budget:
            piece = os.read(fd, min(64 * 1024, budget - len(collected)))
            if not piece:
                break
            collected += piece
        return bytes(collected[:max_bytes]), len(collected) > max_bytes
    except OSError as exc:
        raise RegistryError(f"could not read {label}: {os_error_detail(exc)}") from exc
    finally:
        # parent_fd is always a valid descriptor here; fd may still be -1.
        for handle in (fd, parent_fd):
            if handle >= 0:
                try:
                    os.close(handle)
                except OSError:
                    pass
632
-
633
-
634
def write_all_fd(fd: int, data: bytes) -> None:
    """Write all of ``data`` to ``fd``, retrying after partial writes.

    Raises ``OSError("short write")`` when ``os.write`` reports that it
    wrote zero or fewer bytes.
    """
    pending = memoryview(data)
    while len(pending) > 0:
        count = os.write(fd, pending)
        if count <= 0:
            raise OSError("short write")
        # Slicing a memoryview does not copy the remaining payload.
        pending = pending[count:]
642
-
643
-
644
def write_regular_file_no_follow(path: Path, data: bytes, *, label: str) -> None:
    """Replace ``path`` with ``data`` via a sibling temp file and ``os.replace``.

    The parent directory is opened (and created if needed) without
    following symlinks. The data is written to an exclusively created
    temp file in that directory, then renamed over the destination. When
    the destination already exists, its permission bits are passed as the
    creation mode; otherwise ``0o644`` is used.

    Raises:
        RegistryError: When the destination is not a regular file, no temp
            file could be created, or any OS-level step fails.
    """

    def quietly(action, *args, **kwargs) -> None:
        # Best-effort call: an OSError from cleanup or fsync is ignored.
        try:
            action(*args, **kwargs)
        except OSError:
            pass

    path = normalize_local_path(path)
    parent_fd = open_directory_no_follow(path.parent, label=f"{label} parent", create=True)
    if parent_fd is None:  # pragma: no cover - create=True never returns None.
        raise RegistryError(f"could not inspect {label} parent")
    fd = -1
    pending_temp = None
    try:
        leaf = _leaf_name(path, label=label)
        permissions = 0o644
        if _precheck_regular_leaf(parent_fd, leaf, label=label, missing_ok=True):
            try:
                existing = os.stat(leaf, dir_fd=parent_fd, follow_symlinks=False)
                permissions = stat.S_IMODE(existing.st_mode) or 0o644
            except OSError:
                pass
        temp_label = f"{label} temp"
        for _ in range(20):
            candidate = _leaf_name(Path(f".{leaf}.{os.getpid()}.{secrets.token_hex(8)}.tmp"), label=temp_label)
            try:
                fd = os.open(candidate, _temp_file_open_flags(label=temp_label), permissions, dir_fd=parent_fd)
            except FileExistsError:
                # Name collision: try again with a fresh random suffix.
                continue
            pending_temp = candidate
            break
        else:
            raise RegistryError(f"could not create temporary {label}")
        if not stat.S_ISREG(os.fstat(fd).st_mode):
            raise RegistryError(f"{label} temp must be a regular file")
        write_all_fd(fd, data)
        quietly(os.fsync, fd)
        quietly(os.close, fd)
        fd = -1
        os.replace(pending_temp, leaf, src_dir_fd=parent_fd, dst_dir_fd=parent_fd)
        # The temp name no longer exists after the rename, so skip the unlink.
        pending_temp = None
    except OSError as exc:
        raise RegistryError(f"could not write {label}: {os_error_detail(exc)}") from exc
    finally:
        if fd >= 0:
            quietly(os.close, fd)
        if pending_temp is not None:
            quietly(os.unlink, pending_temp, dir_fd=parent_fd)
        quietly(os.fsync, parent_fd)
        quietly(os.close, parent_fd)
705
-
706
-
707
def _reject_parent_traversal(path: Path, *, label: str) -> None:
    """Raise ``RegistryError`` when any component of ``path`` is ``..``."""
    if ".." in path.parts:
        raise RegistryError(f"{label} must not contain parent traversal")
710
-
711
-
712
def write_regular_file_no_follow_exclusive(path: Path, data: bytes, *, label: str, mode: int = 0o600) -> None:
    """Create ``path`` with ``data``; refuse to touch an existing file.

    Paths containing ``..`` are rejected up front.  The parent directory is
    opened via ``open_directory_no_follow`` and the leaf is created relative
    to that descriptor.  If anything fails after the file was created, the
    partially written file is unlinked again.

    Args:
        path: File to create.
        data: Exact bytes to store.
        label: Human-readable name used in error messages.
        mode: Permission bits requested at creation and re-applied with
            ``os.fchmod`` (best effort).

    Raises:
        RegistryError: if the target already exists, is not a regular file,
            the parent cannot be inspected, or an ``OSError`` occurs.
    """
    _reject_parent_traversal(path, label=label)
    path = normalize_local_path(path)
    dir_fd = open_directory_no_follow(path.parent, label=f"{label} parent")
    if dir_fd is None:  # pragma: no cover - missing_ok is not enabled.
        raise RegistryError(f"could not inspect {label} parent")
    file_fd = -1
    # True from the moment the file exists until the write has fully succeeded.
    remove_on_exit = False
    try:
        leaf = _leaf_name(path, label=label)
        if _precheck_regular_leaf(dir_fd, leaf, label=label, missing_ok=True):
            raise RegistryError(f"{label} must not already exist")
        open_flags = _temp_file_open_flags(label=label)
        file_fd = os.open(leaf, open_flags, mode, dir_fd=dir_fd)
        remove_on_exit = True
        if not stat.S_ISREG(os.fstat(file_fd).st_mode):
            raise RegistryError(f"{label} must be a regular file")
        try:
            os.fchmod(file_fd, mode)
        except OSError:
            pass
        write_all_fd(file_fd, data)
        try:
            os.fsync(file_fd)
        except OSError:
            pass
        remove_on_exit = False
    except FileExistsError as exc:
        raise RegistryError(f"{label} must not already exist") from exc
    except OSError as exc:
        raise RegistryError(f"could not write {label}: {os_error_detail(exc)}") from exc
    finally:
        if file_fd >= 0:
            try:
                os.close(file_fd)
            except OSError:
                pass
        if remove_on_exit:
            try:
                os.unlink(_leaf_name(path, label=label), dir_fd=dir_fd)
            except OSError:
                pass
        try:
            os.fsync(dir_fd)
        except OSError:
            pass
        try:
            os.close(dir_fd)
        except OSError:
            pass
764
-
765
-
766
def append_jsonl_no_follow(path: Path, payload: dict[str, Any], *, label: str) -> int:
    """Append ``payload`` as one JSON line to ``path`` and return the byte count.

    The parent directory is opened (and created if needed) through
    ``open_directory_no_follow``; the file is opened relative to it with the
    flags from ``_append_file_open_flags`` and creation mode ``0o600``.  The
    record is serialised with sorted keys, non-ASCII characters kept as-is,
    and a trailing newline.

    Returns:
        Number of bytes written, including the newline.

    Raises:
        RegistryError: if the parent cannot be inspected, the target is not a
            regular file, or an ``OSError`` occurs while appending.
    """
    path = normalize_local_path(path)
    dir_fd = open_directory_no_follow(path.parent, label=f"{label} parent", create=True)
    if dir_fd is None:  # pragma: no cover - create=True never returns None.
        raise RegistryError(f"could not inspect {label} parent")
    file_fd = -1
    try:
        leaf = _leaf_name(path, label=label)
        _precheck_regular_leaf(dir_fd, leaf, label=label, missing_ok=True)
        file_fd = os.open(leaf, _append_file_open_flags(label=label), 0o600, dir_fd=dir_fd)
        if not stat.S_ISREG(os.fstat(file_fd).st_mode):
            raise RegistryError(f"{label} must be a regular file")
        line = json.dumps(payload, ensure_ascii=False, sort_keys=True)
        encoded = line.encode("utf-8") + b"\n"
        write_all_fd(file_fd, encoded)
        try:
            os.fsync(file_fd)
        except OSError:
            pass
        return len(encoded)
    except OSError as exc:
        raise RegistryError(f"could not append {label}: {os_error_detail(exc)}") from exc
    finally:
        if file_fd >= 0:
            try:
                os.close(file_fd)
            except OSError:
                pass
        try:
            os.fsync(dir_fd)
        except OSError:
            pass
        try:
            os.close(dir_fd)
        except OSError:
            pass
801
-
802
-
803
def preflight_append_jsonl_no_follow(path: Path, *, label: str) -> None:
    """Check that a JSONL append target is no-follow appendable before side effects.

    If the target exists it is opened with the append flags and must be a
    regular file.  If it does not exist, a throw-away ``.preflight`` file is
    created next to it and removed again, which shows the directory accepts
    new files without creating the real target.

    Raises:
        RegistryError: if the parent cannot be inspected, the probe file
            cannot be created, a non-regular file is found, or an
            ``OSError`` occurs.
    """
    path = normalize_local_path(path)
    dir_fd = open_directory_no_follow(path.parent, label=f"{label} parent", create=True)
    if dir_fd is None:  # pragma: no cover - create=True never returns None.
        raise RegistryError(f"could not inspect {label} parent")
    probe_fd = -1
    probe_leaf: str | None = None
    try:
        leaf = _leaf_name(path, label=label)
        if _precheck_regular_leaf(dir_fd, leaf, label=label, missing_ok=True):
            # Existing target: opening it for append is the whole check.
            probe_fd = os.open(leaf, _append_file_open_flags(label=label), 0o600, dir_fd=dir_fd)
            if not stat.S_ISREG(os.fstat(probe_fd).st_mode):
                raise RegistryError(f"{label} must be a regular file")
            return
        attempts_left = 20
        while attempts_left > 0:
            attempts_left -= 1
            candidate = _leaf_name(Path(f".{leaf}.{os.getpid()}.{secrets.token_hex(8)}.preflight"), label=f"{label} preflight")
            try:
                probe_fd = os.open(candidate, _temp_file_open_flags(label=f"{label} preflight"), 0o600, dir_fd=dir_fd)
            except FileExistsError:
                continue
            probe_leaf = candidate
            break
        if probe_fd < 0 or probe_leaf is None:
            raise RegistryError(f"could not create temporary {label} preflight")
        if not stat.S_ISREG(os.fstat(probe_fd).st_mode):
            raise RegistryError(f"{label} preflight temp must be a regular file")
    except OSError as exc:
        raise RegistryError(f"could not append {label}: {os_error_detail(exc)}") from exc
    finally:
        if probe_fd >= 0:
            try:
                os.close(probe_fd)
            except OSError:
                pass
        if probe_leaf is not None:
            # The probe file is never kept, on success or failure.
            try:
                os.unlink(probe_leaf, dir_fd=dir_fd)
            except OSError:
                pass
        try:
            os.close(dir_fd)
        except OSError:
            pass
848
-
849
-
850
- def resolve_root(raw_root: str | None) -> Path:
851
- root = Path(raw_root) if raw_root else Path.cwd()
852
- try:
853
- return root.expanduser().resolve()
854
- except OSError as exc:
855
- raise RegistryError(f"could not resolve root: {root}: {exc}") from exc
856
-
857
-
858
def resolve_config_path(root: Path, raw_config: str | None) -> Path:
    """Return the config path under ``root``, defaulting to ``DEFAULT_CONFIG``.

    The chosen candidate is passed to ``normalize_project_path`` with the
    label ``"config path"``.
    """
    candidate = Path(raw_config) if raw_config else DEFAULT_CONFIG
    return normalize_project_path(root, candidate, label="config path")
864
-
865
-
866
def load_config(path: Path) -> dict[str, Any]:
    """Load and validate the experiments config stored at ``path``.

    A missing file yields the default config with nothing enabled.

    Returns:
        A dict with ``schema_version`` and a sorted, de-duplicated
        ``enabled`` list of experiment ids.

    Raises:
        RegistryError: if the file exceeds ``MAX_CONFIG_BYTES``, is not valid
            UTF-8 or JSON, is nested too deeply to parse, is not a JSON
            object, declares an unsupported ``schema_version``, or has an
            ``enabled`` value that is not a list of strings.
    """
    loaded = read_bounded_regular_file(path, max_bytes=MAX_CONFIG_BYTES, label="config", missing_ok=True)
    if loaded is None:
        return {"schema_version": CONFIG_SCHEMA_VERSION, "enabled": []}
    raw, truncated = loaded
    if truncated:
        raise RegistryError("config exceeded max bytes")
    try:
        text = raw.decode("utf-8")
    except UnicodeDecodeError as exc:
        raise RegistryError(f"could not decode config UTF-8: {path}: {exc.reason}") from exc
    try:
        data = json.loads(text)
    except json.JSONDecodeError as exc:
        raise RegistryError(f"could not parse config JSON: {path}: {exc.msg}") from exc
    except RecursionError as exc:
        # Pathologically nested input exhausts the decoder's recursion budget.
        raise RegistryError(f"could not parse config JSON: {path}: nesting too deep") from exc
    if not isinstance(data, dict):
        raise RegistryError(f"config must be a JSON object: {path}")
    schema = data.get("schema_version")
    # A missing schema_version is accepted alongside the current one.
    if schema not in (None, CONFIG_SCHEMA_VERSION):
        raise RegistryError(f"unsupported config schema_version: {schema!r}")
    enabled = data.get("enabled", [])
    if not isinstance(enabled, list) or not all(isinstance(item, str) for item in enabled):
        raise RegistryError("config enabled must be a list of experiment ids")
    return {"schema_version": CONFIG_SCHEMA_VERSION, "enabled": sorted(set(enabled))}
892
-
893
-
894
def write_config(path: Path, enabled: set[str]) -> dict[str, Any]:
    """Persist the enabled experiment ids to ``path`` and return the stored dict.

    The document holds the schema version, a UTC timestamp truncated to whole
    seconds with a ``Z`` suffix, and the ids in sorted order.  It is written
    as indented, key-sorted JSON with a trailing newline.
    """
    now_utc = datetime.now(timezone.utc).replace(microsecond=0)
    data = {
        "schema_version": CONFIG_SCHEMA_VERSION,
        "updated_at": now_utc.isoformat().replace("+00:00", "Z"),
        "enabled": sorted(enabled),
    }
    document = json.dumps(data, indent=2, sort_keys=True) + "\n"
    write_regular_file_no_follow(path, document.encode("utf-8"), label="config")
    return data
903
-
904
-
905
def configured_enabled_set(config: dict[str, Any]) -> set[str]:
    """Return every id listed under ``enabled`` in ``config`` as a set."""
    listed = config.get("enabled", [])
    return {*listed}
907
-
908
-
909
def enabled_set(config: dict[str, Any]) -> set[str]:
    """Return the configured ids that are also present in ``REGISTRY``."""
    recognised: set[str] = set()
    for item in configured_enabled_set(config):
        if item in REGISTRY:
            recognised.add(item)
    return recognised
911
-
912
-
913
def unknown_enabled(config: dict[str, Any]) -> list[str]:
    """Return, sorted, the configured ids that are not present in ``REGISTRY``."""
    unrecognised = [item for item in configured_enabled_set(config) if item not in REGISTRY]
    unrecognised.sort()
    return unrecognised
915
-
916
-
917
def registry_payload(*, config_path: Path, config: dict[str, Any], root: Path) -> dict[str, Any]:
    """Build the report dict that describes every experiment and its state.

    Each entry of ``EXPERIMENTS`` is rendered through its ``to_json`` method
    with ``enabled`` set according to the recognised ids in ``config``; ids
    in the config that are not in the registry are listed separately.
    """
    active = enabled_set(config)
    experiments = []
    for experiment in EXPERIMENTS:
        experiments.append(experiment.to_json(enabled=experiment.id in active))
    return {
        "tool": TOOL_NAME,
        "schema_version": CONFIG_SCHEMA_VERSION,
        "root": str(root),
        "config_path": str(config_path),
        "default_off": True,
        "note": "Experiments are opt-in metadata gates; enabling an experiment does not activate stable runtime behavior by itself.",
        "unknown_enabled": unknown_enabled(config),
        "experiments": experiments,
    }
929
-
930
-
931
def emit_json(payload: dict[str, Any]) -> None:
    """Print ``payload`` to stdout as indented JSON with sorted keys."""
    rendered = json.dumps(payload, indent=2, sort_keys=True)
    print(rendered)
933
-
934
-
935
def emit_human(payload: dict[str, Any], *, include_details: bool = False) -> None:
    """Print a plain-text view of a registry payload to stdout.

    One line is printed per experiment.  With ``include_details`` the
    summary, runtime status, commands, opt-in flags, config effect, evidence
    contract and claim boundary follow each experiment.  Ids that are enabled
    in the config but unknown to the registry are listed last.
    """
    print("ContextGuard experiments (default off; explicit opt-in required)")
    print(f"Config: {payload['config_path']}")
    print("Enabling an experiment records project-local intent only; helpers still require explicit experimental use.")
    for entry in payload["experiments"]:
        state = "disabled"
        if entry["enabled"]:
            state = "enabled"
        print(f"- {entry['id']}: {state} [{entry['stability']}, risk={entry['risk_level']}]")
        if not include_details:
            continue
        print(f" {entry['summary']}")
        print(f" Runtime: {entry['runtime_status']}")
        # Commands and flags are optional; empty lists print nothing.
        if entry["commands"]:
            print(" Commands: " + "; ".join(entry["commands"]))
        if entry["opt_in_flags"]:
            print(" Opt-in flags: " + ", ".join(entry["opt_in_flags"]))
        print(f" Config effect: {entry['config_effect']}")
        print(f" Evidence contract: {entry['evidence_contract']}")
        print(f" Claim boundary: {entry['claim_boundary']}")
    unknown_ids = payload["unknown_enabled"]
    if unknown_ids:
        print("Unknown enabled ids in config: " + ", ".join(unknown_ids))
954
-
955
-
956
def require_known(experiment_id: str) -> Experiment:
    """Return the registered experiment for ``experiment_id``.

    An unknown id is reported through ``fail`` together with the sorted list
    of known ids.
    """
    try:
        experiment = REGISTRY[experiment_id]
    except KeyError:
        known_ids = sorted(REGISTRY)
        choices = ", ".join(known_ids)
        fail(f"unknown experiment id {experiment_id!r}; known ids: {choices}")
    else:
        return experiment
962
-
963
-
964
def command_list(args: argparse.Namespace) -> int:
    """Handle the ``list`` command: print every experiment with full details.

    Emits JSON when ``args.json`` is set, otherwise the detailed text view.
    Always returns exit status ``0``.
    """
    root, config_path, config = load_args_context(args)
    payload = registry_payload(config_path=config_path, config=config, root=root)
    if not args.json:
        emit_human(payload, include_details=True)
        return 0
    emit_json(payload)
    return 0
972
-
973
-
974
def command_status(args: argparse.Namespace) -> int:
    """Handle the ``status`` command: print the one-line-per-experiment view.

    Emits JSON when ``args.json`` is set, otherwise the compact text view.
    Always returns exit status ``0``.
    """
    root, config_path, config = load_args_context(args)
    payload = registry_payload(config_path=config_path, config=config, root=root)
    if not args.json:
        emit_human(payload, include_details=False)
        return 0
    emit_json(payload)
    return 0
982
-
983
-
984
def command_enable(args: argparse.Namespace) -> int:
    """Handle the ``enable`` command: record an experiment id in the config.

    The id must be known to the registry.  The config is rewritten even when
    the id was already enabled; ``changed`` in the JSON output says whether
    the set of ids actually grew.  Returns exit status ``0``.
    """
    target = args.experiment_id
    require_known(target)
    root, config_path, config = load_args_context(args)
    previously_enabled = configured_enabled_set(config)
    was_missing = target not in previously_enabled
    written = write_config(config_path, previously_enabled | {target})
    payload = registry_payload(config_path=config_path, config=written, root=root)
    payload["changed"] = was_missing
    payload["experiment_id"] = target
    if args.json:
        emit_json(payload)
        return 0
    print(f"enabled {target} in {config_path}")
    return 0
999
-
1000
-
1001
def command_disable(args: argparse.Namespace) -> int:
    """Handle the ``disable`` command: remove an experiment id from the config.

    The id must be known to the registry.  The config is rewritten even when
    the id was not enabled; ``changed`` in the JSON output says whether the
    id was actually removed.  Returns exit status ``0``.
    """
    target = args.experiment_id
    require_known(target)
    root, config_path, config = load_args_context(args)
    previously_enabled = configured_enabled_set(config)
    was_present = target in previously_enabled
    written = write_config(config_path, previously_enabled - {target})
    payload = registry_payload(config_path=config_path, config=written, root=root)
    payload["changed"] = was_present
    payload["experiment_id"] = target
    if args.json:
        emit_json(payload)
        return 0
    print(f"disabled {target} in {config_path}")
    return 0
1016
-
1017
-
1018
-
1019
# Git-style per-file header line; captures the two whitespace-free path tokens.
DIFF_GIT_RE = re.compile(r"^diff --git (?P<old>\S+) (?P<new>\S+)$")
# Unified-diff hunk header "@@ -start[,count] +start[,count] @@ section".
HUNK_RE = re.compile(r"^@@\s+-(?P<old_start>\d+)(?:,(?P<old_count>\d+))?\s+\+(?P<new_start>\d+)(?:,(?P<new_count>\d+))?\s+@@(?P<section>.*)$")
# Artifact/receipt ids: 16 to 64 lowercase hexadecimal characters.
CONTEXT_DIFF_ARTIFACT_ID_RE = re.compile(r"^[a-f0-9]{16,64}$")


def read_bounded_input(args: argparse.Namespace) -> tuple[str, dict[str, Any]]:
    """Read the diff to plan from ``--input`` or stdin, capped in size.

    At most ``MAX_CONTEXT_DIFF_INPUT_BYTES`` bytes are kept.  Invalid UTF-8
    is decoded with replacement characters.

    Returns:
        The decoded text and a metadata dict (source label, byte and line
        counts, SHA-256 of the kept bytes, truncation flag, byte limit).

    Raises:
        RegistryError: if no input bytes were supplied.
    """
    source_label = args.source_label
    if args.input:
        path = Path(args.input)
        source_label = source_label or str(path)
        loaded = read_bounded_regular_file(path, max_bytes=MAX_CONTEXT_DIFF_INPUT_BYTES, label="input")
        assert loaded is not None
        raw, truncated = loaded
    else:
        source_label = source_label or "stdin"
        # Read one byte past the limit so oversized input can be detected.
        raw = sys.stdin.buffer.read(MAX_CONTEXT_DIFF_INPUT_BYTES + 1)
        truncated = len(raw) > MAX_CONTEXT_DIFF_INPUT_BYTES
        raw = raw[:MAX_CONTEXT_DIFF_INPUT_BYTES]
    if not raw:
        raise RegistryError("context-diff-compaction plan requires diff input on stdin or --input")
    text = raw.decode("utf-8", errors="replace")
    metadata = {
        "source_label": source_label,
        "bytes": len(raw),
        "lines": len(text.splitlines()),
        "sha256": hashlib.sha256(raw).hexdigest(),
        "truncated": truncated,
        "max_bytes": MAX_CONTEXT_DIFF_INPUT_BYTES,
    }
    return text, metadata


def strip_diff_prefix(path: str) -> str:
    """Drop a leading ``a/`` or ``b/`` from a diff header path, if present."""
    if path.startswith(("a/", "b/")):
        return path[2:]
    return path


def summarize_diff(text: str, *, max_files: int = 50, max_hunks: int = 200) -> dict[str, Any]:
    """Summarise the files and hunks of a unified diff.

    Files are opened by ``diff --git`` headers; a hunk seen before any such
    header is attached to a placeholder file whose paths are ``None``.  For
    every recorded hunk the added, removed and context lines are counted and
    compared with the counts announced in the ``@@`` header.

    A body line starting with ``---`` or ``+++`` is a real removal/addition
    (for example a removed ``-- comment`` or YAML ``---`` line) as long as
    the hunk header still expects more old/new lines.  Only once that side of
    the hunk is complete is such a line taken for the next file header and
    ignored.

    Args:
        text: Diff text to inspect.
        max_files: Maximum number of file entries to record.
        max_hunks: Maximum number of hunks recorded per file.

    Returns:
        A dict with ``file_count``, ``hunk_count``, ``summarized_hunk_count``,
        ``reviewable_hunk_count``, ``malformed_hunk_count``,
        ``truncated_files``, ``truncated_hunks`` and the ``files`` list.
    """
    files: list[dict[str, Any]] = []
    current: dict[str, Any] | None = None
    current_hunk: dict[str, Any] | None = None
    total_hunks = 0
    summarized_hunks = 0
    diff_header_count = 0
    for line_number, line in enumerate(text.splitlines(), start=1):
        match = DIFF_GIT_RE.match(line)
        if match:
            diff_header_count += 1
            current_hunk = None
            if len(files) >= max_files:
                # Over the file budget: following hunks are counted only.
                current = None
                continue
            current = {
                "old_path": strip_diff_prefix(match.group("old")),
                "new_path": strip_diff_prefix(match.group("new")),
                "diff_header_line": line_number,
                "hunks": [],
            }
            files.append(current)
            continue
        hunk = HUNK_RE.match(line)
        if hunk:
            total_hunks += 1
            if current is None:
                if len(files) >= max_files:
                    current_hunk = None
                    continue
                current = {"old_path": None, "new_path": None, "diff_header_line": None, "hunks": []}
                files.append(current)
            if len(current["hunks"]) < max_hunks:
                current_hunk = {
                    "line": line_number,
                    "old_start": int(hunk.group("old_start")),
                    # An omitted count in a hunk header means one line.
                    "old_count": int(hunk.group("old_count") or "1"),
                    "new_start": int(hunk.group("new_start")),
                    "new_count": int(hunk.group("new_count") or "1"),
                    "section": hunk.group("section").strip()[:120],
                    "added_lines": 0,
                    "removed_lines": 0,
                    "context_lines": 0,
                    "body_lines": 0,
                    "reviewable": False,
                }
                current["hunks"].append(current_hunk)
                summarized_hunks += 1
            else:
                current_hunk = None
            continue
        if current_hunk is None:
            continue
        old_pending = (
            current_hunk["removed_lines"] + current_hunk["context_lines"] < current_hunk["old_count"]
        )
        new_pending = (
            current_hunk["added_lines"] + current_hunk["context_lines"] < current_hunk["new_count"]
        )
        if line.startswith("+") and (new_pending or not line.startswith("+++")):
            current_hunk["added_lines"] += 1
        elif line.startswith("-") and (old_pending or not line.startswith("---")):
            current_hunk["removed_lines"] += 1
        elif line.startswith(" "):
            current_hunk["context_lines"] += 1
        else:
            # File headers, "\ No newline at end of file", and other noise.
            continue
        current_hunk["body_lines"] += 1
    reviewable_hunks = 0
    malformed_hunks = 0
    for file_summary in files:
        for hunk_summary in file_summary["hunks"]:
            old_body_lines = hunk_summary["removed_lines"] + hunk_summary["context_lines"]
            new_body_lines = hunk_summary["added_lines"] + hunk_summary["context_lines"]
            has_changes = bool(hunk_summary["added_lines"] or hunk_summary["removed_lines"])
            well_formed = (
                old_body_lines == hunk_summary["old_count"]
                and new_body_lines == hunk_summary["new_count"]
            )
            hunk_summary["old_body_lines"] = old_body_lines
            hunk_summary["new_body_lines"] = new_body_lines
            hunk_summary["has_changes"] = has_changes
            hunk_summary["well_formed"] = well_formed
            hunk_summary["reviewable"] = bool(has_changes and well_formed)
            if hunk_summary["reviewable"]:
                reviewable_hunks += 1
            elif not well_formed:
                malformed_hunks += 1
    return {
        "file_count": len(files),
        "hunk_count": total_hunks,
        "summarized_hunk_count": summarized_hunks,
        "reviewable_hunk_count": reviewable_hunks,
        "malformed_hunk_count": malformed_hunks,
        "truncated_files": max(0, diff_header_count - len(files)),
        "truncated_hunks": max(0, total_hunks - summarized_hunks),
        "files": files,
    }
1152
-
1153
-
1154
- def valid_context_diff_reexpand_command(receipt_id: str | None, command: str | None) -> tuple[bool, str | None]:
1155
- if not receipt_id or not command:
1156
- return False, "missing_exact_receipt_or_reexpand_command"
1157
- if not CONTEXT_DIFF_ARTIFACT_ID_RE.fullmatch(receipt_id):
1158
- return False, "invalid_reexpand_command"
1159
- if any(token in command for token in (";", "|", "&", ">", "<", "`", "$", "\n", "\r")):
1160
- return False, "invalid_reexpand_command"
1161
- try:
1162
- argv = shlex.split(command)
1163
- except ValueError:
1164
- return False, "invalid_reexpand_command"
1165
- if argv == ["context-guard-artifact", "get", receipt_id, "--full"]:
1166
- return True, None
1167
- if argv == ["context-guard", "artifact", "get", receipt_id, "--full"]:
1168
- return True, None
1169
- return False, "invalid_reexpand_command"
1170
-
1171
-
1172
def context_diff_artifact_read_dirs() -> list[Path]:
    """Return the artifact directories to search: default first, then legacy."""
    search_order = [DEFAULT_CONTEXT_DIFF_ARTIFACT_DIR]
    search_order.append(LEGACY_CONTEXT_DIFF_ARTIFACT_DIR)
    return search_order
1174
-
1175
-
1176
def context_diff_artifact_paths(directory: Path, receipt_id: str) -> tuple[Path, Path]:
    """Return the ``(content, metadata)`` paths of a receipt in ``directory``.

    The content file is ``<receipt_id>.txt`` and the metadata file is
    ``<receipt_id>.json``.
    """
    content_path = directory / f"{receipt_id}.txt"
    metadata_path = directory / f"{receipt_id}.json"
    return content_path, metadata_path
1178
-
1179
-
1180
- def verify_context_diff_artifact(
1181
- receipt_id: str | None,
1182
- *,
1183
- expected_sha256: str,
1184
- expected_bytes: int,
1185
- ) -> tuple[bool, str | None, dict[str, Any]]:
1186
- if not receipt_id or not CONTEXT_DIFF_ARTIFACT_ID_RE.fullmatch(receipt_id):
1187
- return False, "invalid_reexpand_command", {"checked": False, "read_directories": []}
1188
- read_dirs = context_diff_artifact_read_dirs()
1189
- details: dict[str, Any] = {
1190
- "checked": True,
1191
- "read_directories": [str(path) for path in read_dirs],
1192
- "matched_directory": None,
1193
- "content_sha256": None,
1194
- "content_bytes": None,
1195
- }
1196
- for directory in read_dirs:
1197
- content_path, meta_path = context_diff_artifact_paths(directory, receipt_id)
1198
- meta_loaded = read_bounded_regular_file(
1199
- meta_path,
1200
- max_bytes=MAX_CONTEXT_DIFF_ARTIFACT_METADATA_BYTES,
1201
- label="context-diff artifact metadata",
1202
- missing_ok=True,
1203
- )
1204
- content_loaded = read_bounded_regular_file(
1205
- content_path,
1206
- max_bytes=max(MAX_CONTEXT_DIFF_INPUT_BYTES, expected_bytes),
1207
- label="context-diff artifact content",
1208
- missing_ok=True,
1209
- )
1210
- if meta_loaded is None and content_loaded is None:
1211
- continue
1212
- if meta_loaded is None or content_loaded is None:
1213
- return False, "artifact_receipt_invalid", details
1214
- meta_raw, meta_truncated = meta_loaded
1215
- content_raw, content_truncated = content_loaded
1216
- if meta_truncated or content_truncated:
1217
- return False, "artifact_receipt_invalid", details
1218
- try:
1219
- metadata = json.loads(meta_raw.decode("utf-8"))
1220
- except (UnicodeDecodeError, json.JSONDecodeError):
1221
- return False, "artifact_receipt_invalid", details
1222
- if not isinstance(metadata, dict) or metadata.get("artifact_id") != receipt_id:
1223
- return False, "artifact_receipt_invalid", details
1224
- stored = metadata.get("stored_output")
1225
- stored_sha = stored.get("sha256") if isinstance(stored, dict) else None
1226
- stored_bytes = stored.get("bytes") if isinstance(stored, dict) else None
1227
- actual_sha = hashlib.sha256(content_raw).hexdigest()
1228
- actual_bytes = len(content_raw)
1229
- details.update({
1230
- "matched_directory": str(directory),
1231
- "content_sha256": actual_sha,
1232
- "content_bytes": actual_bytes,
1233
- })
1234
- if stored_sha != actual_sha or stored_bytes != actual_bytes:
1235
- return False, "artifact_receipt_invalid", details
1236
- if actual_sha != expected_sha256 or actual_bytes != expected_bytes:
1237
- return False, "artifact_content_mismatch", details
1238
- return True, None, details
1239
- return False, "artifact_receipt_not_found", details
1240
-
1241
-
1242
def read_context_diff_replacement(args: argparse.Namespace) -> tuple[str | None, dict[str, Any]]:
    """Load the caller-supplied compacted replacement, if any.

    The replacement comes from ``--replacement-text`` or
    ``--replacement-file`` (never both) and is capped at
    ``MAX_CONTEXT_DIFF_REPLACEMENT_BYTES``.

    Returns:
        The replacement text (``None`` when none was supplied) and a metadata
        dict with source label, byte and line counts, SHA-256, truncation
        flag and the byte limit.

    Raises:
        RegistryError: if both replacement options are given.
    """
    if args.replacement_text is not None and args.replacement_file:
        raise RegistryError("context-diff-compaction emit accepts only one of --replacement-text or --replacement-file")
    # Defaults describe the "no replacement supplied" case.
    text: str | None = None
    raw = b""
    truncated = False
    source_label: str | None = None
    if args.replacement_text is not None:
        encoded = str(args.replacement_text).encode("utf-8")
        truncated = len(encoded) > MAX_CONTEXT_DIFF_REPLACEMENT_BYTES
        raw = encoded[:MAX_CONTEXT_DIFF_REPLACEMENT_BYTES]
        # Cutting at a byte limit can split a character; decode leniently.
        text = raw.decode("utf-8", errors="replace")
        source_label = "inline"
    elif args.replacement_file:
        path = Path(args.replacement_file)
        loaded = read_bounded_regular_file(
            path,
            max_bytes=MAX_CONTEXT_DIFF_REPLACEMENT_BYTES,
            label="context-diff replacement",
        )
        assert loaded is not None
        raw, truncated = loaded
        text = raw.decode("utf-8", errors="replace")
        source_label = str(path)
    supplied = text is not None
    metadata = {
        "source_label": source_label,
        "bytes": len(raw),
        "lines": len(text.splitlines()) if supplied else 0,
        "sha256": hashlib.sha256(raw).hexdigest() if supplied else None,
        "truncated": truncated,
        "max_bytes": MAX_CONTEXT_DIFF_REPLACEMENT_BYTES,
    }
    return text, metadata
1277
-
1278
-
1279
def context_diff_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the dry-run plan report for context-diff compaction.

    Reads the bounded diff input, summarises it, and lists every reason the
    diff is not ready for human review.  No replacement is produced and the
    supplied receipt handle is recorded but not verified.
    """
    text, input_meta = read_bounded_input(args)
    summary = summarize_diff(text)
    receipt_id = args.receipt_id.strip() if args.receipt_id else None
    reexpand_command = args.reexpand_command.strip() if args.reexpand_command else None
    has_exact_handle = bool(receipt_id and reexpand_command)

    # (condition, blocker code) pairs, kept in reporting order.
    checks = (
        (not has_exact_handle, "missing_exact_receipt_or_reexpand_command"),
        (input_meta["truncated"], "input_truncated"),
        (summary.get("truncated_files", 0) or summary.get("truncated_hunks", 0), "diff_summary_truncated"),
        (summary.get("malformed_hunk_count", 0), "malformed_diff_hunks"),
        (summary["file_count"] == 0 or summary.get("reviewable_hunk_count", 0) == 0, "no_reviewable_diff_hunks"),
    )
    readiness_blockers: list[str] = [code for failed, code in checks if failed]

    if not readiness_blockers:
        status = "ready_for_human_review"
    elif has_exact_handle:
        status = "blocked_until_reviewable_diff"
    else:
        status = "blocked_until_exact_receipt"

    transform_policy = {
        "automatic_compaction": False,
        "lossy_replacement_allowed": False,
        "semantic_rewrite_allowed": False,
        "human_review_required": True,
        "stable_runtime_behavior_changed": False,
    }
    exact_retrieval = {
        "required": True,
        "available": has_exact_handle,
        "artifact_id": receipt_id,
        "cli": reexpand_command,
        "verified": False,
        "note": "Dry-run planning records user-supplied handles for human review only; it does not verify local receipt storage.",
    }
    review_plan = {
        "summary": summary,
        "readiness_blockers": readiness_blockers,
        "bounded_loss_disclosure": (
            "No compacted replacement was produced. Any future lossy replacement must keep this diff reviewable "
            "and provide exact receipt/re-expand handles before use."
        ),
        "next_steps": [
            "Store exact original evidence with context-guard-artifact or another local receipt before compacting.",
            "Review file and hunk summaries against the original diff.",
            "Do not claim hosted token/cost savings from this dry-run plan.",
        ],
    }
    return {
        "tool": TOOL_NAME,
        "schema_version": CONFIG_SCHEMA_VERSION,
        "experiment_id": "context-diff-compaction",
        "mode": "dry_run",
        "status": status,
        "input": input_meta,
        "transform_policy": transform_policy,
        "exact_retrieval": exact_retrieval,
        "review_plan": review_plan,
        "claim_boundary": "Dry-run local planning only; no hosted API token/cost savings claim without provider-measured matched successful tasks.",
        "compacted_replacement": None,
    }
1341
-
1342
-
1343
def command_plan_context_diff_compaction(args: argparse.Namespace) -> int:
    """Handle the dry-run ``plan`` command for context-diff compaction.

    Prints the plan as JSON when ``args.json`` is set, otherwise as a short
    text report.  Always returns exit status ``0``.
    """
    payload = context_diff_plan_payload(args)
    if args.json:
        emit_json(payload)
        return 0
    source = payload["input"]
    review_plan = payload["review_plan"]
    print("ContextGuard context-diff compaction plan (dry-run only)")
    print("No compaction was performed and no replacement text was emitted.")
    print(f"Status: {payload['status']}")
    print(f"Input: {source['source_label']} lines={source['lines']} sha256={source['sha256']}")
    print(
        f"Review summary: files={review_plan['summary']['file_count']} "
        f"hunks={review_plan['summary']['hunk_count']}"
    )
    if payload["exact_retrieval"]["available"]:
        print("Exact retrieval handle supplied for human review only; verified=false.")
    else:
        print("Exact receipt/re-expand command required before any lossy replacement can be reviewed.")
    blockers = review_plan["readiness_blockers"]
    if blockers:
        print(f"Readiness blockers: {', '.join(blockers)}")
    print(payload["claim_boundary"])
    return 0
1364
-
1365
-
1366
def context_diff_emit_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the ``emit`` report for a caller-supplied compacted replacement.

    Starts from the dry-run plan, then checks the re-expand command shape,
    verifies the stored artifact against the input diff, and validates the
    replacement (present, untruncated, smaller than the input).  The
    replacement text is included only when no blocker remains.
    """
    payload = context_diff_plan_payload(args)
    receipt_id = args.receipt_id.strip() if args.receipt_id else None
    reexpand_command = args.reexpand_command.strip() if args.reexpand_command else None
    reexpand_valid, reexpand_blocker = valid_context_diff_reexpand_command(receipt_id, reexpand_command)
    replacement_text, replacement_meta = read_context_diff_replacement(args)
    input_meta = payload["input"]
    review_plan = payload["review_plan"]

    artifact_verified = False
    artifact_blocker = None
    artifact_verification: dict[str, Any] = {"checked": False, "read_directories": []}
    if reexpand_valid:
        # Storage is only consulted once the command shape is acceptable.
        artifact_verified, artifact_blocker, artifact_verification = verify_context_diff_artifact(
            receipt_id,
            expected_sha256=input_meta["sha256"],
            expected_bytes=input_meta["bytes"],
        )

    found = [*review_plan["readiness_blockers"]]
    found.extend(code for code in (reexpand_blocker, artifact_blocker) if code)
    has_text = replacement_text is not None
    if not has_text or not replacement_text.strip():
        found.append("missing_compacted_replacement")
    if replacement_meta["truncated"]:
        found.append("replacement_truncated")
    elif has_text and replacement_meta["bytes"] >= input_meta["bytes"]:
        found.append("replacement_not_smaller_than_input")
    # Drop duplicates while keeping first-seen order.
    blockers: list[str] = []
    for code in found:
        if code not in blockers:
            blockers.append(code)
    ready = not blockers

    replacement_record = None
    if ready and has_text:
        replacement_record = {
            "text": replacement_text,
            "bytes": replacement_meta["bytes"],
            "lines": replacement_meta["lines"],
            "sha256": replacement_meta["sha256"],
            "source_label": replacement_meta["source_label"],
        }

    payload["mode"] = "emit"
    payload["status"] = "replacement_emitted" if ready else "blocked_until_emit_ready"
    payload["transform_policy"] = {
        "automatic_compaction": False,
        "lossy_replacement_allowed": ready,
        "semantic_rewrite_allowed": False,
        "caller_supplied_replacement_required": True,
        "human_review_required": True,
        "stable_runtime_behavior_changed": False,
    }
    payload["exact_retrieval"] = {
        "required": True,
        "available": bool(receipt_id and reexpand_command and reexpand_valid and artifact_verified),
        "artifact_id": receipt_id,
        "cli": reexpand_command,
        "verified": artifact_verified,
        "valid_command_shape": reexpand_valid,
        "verification": artifact_verification,
        "note": "Emit mode validates exact local artifact command shape and verifies local artifact content matches the input diff.",
    }
    payload["replacement"] = replacement_meta
    review_plan["readiness_blockers"] = blockers
    review_plan["bounded_loss_disclosure"] = (
        "Compacted replacement is caller supplied and lossy; use exact_retrieval.cli to recover the original diff "
        "before relying on omitted details."
    )
    review_plan["next_steps"] = [
        "Human-review the compacted replacement against the original diff before use.",
        "Use exact_retrieval.cli to recover the original diff whenever omitted details matter.",
        "Treat bytes_before/bytes_after as local proxy evidence only; do not claim hosted token/cost savings.",
    ]
    payload["claim_boundary"] = (
        "Explicit local context-diff replacement emission only; smaller local diffs are proxy evidence and are not "
        "hosted API token or cost savings evidence."
    )
    bytes_before = input_meta["bytes"]
    bytes_after = replacement_meta["bytes"] if has_text else 0
    payload["compaction_evidence"] = {
        "bytes_before": bytes_before,
        "bytes_after": bytes_after,
        "byte_reduction": max(0, bytes_before - bytes_after),
        "byte_reduction_proxy_only": True,
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }
    payload["compacted_replacement"] = replacement_record
    return payload
1456
-
1457
-
1458
def command_emit_context_diff_compaction(args: argparse.Namespace) -> int:
    """Emit the context-diff compaction payload as JSON or as a text summary.

    Returns 0 when the payload status is ``replacement_emitted`` and 1 for
    any other status.
    """
    payload = context_diff_emit_payload(args)
    emitted = payload["status"] == "replacement_emitted"
    exit_code = 0 if emitted else 1

    if args.json:
        emit_json(payload)
        return exit_code

    if emitted:
        replacement = payload["replacement"]
        print("ContextGuard context-diff compact replacement emitted")
        print(f"Replacement: bytes={replacement['bytes']} sha256={replacement['sha256']}")
        print(f"Exact re-expand: {payload['exact_retrieval']['cli']}")
    else:
        print("ContextGuard context-diff compact replacement blocked")
        print(f"Status: {payload['status']}")

    readiness_blockers = payload["review_plan"]["readiness_blockers"]
    if readiness_blockers:
        print(f"Readiness blockers: {', '.join(readiness_blockers)}")
    # The claim boundary is always shown in text mode, emitted or blocked.
    print(payload["claim_boundary"])
    return exit_code
1477
-
1478
-
1479
- def clean_values(values: list[str] | None) -> list[str]:
1480
- return [value.strip() for value in values or [] if value.strip()]
1481
-
1482
-
1483
- def parse_int_tuple(raw: str | None, *, count: int) -> tuple[int, ...] | None:
1484
- if raw is None or not raw.strip():
1485
- return None
1486
- parts = [part.strip() for part in raw.split(",")]
1487
- if len(parts) != count:
1488
- return None
1489
- try:
1490
- return tuple(int(part, 10) for part in parts)
1491
- except ValueError:
1492
- return None
1493
-
1494
-
1495
- def crop_payload(bounds: tuple[int, ...] | None, image_size: tuple[int, ...] | None) -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
1496
- bounds_payload = None
1497
- image_payload = None
1498
- if bounds is not None:
1499
- x, y, width, height = bounds
1500
- bounds_payload = {"x": x, "y": y, "width": width, "height": height}
1501
- if image_size is not None:
1502
- width, height = image_size
1503
- image_payload = {"width": width, "height": height}
1504
- return bounds_payload, image_payload
1505
-
1506
-
1507
- def valid_crop_geometry(bounds: tuple[int, ...] | None, image_size: tuple[int, ...] | None) -> tuple[bool, bool]:
1508
- if bounds is None or image_size is None:
1509
- return False, False
1510
- x, y, crop_width, crop_height = bounds
1511
- image_width, image_height = image_size
1512
- if x < 0 or y < 0 or crop_width <= 0 or crop_height <= 0 or image_width <= 0 or image_height <= 0:
1513
- return False, False
1514
- if x + crop_width > image_width or y + crop_height > image_height:
1515
- return True, True
1516
- return True, False
1517
-
1518
-
1519
- def parse_confidence(raw: str | None) -> tuple[float | None, str | None]:
1520
- if raw is None or not raw.strip():
1521
- return None, "missing"
1522
- try:
1523
- value = float(raw)
1524
- except ValueError:
1525
- return None, "invalid"
1526
- if not (0.0 <= value <= 1.0):
1527
- return None, "invalid"
1528
- return value, None
1529
-
1530
-
1531
def read_visual_ocr_text(args: argparse.Namespace) -> dict[str, Any]:
    """Load user-supplied OCR text (inline or from a file) and describe it.

    The text is bounded by ``MAX_VISUAL_OCR_TEXT_BYTES``. The returned dict
    carries the source type/label, byte and line counts, a SHA-256 of the
    retained bytes (``None`` when empty), truncation and UTF-8 validity
    flags, and the decoded text itself.

    Raises:
        RegistryError: when both ``--ocr-text`` and ``--ocr-text-file`` are given.
    """
    if args.ocr_text is not None and args.ocr_text_file is not None:
        raise RegistryError("--ocr-text and --ocr-text-file are mutually exclusive")

    # A truthy label is used after stripping, even if stripping leaves it
    # empty; only a falsy label falls back to the per-source default.
    explicit_label = args.ocr_source_label.strip() if args.ocr_source_label else None

    if args.ocr_text_file is not None:
        path = Path(args.ocr_text_file)
        loaded = read_bounded_regular_file(path, max_bytes=MAX_VISUAL_OCR_TEXT_BYTES, label="OCR text file")
        assert loaded is not None
        raw, truncated = loaded
        source_type = "file"
        default_label = path.name
    elif args.ocr_text is not None:
        encoded = args.ocr_text.encode("utf-8")
        truncated = len(encoded) > MAX_VISUAL_OCR_TEXT_BYTES
        raw = encoded[:MAX_VISUAL_OCR_TEXT_BYTES]
        source_type = "inline"
        default_label = "inline"
    else:
        raw = b""
        truncated = False
        source_type = None
        default_label = None

    source_label = explicit_label if explicit_label is not None else default_label

    try:
        text = raw.decode("utf-8")
    except UnicodeDecodeError:
        # Keep a readable rendering but record that the bytes were not valid UTF-8.
        text = raw.decode("utf-8", errors="replace")
        valid_encoding = False
    else:
        valid_encoding = True

    digest = hashlib.sha256(raw).hexdigest() if raw else None
    return {
        "source_type": source_type,
        "source_label": source_label,
        "bytes": len(raw),
        "lines": len(text.splitlines()),
        "sha256": digest,
        "truncated": truncated,
        "max_bytes": MAX_VISUAL_OCR_TEXT_BYTES,
        "valid_utf8": valid_encoding,
        "text": text,
        "text_preview": text,
        "has_text": bool(text.strip()),
    }
1571
-
1572
-
1573
def visual_crop_ocr_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the dry-run visual crop/OCR plan payload from CLI arguments.

    Collects the full-evidence handle, crop metadata, and OCR text/confidence
    supplied by the caller, derives the list of readiness blockers, and
    returns the plan dict. The status is ``ready_for_human_review`` only when
    no blocker was recorded.
    """

    def stripped_or_none(value: str | None) -> str | None:
        return value.strip() if value else None

    full_receipt = stripped_or_none(args.full_evidence_receipt)
    full_label = stripped_or_none(args.full_evidence_label)
    missed_context_notes = clean_values(args.missed_context_note)
    ocr_error_notes = clean_values(args.ocr_error_note)
    crop_label = stripped_or_none(args.crop_label)

    # Crop side: parse geometry and decide whether it is usable.
    bounds = parse_int_tuple(args.crop_bounds, count=4)
    image_size = parse_int_tuple(args.image_size, count=2)
    bounds_payload, image_payload = crop_payload(bounds, image_size)
    crop_inputs = (args.crop_label, args.crop_bounds, args.image_size)
    crop_fields_present = any(value is not None and str(value).strip() for value in crop_inputs)
    crop_geometry_valid, crop_exceeds = valid_crop_geometry(bounds, image_size)
    crop_complete = bool(crop_label and crop_geometry_valid and not crop_exceeds)

    # OCR side: text, confidence, and error notes must all be present and clean.
    ocr_text = read_visual_ocr_text(args)
    confidence, confidence_error = parse_confidence(args.ocr_confidence)
    ocr_fields_present = (
        args.ocr_text is not None
        or args.ocr_text_file is not None
        or args.ocr_confidence is not None
        or bool(ocr_error_notes)
    )
    ocr_complete = bool(
        ocr_text["has_text"]
        and ocr_text["valid_utf8"]
        and not ocr_text["truncated"]
        and confidence_error is None
        and ocr_error_notes
    )

    blockers: list[str] = []
    if not full_receipt:
        blockers.append("missing_full_evidence_receipt")
    if not missed_context_notes:
        blockers.append("missing_missed_context_note")
    if not (crop_complete or ocr_complete):
        blockers.append("missing_derived_evidence")

    if crop_fields_present:
        if not crop_label or not crop_geometry_valid:
            blockers.append("invalid_crop_bounds")
        elif crop_exceeds:
            blockers.append("crop_exceeds_image_bounds")

    if ocr_fields_present:
        confidence_blocker = {
            "missing": "missing_ocr_confidence",
            "invalid": "invalid_ocr_confidence",
        }.get(confidence_error)
        if confidence_blocker:
            blockers.append(confidence_blocker)
        ocr_checks = (
            (not ocr_error_notes, "missing_ocr_error_note"),
            (not ocr_text["has_text"], "missing_ocr_text"),
            (not ocr_text["valid_utf8"], "invalid_ocr_text_encoding"),
            (ocr_text["truncated"], "ocr_text_truncated"),
        )
        blockers.extend(name for failed, name in ocr_checks if failed)

    # Preserve stable ordering while avoiding duplicates when incomplete derived
    # evidence also contributed path-specific blockers.
    blockers = list(dict.fromkeys(blockers))
    status = "blocked_until_visual_evidence" if blockers else "ready_for_human_review"

    crop_section = {
        "available": crop_complete,
        "label": crop_label,
        "bounds": bounds_payload,
        "image_size": image_payload,
        "source": "user_supplied_metadata" if crop_fields_present else None,
    }
    ocr_section = {
        "available": ocr_complete,
        "source_type": ocr_text["source_type"],
        "source_label": ocr_text["source_label"],
        "text_preview": ocr_text["text_preview"] if ocr_text["has_text"] else None,
        "metadata": {
            "bytes": ocr_text["bytes"],
            "lines": ocr_text["lines"],
            "sha256": ocr_text["sha256"],
            "truncated": ocr_text["truncated"],
            "max_bytes": ocr_text["max_bytes"],
            "valid_utf8": ocr_text["valid_utf8"],
        },
        "confidence": confidence,
        "error_notes": ocr_error_notes,
    }

    return {
        "tool": TOOL_NAME,
        "schema_version": CONFIG_SCHEMA_VERSION,
        "experiment_id": "visual-crop-ocr",
        "mode": "dry_run",
        "status": status,
        "external_services": {
            "called": False,
            "ocr_service": None,
            "image_service": None,
            "network": False,
        },
        "full_visual_evidence": {
            "required": True,
            "available": bool(full_receipt),
            "receipt_id": full_receipt,
            "label": full_label,
            "verified": False,
            "note": "G004 records user-supplied full visual evidence handles only; it does not verify receipt storage.",
        },
        "derived_evidence": {
            "crop": crop_section,
            "ocr": ocr_section,
        },
        "guardrails": {
            "original_evidence_required": True,
            "full_visual_evidence_must_remain_available": True,
            "external_ocr_service_allowed": False,
            "external_image_service_allowed": False,
            "human_review_required": True,
            "missed_context_review_required": True,
            "confidence_error_notes_required_for_ocr": True,
            "stable_runtime_behavior_changed": False,
            "candidate_replacement_allowed": False,
        },
        "review_plan": {
            "readiness_blockers": blockers,
            "missed_context_notes": missed_context_notes,
            "next_steps": [
                "Keep full visual evidence retrievable before relying on cropped or OCR-derived evidence.",
                "Review crop bounds and OCR text against the original evidence for missed context.",
                "Do not claim hosted image/text token or cost savings from this dry-run plan.",
            ],
        },
        "claim_boundary": (
            "Dry-run visual/OCR fixture planning only; no hosted visual/text token or cost savings claim without "
            "provider-measured matched successful tasks."
        ),
        "candidate_replacement": None,
    }
1708
-
1709
-
1710
def command_plan_visual_crop_ocr(args: argparse.Namespace) -> int:
    """Print the dry-run visual crop/OCR plan as JSON or text; always returns 0."""
    payload = visual_crop_ocr_plan_payload(args)
    if args.json:
        emit_json(payload)
        return 0

    derived = payload["derived_evidence"]
    readiness_blockers = payload["review_plan"]["readiness_blockers"]

    print("ContextGuard visual crop/OCR plan (dry-run only)")
    print("No external OCR/image service was called and no replacement evidence was emitted.")
    print(f"Status: {payload['status']}")
    print(f"Full evidence available: {payload['full_visual_evidence']['available']} verified=false")
    print(f"Derived evidence: crop={derived['crop']['available']} ocr={derived['ocr']['available']}")
    if readiness_blockers:
        print(f"Readiness blockers: {', '.join(readiness_blockers)}")
    print(payload["claim_boundary"])
    return 0
1728
-
1729
-
1730
def visual_crop_ocr_evidence_pack_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Extend the dry-run plan payload into an emit-mode evidence-pack payload.

    Starts from ``visual_crop_ocr_plan_payload`` and rewrites mode, status,
    guardrails, claim boundary, and next steps. Adds ``reduction_evidence``
    always and ``evidence_pack`` only when the plan has no readiness blockers.
    """
    payload = visual_crop_ocr_plan_payload(args)
    ready = not list(payload["review_plan"]["readiness_blockers"])
    crop = payload["derived_evidence"]["crop"]
    ocr = payload["derived_evidence"]["ocr"]
    crop_available = crop["available"]
    ocr_available = ocr["available"]

    # Areas are only computable when both geometry payloads were parsed.
    image_area = None
    crop_area = None
    if crop["bounds"] is not None and crop["image_size"] is not None:
        image_area = crop["image_size"]["width"] * crop["image_size"]["height"]
        crop_area = crop["bounds"]["width"] * crop["bounds"]["height"]

    area_reduction = None
    if crop_available and image_area is not None and crop_area is not None:
        area_reduction = image_area - crop_area

    payload["mode"] = "emit"
    payload["status"] = "evidence_pack_emitted" if ready else "blocked_until_visual_evidence_pack_ready"

    # Copy before updating so the plan's guardrails dict is not mutated in place.
    guardrails = dict(payload["guardrails"])
    guardrails.update({
        "candidate_replacement_allowed": False,
        "evidence_pack_allowed": ready,
        "runtime_writes_files": False,
        "external_services_called": False,
    })
    payload["guardrails"] = guardrails

    payload["claim_boundary"] = (
        "Explicit local visual crop/OCR evidence-pack emission only; image area and OCR byte reductions are proxy "
        "evidence and are not hosted API token or cost savings evidence."
    )
    payload["reduction_evidence"] = {
        "image_area_before": image_area,
        "crop_area_after": crop_area if crop_available else None,
        "crop_area_reduction": area_reduction,
        "ocr_text_bytes": ocr["metadata"]["bytes"] if ocr_available else None,
        "proxy_only": True,
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }
    payload["review_plan"]["next_steps"] = [
        "Human-review crop/OCR evidence against the full visual evidence receipt before using it as a substitute.",
        "Read missed-context notes before relying on omitted visual regions.",
        "Treat image area/OCR byte reductions as local proxy evidence only; do not claim hosted token/cost savings.",
    ]

    if not ready:
        return payload

    ocr_evidence = None
    if ocr_available:
        ocr_evidence = {
            "source_type": ocr["source_type"],
            "source_label": ocr["source_label"],
            "text": ocr["text_preview"],
            "metadata": ocr["metadata"],
            "confidence": ocr["confidence"],
            "error_notes": ocr["error_notes"],
        }
    payload["evidence_pack"] = {
        "schema_version": "contextguard.visual-evidence-pack.v1",
        "full_visual_evidence": payload["full_visual_evidence"],
        "crop_evidence": crop if crop_available else None,
        "ocr_evidence": ocr_evidence,
        "missed_context_notes": payload["review_plan"]["missed_context_notes"],
        "guardrails": payload["guardrails"],
        "reduction_evidence": payload["reduction_evidence"],
        "claim_boundary": payload["claim_boundary"],
    }
    return payload
1793
-
1794
-
1795
def command_emit_visual_crop_ocr(args: argparse.Namespace) -> int:
    """Emit the visual crop/OCR evidence pack as JSON or as a text summary.

    Returns 0 when the payload status is ``evidence_pack_emitted`` and 1 for
    any other status.
    """
    payload = visual_crop_ocr_evidence_pack_payload(args)
    emitted = payload["status"] == "evidence_pack_emitted"
    exit_code = 0 if emitted else 1

    if args.json:
        emit_json(payload)
        return exit_code

    if emitted:
        derived = payload["derived_evidence"]
        print("ContextGuard visual crop/OCR evidence pack emitted")
        print(f"Full evidence receipt: {payload['full_visual_evidence']['receipt_id']}")
        print(f"Derived evidence: crop={derived['crop']['available']} ocr={derived['ocr']['available']}")
    else:
        print("ContextGuard visual crop/OCR evidence pack blocked")
        print(f"Status: {payload['status']}")

    readiness_blockers = payload["review_plan"]["readiness_blockers"]
    if readiness_blockers:
        print(f"Readiness blockers: {', '.join(readiness_blockers)}")
    print(payload["claim_boundary"])
    return exit_code
1815
-
1816
-
1817
# Regex fragment for key names that look secret-bearing; any run of
# identifier characters may surround the sensitive word.
SECRET_LABEL_KEY_RE = (
    r"[A-Za-z0-9_.-]*(?:"
    r"api[-_]?key|apikey|token|secret|password|passwd|pwd|client[-_]?secret|"
    r"auth|authorization|bearer|basic|pass|credential|credentials|signature|sig|"
    r"x[-_]?amz[-_]?[a-z0-9_.-]*|aws[a-z0-9_.-]*|(?:aws[-_]?)?access[-_]?key(?:[-_]?id)?|"
    r"private[-_]?key|privatekey|pgp[-_]?private[-_]?key|pgpprivatekey|ssh[-_]?key|sshkey"
    r")[A-Za-z0-9_.-]*"
)

# Regex fragment for the value side of ``key=value`` / ``key: value`` pairs:
# a single-quoted string, a double-quoted string, or a bare token.
SECRET_LABEL_VALUE_RE = r"(?:'[^']*'|\"[^\"]*\"|[^\s,}&#;]+)"

# Ordered (pattern, replacement) pairs applied one after another.
SECRET_LABEL_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = (
    # HTTP Authorization headers and bare auth-scheme credentials.
    (
        re.compile(r"(?i)\bAuthorization\s*:\s*(?:Bearer|Basic|AWS|AWS4-HMAC-SHA256)\s+[^\s,}\]]+(?:\s+[A-Za-z0-9_-]+=[^\s,}\]]+)*"),
        "Authorization: [REDACTED]",
    ),
    (
        re.compile(r"(?i)\b(?:Bearer|Basic)\s*(?:[:=]\s*)?[A-Za-z0-9._~+/=-]+"),
        "[REDACTED]",
    ),
    (
        re.compile(r"(?i)\b(?:AWS|AWS4-HMAC-SHA256)\s+[A-Za-z0-9,=:/+._~%-]+"),
        "[REDACTED]",
    ),
    # Secret-looking keys in query strings / fragments.
    (
        re.compile(rf"(?i)([?&#;]({SECRET_LABEL_KEY_RE})=)[^\s?&#;]+"),
        r"\1[REDACTED]",
    ),
    # Secret-looking keys in key=value / key: value pairs.
    (
        re.compile(rf"(?i)(^|[\s{{,?&#;])([\"']?(?:{SECRET_LABEL_KEY_RE})[\"']?\s*[:=]\s*){SECRET_LABEL_VALUE_RE}"),
        r"\1\2[REDACTED]",
    ),
    # Secret-looking long command-line flags and curl-style user flags.
    (
        re.compile(rf"(?i)(^|[\s\"'])(--(?:{SECRET_LABEL_KEY_RE})(?:\s+|=))(?:'[^']*'|\"[^\"]*\"|[^\s\"']+)"),
        r"\1\2[REDACTED]",
    ),
    (
        re.compile(r"(?i)(^|[\s\"'])((?:-u|--user)(?:\s+|=))(?:'[^']*'|\"[^\"]*\"|[^\s\"']+)"),
        r"\1\2[REDACTED]",
    ),
    # Remaining secret-looking key names (with an optional attached value).
    (
        re.compile(rf"(?i)(^|[/\\\s{{,?&#;\[\(<])({SECRET_LABEL_KEY_RE}(?:[:=][^\s,}}&#;\]\)\\/]*)?)"),
        r"\1[REDACTED]",
    ),
    # Token shapes with recognisable prefixes.
    (re.compile(r"gh[pousr]_[A-Za-z0-9_]{20,}"), "[REDACTED]"),
    (re.compile(r"github_pat_[A-Za-z0-9_]{20,}"), "[REDACTED]"),
    (re.compile(r"glpat-[A-Za-z0-9_-]{12,}"), "[REDACTED]"),
    (re.compile(r"xox[abprs]-[A-Za-z0-9-]{10,}"), "[REDACTED]"),
    (re.compile(r"(?:AKIA|ASIA)[0-9A-Z]{16}"), "[REDACTED]"),
    (re.compile(r"(?:sk|pk|rk)_(?:live|test)_[A-Za-z0-9]{16,}"), "[REDACTED]"),
    (re.compile(r"sk-(?:ant|proj)-[A-Za-z0-9_-]{12,}"), "[REDACTED]"),
    (re.compile(r"npm_[A-Za-z0-9]{20,}"), "[REDACTED]"),
    (re.compile(r"AIza[0-9A-Za-z_\-]{20,}"), "[REDACTED]"),
    (re.compile(r"SG\.[A-Za-z0-9_-]{16,}\.[A-Za-z0-9_-]{16,}"), "[REDACTED]"),
    # Three dot-separated base64url segments starting with "eyJ".
    (re.compile(r"eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+"), "[REDACTED]"),
    # Userinfo component of URLs (scheme://user:pass@host).
    (re.compile(r"([a-z][a-z0-9+.-]*://)[^/\s@]+@", re.IGNORECASE), r"\1[REDACTED]@"),
)
1854
-
1855
-
1856
def sanitize_self_hosted_text(value: Any) -> str:
    """Return a single-line, secret-redacted, length-bounded rendering of *value*.

    Characters whose Unicode category starts with ``C`` are replaced by
    spaces, whitespace runs are collapsed, every ``SECRET_LABEL_PATTERNS``
    substitution is applied in order, adjacent redaction markers are merged,
    and the result is cut to ``MAX_SELF_HOSTED_LABEL_CHARS`` with a
    ``…[truncated]`` suffix when it is too long.
    """
    raw_text = str(value) if value is not None else ""
    visible = [" " if unicodedata.category(char).startswith("C") else char for char in raw_text]
    text = " ".join("".join(visible).split())

    for pattern, replacement in SECRET_LABEL_PATTERNS:
        text = pattern.sub(replacement, text)

    # Tidy up artefacts left by overlapping redactions.
    text = re.sub(r"\[REDACTED\]\]+", "[REDACTED]", text)
    text = re.sub(r"(?:\[REDACTED\]\s*){2,}", "[REDACTED]", text)

    if len(text) <= MAX_SELF_HOSTED_LABEL_CHARS:
        return text
    # Reserve 12 characters for the truncation suffix.
    return text[: MAX_SELF_HOSTED_LABEL_CHARS - 12].rstrip() + "…[truncated]"
1867
-
1868
-
1869
- def sanitize_self_hosted_label(value: Any) -> str | None:
1870
- if not isinstance(value, str):
1871
- return None
1872
- text = sanitize_self_hosted_text(value)
1873
- if not text:
1874
- return None
1875
- return text
1876
-
1877
-
1878
def sanitize_self_hosted_ignored_key(value: Any) -> str:
    """Return a safe display name for an ignored metrics key.

    Non-string keys, keys that sanitize to nothing, and keys containing a
    redaction marker are replaced by fixed placeholder names.
    """
    if not isinstance(value, str):
        return "non_string_key"
    cleaned = sanitize_self_hosted_text(value)
    if not cleaned:
        return "empty_key"
    return "redacted_key" if "[REDACTED]" in cleaned else cleaned
1887
-
1888
-
1889
- def normalize_self_hosted_metric(value: Any, *, maximum: float) -> float | None:
1890
- if isinstance(value, bool) or not isinstance(value, (int, float)):
1891
- return None
1892
- number = float(value)
1893
- if not math.isfinite(number) or number < 0 or number > maximum:
1894
- return None
1895
- return number
1896
-
1897
-
1898
# Upper bound accepted for each recognised numeric self-hosted metric.
SELF_HOSTED_METRIC_LIMITS: dict[str, float] = dict(
    latency_ms=MAX_SELF_HOSTED_LATENCY_MS,
    peak_memory_mb=MAX_SELF_HOSTED_MEMORY_MB,
    quality_score=1.0,
    energy_wh=MAX_SELF_HOSTED_ENERGY_WH,
    local_cost_usd=MAX_SELF_HOSTED_LOCAL_COST_USD,
    tokens_per_second=MAX_SELF_HOSTED_TOKENS_PER_SECOND,
)
# Recognised free-text label keys accepted next to the numeric metrics.
SELF_HOSTED_LABEL_KEYS = (
    "model_server",
    "optimization",
    "quality_metric",
    "hardware",
    "runtime",
    "dataset",
)
1907
-
1908
-
1909
- def normalize_self_hosted_metrics(raw: Any, *, source: str) -> tuple[dict[str, Any] | None, list[str], list[str]]:
1910
- invalid_keys: list[str] = []
1911
- ignored_keys: list[str] = []
1912
- if not isinstance(raw, dict):
1913
- return None, ["self_hosted_metrics_not_object"], ignored_keys
1914
- metrics: dict[str, float] = {}
1915
- labels: dict[str, str] = {}
1916
- availability = {key: False for key in SELF_HOSTED_METRIC_LIMITS}
1917
- for key, value in raw.items():
1918
- if key in SELF_HOSTED_METRIC_LIMITS:
1919
- metric = normalize_self_hosted_metric(value, maximum=SELF_HOSTED_METRIC_LIMITS[key])
1920
- if metric is None:
1921
- invalid_keys.append(key)
1922
- else:
1923
- metrics[key] = metric
1924
- availability[key] = True
1925
- elif key in SELF_HOSTED_LABEL_KEYS:
1926
- label = sanitize_self_hosted_label(value)
1927
- if label is not None:
1928
- labels[key] = label
1929
- elif value is not None:
1930
- invalid_keys.append(key)
1931
- else:
1932
- ignored_keys.append(sanitize_self_hosted_ignored_key(key))
1933
- if not metrics:
1934
- return None, invalid_keys, ignored_keys
1935
- return {
1936
- "schema_version": SELF_HOSTED_METRICS_SCHEMA_VERSION,
1937
- "source": source,
1938
- "metrics": metrics,
1939
- "labels": labels,
1940
- "measurement_availability": availability,
1941
- "claim_boundary": {
1942
- "id": SELF_HOSTED_METRICS_CLAIM_BOUNDARY,
1943
- "hosted_api_token_savings_claim_allowed": False,
1944
- "hosted_api_cost_savings_claim_allowed": False,
1945
- "requires_provider_measured_matched_tasks_for_hosted_claims": True,
1946
- "reason": (
1947
- "Self-hosted local/model-server latency, memory, quality, energy, and local cost metrics "
1948
- "are not hosted API token or cost telemetry."
1949
- ),
1950
- },
1951
- }, invalid_keys, ignored_keys
1952
-
1953
-
1954
def cli_self_hosted_metrics(args: argparse.Namespace) -> dict[str, Any]:
    """Collect the metric and label values that were set on the CLI namespace.

    Attributes whose value is ``None`` are omitted; numeric metric names are
    read first, followed by ``SELF_HOSTED_LABEL_KEYS``.
    """
    metric_names = (
        "latency_ms",
        "peak_memory_mb",
        "quality_score",
        "energy_wh",
        "local_cost_usd",
        "tokens_per_second",
    )
    collected: dict[str, Any] = {}
    for name in (*metric_names, *SELF_HOSTED_LABEL_KEYS):
        value = getattr(args, name)
        if value is not None:
            collected[name] = value
    return collected
1972
-
1973
-
1974
def reject_non_finite_json_constant(value: str) -> NoReturn:
    """Always raise ``ValueError`` naming the rejected JSON constant.

    Passed as ``parse_constant`` to ``json.loads`` elsewhere in this file.
    """
    message = f"non-finite JSON value {value}"
    raise ValueError(message)
1976
-
1977
-
1978
def has_non_finite_json_number(value: Any) -> bool:
    """Walk a decoded JSON value iteratively looking for non-finite floats.

    Also returns ``True`` when the walk exceeds ``MAX_SELF_HOSTED_JSON_DEPTH``
    or ``MAX_SELF_HOSTED_JSON_NODES``, so over-deep or over-large documents
    are treated the same way as documents containing NaN/Infinity.
    """
    pending: list[tuple[Any, int]] = [(value, 0)]
    seen = 0
    while pending:
        node, depth = pending.pop()
        seen += 1
        if depth > MAX_SELF_HOSTED_JSON_DEPTH or seen > MAX_SELF_HOSTED_JSON_NODES:
            return True
        if isinstance(node, bool):
            continue
        if isinstance(node, float):
            if not math.isfinite(node):
                return True
            continue
        if isinstance(node, list):
            children = node
        elif isinstance(node, dict):
            children = node.values()
        else:
            continue
        pending.extend((child, depth + 1) for child in children)
    return False
1996
-
1997
-
1998
def read_self_hosted_payload(args: argparse.Namespace) -> tuple[Any, dict[str, Any]]:
    """Read and parse bounded self-hosted metrics JSON from a file or stdin.

    Returns ``(payload, input_meta)``. The payload is ``None`` when the input
    was truncated at ``MAX_SELF_HOSTED_METRICS_INPUT_BYTES`` or contained
    only whitespace.

    Raises:
        RegistryError: when the input file cannot be read, the JSON cannot be
            parsed, or ``has_non_finite_json_number`` rejects the document.
    """
    source_label = sanitize_self_hosted_text(args.source_label) if args.source_label else None
    limit = MAX_SELF_HOSTED_METRICS_INPUT_BYTES

    if args.input:
        path = Path(args.input)
        source_label = source_label or sanitize_self_hosted_text(path)
        try:
            loaded = read_bounded_regular_file(path, max_bytes=limit, label=f"self-hosted metrics input: {source_label}")
        except RegistryError as exc:
            raise RegistryError(f"could not read self-hosted metrics input: {source_label}: {exc}") from exc
        assert loaded is not None
        raw, loaded_truncated = loaded
    else:
        source_label = source_label or "stdin"
        # Read one byte past the limit so truncation can be detected.
        raw = sys.stdin.buffer.read(limit + 1)
        loaded_truncated = len(raw) > limit
        raw = raw[:limit]

    def describe(byte_count: int, truncated: bool) -> dict[str, Any]:
        return {
            "source_label": source_label,
            "bytes": byte_count,
            "sha256": hashlib.sha256(raw).hexdigest(),
            "truncated": truncated,
            "max_bytes": limit,
            "envelope_source": None,
            "invalid_metric_keys": [],
            "ignored_keys": [],
        }

    if loaded_truncated:
        return None, describe(limit, True)
    if not raw.strip():
        return None, describe(len(raw), False)

    text = raw.decode("utf-8", errors="replace")
    try:
        payload = json.loads(text, parse_constant=reject_non_finite_json_constant)
    except json.JSONDecodeError as exc:
        raise RegistryError(f"could not parse self-hosted metrics JSON: {exc.msg}") from exc
    except ValueError as exc:
        raise RegistryError(f"could not parse self-hosted metrics JSON: {exc}") from exc
    except RecursionError as exc:
        raise RegistryError("could not parse self-hosted metrics JSON: nesting too deep") from exc

    if has_non_finite_json_number(payload):
        raise RegistryError("could not parse self-hosted metrics JSON: non-finite JSON number")
    return payload, describe(len(raw), False)
2057
-
2058
-
2059
- def select_self_hosted_envelope(payload: Any) -> tuple[Any, str | None, list[str]]:
2060
- if not isinstance(payload, dict):
2061
- return None, None, ["input_not_object"]
2062
- ignored: list[str] = []
2063
- if SELF_HOSTED_METRICS_KEY in payload:
2064
- return payload.get(SELF_HOSTED_METRICS_KEY), f"explicit_provider_payload.{SELF_HOSTED_METRICS_KEY}", ignored
2065
- metrics = payload.get("metrics")
2066
- if isinstance(metrics, dict) and SELF_HOSTED_METRICS_KEY in metrics:
2067
- return metrics.get(SELF_HOSTED_METRICS_KEY), f"explicit_provider_payload.metrics.{SELF_HOSTED_METRICS_KEY}", ignored
2068
- if any(isinstance(key, str) and key.startswith("self_hosted_") for key in payload):
2069
- ignored.append("incidental_self_hosted_keys")
2070
- return None, None, ignored
2071
-
2072
-
2073
- def parse_optional_success(value: str | None) -> bool | None:
2074
- if value is None or value == "unknown":
2075
- return None
2076
- return value == "true"
2077
-
2078
-
2079
def self_hosted_metrics_ledger_row(
    sidecar: dict[str, Any],
    *,
    task_id: str = "self-hosted-metrics-manual",
    variant: str = "self-hosted-metrics-ledger",
    success: bool | None = None,
    notes: str = "explicit self-hosted metrics record; no hosted API savings claim",
    claude_version: str = "manual",
    wall_time_seconds: float = 0.0,
) -> dict[str, Any]:
    """Build a benchmark-ledger row that carries only a self-hosted metrics sidecar.

    All hosted token/cost/byte fields are written as unmeasured zeros (or
    ``None``); the text fields are sanitized and fall back to their default
    when sanitization leaves them empty. The row is stamped with the current
    UTC time.
    """
    default_notes = "explicit self-hosted metrics record; no hosted API savings claim"
    safe_claude_version = sanitize_self_hosted_text(claude_version) or "manual"
    safe_task_id = sanitize_self_hosted_text(task_id) or "self-hosted-metrics-manual"
    safe_variant = sanitize_self_hosted_text(variant) or "self-hosted-metrics-ledger"
    safe_notes = sanitize_self_hosted_text(notes) or default_notes
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Only the self-hosted sidecar counts as measured in this row.
    availability = {
        "primary_tokens": False,
        "primary_cost": False,
        "external_tokens": False,
        "external_cost": False,
        "shifted_cost": False,
        "provider_cache": False,
        "byte_metrics": False,
        "wall_time": False,
        "self_hosted_metrics": True,
    }
    proxy_metrics = {
        "byte_metrics_observed": False,
        "token_proxy": "chars_div_4",
        "bytes_per_token": TOKEN_PROXY_BYTES_PER_TOKEN,
        "claim_boundary": "proxy_only_not_hosted_token_savings",
    }

    return {
        "schema_version": BENCH_RUN_EVIDENCE_SCHEMA_VERSION,
        "date": timestamp,
        "claude_version": safe_claude_version,
        "task_id": safe_task_id,
        "variant": safe_variant,
        "transform_id": "self-hosted-metrics-ledger",
        "success": success,
        "primary_tokens_measured": False,
        "primary_tokens": 0,
        "primary_cost_measured": False,
        "primary_cost_usd": 0.0,
        "provider_cached_tokens": None,
        "provider_cached_tokens_measured": False,
        "wall_time_seconds": wall_time_seconds,
        "external_tokens_measured": False,
        "external_tokens": 0,
        "external_cost_measured": False,
        "external_cost_usd": 0.0,
        "total_cost_with_shift_usd": None,
        "artifacts_used": 0,
        "bytes_before": 0,
        "bytes_after": 0,
        "hook_triggers": 0,
        "turns": 0,
        "notes": safe_notes,
        "measurement_availability": availability,
        "self_hosted_metrics": sidecar,
        "proxy_metrics": proxy_metrics,
    }
2135
-
2136
-
2137
def self_hosted_metrics_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the dry-run self-hosted metrics ledger preview payload.

    Metrics come from CLI flags when any are set; otherwise from ``--input``
    or non-interactive stdin; otherwise an empty placeholder is used. The
    status is ``ready_for_ledger_review`` only when no readiness blocker was
    recorded. No ledger file is written by this function.
    """

    def placeholder_meta(envelope_source: str | None) -> dict[str, Any]:
        # Input metadata used when nothing was read from a file or stdin.
        return {
            "source_label": sanitize_self_hosted_text(args.source_label) if args.source_label else "cli_flags",
            "bytes": 0,
            "sha256": None,
            "truncated": False,
            "max_bytes": MAX_SELF_HOSTED_METRICS_INPUT_BYTES,
            "envelope_source": envelope_source,
            "invalid_metric_keys": [],
            "ignored_keys": [],
        }

    cli_metrics = cli_self_hosted_metrics(args)
    if cli_metrics:
        raw_metrics = cli_metrics
        source = "cli_flags"
        ignored_envelope_keys = []
        input_meta = placeholder_meta(source)
    elif args.input or not sys.stdin.isatty():
        raw_payload, input_meta = read_self_hosted_payload(args)
        raw_metrics, source, ignored_envelope_keys = select_self_hosted_envelope(raw_payload)
    else:
        raw_metrics = {}
        source = None
        ignored_envelope_keys = []
        input_meta = placeholder_meta(source)

    if input_meta["truncated"] or raw_metrics is None:
        sidecar = None
        invalid_keys: list[str] = []
        ignored_keys = ignored_envelope_keys
    else:
        sidecar, invalid_keys, ignored_keys = normalize_self_hosted_metrics(
            raw_metrics,
            source=source or "missing_explicit_envelope",
        )

    input_meta["envelope_source"] = source
    input_meta["invalid_metric_keys"] = sorted(set(invalid_keys))
    input_meta["ignored_keys"] = sorted(set(ignored_keys + ignored_envelope_keys))

    blocker_checks = (
        (input_meta["truncated"], "input_truncated"),
        (source is None, "missing_explicit_self_hosted_metrics_envelope"),
        (sidecar is None, "missing_self_hosted_metrics"),
        (bool(invalid_keys), "invalid_self_hosted_metrics"),
    )
    blockers = list(dict.fromkeys(name for failed, name in blocker_checks if failed))
    ready = not blockers

    ledger_preview = None
    if sidecar is not None:
        ledger_preview = self_hosted_metrics_ledger_row(
            sidecar,
            task_id="self-hosted-metrics-dry-run",
            notes="dry-run preview; no ledger file written",
            claude_version="dry-run",
        )

    return {
        "tool": TOOL_NAME,
        "schema_version": CONFIG_SCHEMA_VERSION,
        "experiment_id": "self-hosted-metrics-ledger",
        "mode": "dry_run",
        "status": "ready_for_ledger_review" if ready else "blocked_until_metrics",
        "input": input_meta,
        "policy": {
            "default_off": True,
            "ledger_write_performed": False,
            "hosted_api_token_savings_claim_allowed": False,
            "hosted_api_cost_savings_claim_allowed": False,
            "stable_runtime_behavior_changed": False,
        },
        "self_hosted_metrics": sidecar,
        "ledger_preview": ledger_preview,
        "review_plan": {
            "readiness_blockers": blockers,
            "next_steps": [
                "Record real run evidence with context-guard-bench --ledger-jsonl when benchmark data exists.",
                "Keep self-hosted local metrics out of hosted API token/cost savings claims.",
                "Use provider-measured matched successful tasks for hosted API savings claims.",
            ],
        },
        "claim_boundary": (
            "Dry-run self-hosted metrics ledger preview only; local/model-server metrics are diagnostic sidecars "
            "and are not hosted API token or cost savings evidence."
        ),
    }
2231
-
2232
-
2233
def command_plan_self_hosted_metrics_ledger(args: argparse.Namespace) -> int:
    """Render the dry-run self-hosted metrics ledger preview.

    Emits the plan payload as JSON when ``args.json`` is truthy; otherwise
    prints a short human-readable summary. The exit code is always 0.
    """
    payload = self_hosted_metrics_plan_payload(args)
    if args.json:
        emit_json(payload)
        return 0

    # Text mode: fixed banner lines, then status, optional blockers, boundary.
    print("ContextGuard self-hosted metrics ledger preview (dry-run only)")
    print("No ledger file was written and no hosted API token/cost savings claim is allowed from these metrics.")
    print(f"Status: {payload['status']}")
    if payload["review_plan"]["readiness_blockers"]:
        print(f"Readiness blockers: {', '.join(payload['review_plan']['readiness_blockers'])}")
    print(payload["claim_boundary"])
    return 0
2245
-
2246
-
2247
def self_hosted_metrics_record_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the record-mode payload and append one ledger row when unblocked.

    Starts from the dry-run plan payload and relabels it as ``record`` mode.
    A single JSONL row is appended to ``args.ledger_jsonl`` only when metrics
    were produced and the plan reports no readiness blockers; otherwise the
    payload is returned with status ``blocked_until_metrics`` and no write.
    """
    result = self_hosted_metrics_plan_payload(args)
    result["mode"] = "record"
    result["claim_boundary"] = (
        "Explicit local self-hosted metrics ledger record only; local/model-server metrics are diagnostic sidecars "
        "and are not hosted API token or cost savings evidence."
    )

    policy = result["policy"]
    policy["ledger_write_performed"] = False
    policy["stable_runtime_behavior_changed"] = False

    result["ledger_record"] = None
    ledger_info = {
        "path": sanitize_self_hosted_text(args.ledger_jsonl),
        "write_performed": False,
        "bytes_written": 0,
    }
    result["ledger_jsonl"] = ledger_info

    metrics = result["self_hosted_metrics"]
    review_plan = result["review_plan"]
    if metrics is None or review_plan["readiness_blockers"]:
        # Nothing is written while the plan is blocked.
        result["status"] = "blocked_until_metrics"
        return result

    ledger_row = self_hosted_metrics_ledger_row(
        metrics,
        task_id=args.task_id,
        variant=args.variant,
        success=parse_optional_success(args.success),
        notes=args.notes,
        claude_version="manual",
    )
    written = append_jsonl_no_follow(Path(args.ledger_jsonl), ledger_row, label="self-hosted metrics ledger")

    result["status"] = "recorded"
    result["ledger_preview"] = ledger_row
    result["ledger_record"] = ledger_row
    policy["ledger_write_performed"] = True
    ledger_info["write_performed"] = True
    ledger_info["bytes_written"] = written
    review_plan["next_steps"] = [
        "Use this JSONL row only as self-hosted/local diagnostic evidence.",
        "Keep hosted API token/cost savings claims behind provider-measured matched successful tasks.",
        "Compare this sidecar with benchmark rows only through explicit shifted-cost accounting.",
    ]
    return result
2287
-
2288
-
2289
def command_record_self_hosted_metrics_ledger(args: argparse.Namespace) -> int:
    """Run the self-hosted metrics ledger record command and report the outcome.

    Emits the record payload as JSON when ``args.json`` is truthy; otherwise
    prints a text summary. Returns 0 whether or not a row was recorded.
    """
    payload = self_hosted_metrics_record_payload(args)
    if args.json:
        emit_json(payload)
        return 0

    if payload["status"] == "recorded":
        print("ContextGuard self-hosted metrics ledger record written")
        print(f"Ledger: {payload['ledger_jsonl']['path']} bytes={payload['ledger_jsonl']['bytes_written']}")
    else:
        print("ContextGuard self-hosted metrics ledger record blocked")
    print(f"Status: {payload['status']}")
    if payload["review_plan"]["readiness_blockers"]:
        print(f"Readiness blockers: {', '.join(payload['review_plan']['readiness_blockers'])}")
    print(payload["claim_boundary"])
    return 0
2304
-
2305
-
2306
def sanitize_local_proxy_value(value: Any) -> str:
    """Sanitize a local-proxy value by delegating to ``sanitize_self_hosted_text``."""
    sanitized = sanitize_self_hosted_text(value)
    return sanitized
2308
-
2309
-
2310
def local_proxy_secret_like(value: Any) -> bool:
    """Return True when sanitizing ``value`` yields a ``[REDACTED]`` marker.

    ``None`` is never treated as secret-like.
    """
    return value is not None and "[REDACTED]" in sanitize_local_proxy_value(value)
2314
-
2315
-
2316
def local_proxy_bytes_secret_like(value: bytes) -> bool:
    """Decode ``value`` as UTF-8 (replacing bad bytes) and test it for secret-like content."""
    decoded = value.decode("utf-8", errors="replace")
    return local_proxy_secret_like(decoded)
2318
-
2319
-
2320
def local_proxy_request_target_meta(value: Any) -> dict[str, Any]:
    """Describe a request target by digest and size instead of its raw text.

    ``None`` is treated as the empty string. The value is stringified and
    UTF-8 encoded (unencodable characters replaced) before hashing.
    """
    encoded = ("" if value is None else str(value)).encode("utf-8", errors="replace")
    digest = hashlib.sha256(encoded).hexdigest()
    return {
        "request_target_sha256": digest,
        "request_target_bytes": len(encoded),
    }
2327
-
2328
-
2329
def normalize_external_allow_host(value: Any) -> tuple[str, list[str]]:
    """Validate one external allowlist host entry.

    Returns ``(sanitized, blockers)`` where ``sanitized`` is the stripped input
    passed through ``sanitize_local_proxy_value`` and ``blockers`` lists the
    reasons the entry is rejected (empty when it is acceptable).
    """
    raw = "" if value is None else str(value).strip()
    sanitized = sanitize_local_proxy_value(raw)
    # Comparison form: brackets removed, lower-cased, trailing dot dropped.
    host = raw.strip("[]").lower().rstrip(".")
    if not host:
        return sanitized, ["invalid_external_allow_host"]

    blockers: list[str] = []
    if "[REDACTED]" in sanitized:
        blockers.append("secret_like_external_forwarding_design_metadata")

    has_forbidden_char = any(ch in host for ch in ("*", "/", "\\", "@", ":", " "))
    if has_forbidden_char or len(host) > 253:
        blockers.append("invalid_external_allow_host")
        return sanitized, blockers
    if is_localhost_host(host):
        blockers.append("localhost_external_allow_host_not_allowed")
        return sanitized, blockers

    try:
        ip = ipaddress.ip_address(host)
    except ValueError:
        ip = None

    if ip is None:
        # Not an IP literal: require at least two well-formed DNS labels.
        labels = host.split(".")
        label_re = re.compile(r"^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?$")
        if len(labels) < 2 or not all(label_re.fullmatch(label) for label in labels):
            blockers.append("invalid_external_allow_host")
    elif not ip.is_global:
        blockers.append("non_global_external_allow_host_not_allowed")
    return sanitized, blockers
2354
-
2355
-
2356
def local_proxy_external_forwarding_design_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the design-only review payload for external forwarding.

    Validates the acknowledgement flags, host/scheme allowlist, threat-model
    notes, and the two policy strings taken from ``args``, collecting a
    de-duplicated list of readiness blockers. The returned payload hard-codes
    every network-action and claim flag to ``False``.
    """
    intent = bool(args.external_forwarding_intent)
    design_ack = bool(args.external_forwarding_design_ack)
    host_inputs = args.allow_host or []
    scheme_inputs = args.allow_scheme or []
    note_inputs = args.threat_model_note or []
    redaction_policy = sanitize_local_proxy_value(args.credential_redaction_policy)
    provider_boundary = sanitize_local_proxy_value(args.provider_evidence_boundary)

    blockers: list[str] = []
    if not intent:
        blockers.append("missing_external_forwarding_intent")
    if not design_ack:
        blockers.append("missing_external_forwarding_design_ack")

    # Hosts: every entry is validated; non-empty sanitized entries are kept.
    if not host_inputs:
        blockers.append("missing_external_allow_host")
    kept_hosts: list[str] = []
    for entry in host_inputs:
        sanitized_host, entry_blockers = normalize_external_allow_host(entry)
        if sanitized_host:
            kept_hosts.append(sanitized_host)
        blockers.extend(entry_blockers)
    hosts = sorted(set(kept_hosts))

    # Schemes: blank entries are dropped, the rest lower-cased and sanitized.
    scheme_set = set()
    for entry in scheme_inputs:
        if str(entry).strip():
            scheme_set.add(sanitize_local_proxy_value(str(entry).strip().lower()))
    schemes = sorted(scheme_set)
    if not schemes:
        blockers.append("missing_external_allow_scheme")
    for scheme in schemes:
        if "[REDACTED]" in scheme:
            blockers.append("secret_like_external_forwarding_design_metadata")
        elif scheme not in LOCAL_PROXY_EXTERNAL_ALLOWED_SCHEMES:
            blockers.append("https_only_external_allow_scheme_required")

    threat_model_notes = [sanitize_local_proxy_value(note) for note in clean_values(note_inputs)]
    if not threat_model_notes:
        blockers.append("missing_threat_model_note")
    if any(local_proxy_secret_like(note) for note in note_inputs):
        blockers.append("secret_like_external_forwarding_design_metadata")

    if not redaction_policy:
        blockers.append("missing_credential_redaction_policy")
    elif redaction_policy != LOCAL_PROXY_EXTERNAL_CREDENTIAL_REDACTION_POLICY:
        blockers.append("unsupported_credential_redaction_policy")
    if not provider_boundary:
        blockers.append("missing_provider_evidence_boundary")
    elif provider_boundary != LOCAL_PROXY_EXTERNAL_PROVIDER_EVIDENCE_BOUNDARY:
        blockers.append("unsupported_provider_evidence_boundary")
    if local_proxy_secret_like(redaction_policy) or local_proxy_secret_like(provider_boundary):
        blockers.append("secret_like_external_forwarding_design_metadata")

    # Drop duplicate blocker ids while keeping first-seen order.
    blockers = list(dict.fromkeys(blockers))
    status = (
        "ready_for_external_forwarding_design_review"
        if not blockers
        else "blocked_until_external_forwarding_design_constraints"
    )

    design = {
        "intent_acknowledged": intent,
        "design_acknowledged": design_ack,
        "allowlist_required": True,
        "allowlist": {
            "hosts": hosts,
            "schemes": schemes,
            "wildcards_allowed": False,
            "localhost_allowed": False,
            "non_global_ip_allowed": False,
        },
        "credential_redaction": {
            "policy": redaction_policy,
            "required_policy": LOCAL_PROXY_EXTERNAL_CREDENTIAL_REDACTION_POLICY,
            "blocked_header_names": sorted(LOCAL_PROXY_SENSITIVE_HEADER_NAMES),
            "raw_headers_persisted": False,
            "request_bodies_persisted": False,
            "response_bodies_persisted": False,
        },
        "threat_model": {
            "required": True,
            "notes": threat_model_notes,
            "future_review_required": True,
        },
        "provider_evidence_boundary": {
            "policy": provider_boundary,
            "required_policy": LOCAL_PROXY_EXTERNAL_PROVIDER_EVIDENCE_BOUNDARY,
            "diagnostic_only": True,
            "provider_measured_matched_tasks_required_for_hosted_claims": True,
            "hosted_api_token_savings_claim_allowed": False,
            "hosted_api_cost_savings_claim_allowed": False,
        },
        "future_runtime_requirements": [
            "separate future runtime gate and review",
            "explicit host/scheme allowlist enforcement before any network connection",
            "credential-bearing requests blocked or stripped before forwarding",
            "no CONNECT/TLS interception without a separate reviewed gate",
            "diagnostic shifted-cost accounting only unless provider-measured matched-task evidence exists",
        ],
    }

    return {
        "tool": TOOL_NAME,
        "schema_version": LOCAL_PROXY_EXTERNAL_DESIGN_SCHEMA_VERSION,
        "experiment_id": "local-proxy",
        "mode": "external_forwarding_design",
        "status": status,
        "policy": {
            "default_off": True,
            "design_only": True,
            "external_forwarding_runtime_implemented": False,
            "external_forwarding_allowed": False,
            "hidden_external_forwarding": False,
            "api_key_persistence_allowed": False,
            "credential_material_forwarded": False,
            "stable_runtime_behavior_changed": False,
            "hosted_api_token_savings_claim_allowed": False,
            "hosted_api_cost_savings_claim_allowed": False,
        },
        "network_actions": {
            "listener_started": False,
            "outbound_forwarding_attempted": False,
            "dns_lookup_attempted": False,
            "external_services_called": False,
        },
        "external_forwarding_design": design,
        "review_plan": {
            "readiness_blockers": blockers,
            "next_steps": [
                "Treat this as design evidence only; do not forward external traffic from this command.",
                "Keep existing local-proxy serve runtime literal-loopback-only.",
                "Require a separate future runtime gate before any external forwarding implementation.",
            ],
        },
        "claim_boundary": (
            "Dry-run external forwarding design gate only; no listener, DNS lookup, external service call, credential "
            "persistence, traffic forwarding, or hosted API token/cost savings claim is performed."
        ),
    }
2486
-
2487
-
2488
def command_plan_local_proxy_external_forwarding(args: argparse.Namespace) -> int:
    """Render the external-forwarding design gate payload.

    Emits JSON when ``args.json`` is truthy; otherwise prints a text summary.
    The exit code is always 0.
    """
    payload = local_proxy_external_forwarding_design_payload(args)
    if args.json:
        emit_json(payload)
        return 0

    print("ContextGuard local proxy external-forwarding design gate (dry-run only)")
    print("No listener was started, no traffic was forwarded, no DNS lookup was performed, and no API key was persisted.")
    print(f"Status: {payload['status']}")
    if payload["review_plan"]["readiness_blockers"]:
        print(f"Readiness blockers: {', '.join(payload['review_plan']['readiness_blockers'])}")
    print(payload["claim_boundary"])
    return 0
2500
-
2501
-
2502
def is_localhost_host(value: Any) -> bool:
    """Return True for a known localhost name or a loopback IP literal.

    Non-string input is never localhost. The host is compared after trimming
    whitespace, removing surrounding brackets, lower-casing, and dropping a
    trailing dot.
    """
    if not isinstance(value, str):
        return False
    candidate = value.strip().strip("[]").lower().rstrip(".")
    if candidate in LOCAL_PROXY_LOCALHOST_NAMES:
        return True
    try:
        address = ipaddress.ip_address(candidate)
    except ValueError:
        # Not a known localhost name and not an IP literal.
        return False
    return address.is_loopback
2512
-
2513
-
2514
def is_loopback_ip_literal(value: Any) -> bool:
    """Return True only when ``value`` is a string holding a loopback IP literal.

    Host names (including ``localhost``) and non-string input yield False.
    """
    if not isinstance(value, str):
        return False
    candidate = value.strip().strip("[]").lower().rstrip(".")
    try:
        address = ipaddress.ip_address(candidate)
    except ValueError:
        return False
    return address.is_loopback
2522
-
2523
-
2524
def normalize_local_proxy_host(value: Any, *, default: str) -> tuple[str, bool, bool]:
    """Resolve a host value into ``(sanitized_host, is_localhost, secret_like)``.

    ``None`` or a blank value selects ``default``; any other value is
    stringified, trimmed, and stripped of surrounding brackets.
    """
    use_default = value is None or str(value).strip() == ""
    host = default if use_default else str(value).strip().strip("[]")
    sanitized = sanitize_local_proxy_value(host)
    is_local = is_localhost_host(host)
    return sanitized, is_local, "[REDACTED]" in sanitized
2531
-
2532
-
2533
def normalize_local_proxy_port(value: Any, *, default: int) -> tuple[int, bool]:
    """Parse a port value into ``(port, is_valid)``.

    ``None`` or ``""`` selects ``default`` and counts as valid. Booleans and
    values that cannot be converted with ``int()`` yield ``(default, False)``.
    A converted value outside ``0..65535`` is returned as-is with
    ``is_valid`` False so callers can still report it.
    """
    if value is None or value == "":
        return default, True
    if isinstance(value, bool):
        # bool is an int subclass; True/False must not be read as ports 1/0.
        return default, False
    try:
        port = int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")) -- treat as invalid, not a crash.
        return default, False
    return port, 0 <= port <= 65535
2543
-
2544
-
2545
def normalize_local_proxy_int_limit(value: Any, *, default: int, maximum: int) -> tuple[int, bool]:
    """Parse a positive integer limit into ``(limit, is_valid)``.

    ``None`` or ``""`` selects ``default`` and counts as valid. Booleans and
    values that cannot be converted with ``int()`` yield ``(default, False)``.
    A converted value outside ``1..maximum`` is returned as-is with
    ``is_valid`` False.
    """
    if value is None or value == "":
        return default, True
    if isinstance(value, bool):
        # bool is an int subclass; reject it rather than reading it as 1/0.
        return default, False
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")) -- treat as invalid, not a crash.
        return default, False
    return parsed, 1 <= parsed <= maximum
2555
-
2556
-
2557
def normalize_local_proxy_timeout(value: Any) -> tuple[float, bool]:
    """Parse a timeout in seconds into ``(timeout, is_valid)``.

    ``None`` or ``""`` selects ``LOCAL_PROXY_DEFAULT_TIMEOUT_SECONDS`` and
    counts as valid. Booleans and values that cannot be converted with
    ``float()`` yield the default with ``is_valid`` False. A converted value
    outside ``0.1..LOCAL_PROXY_MAX_TIMEOUT_SECONDS`` is returned as-is with
    ``is_valid`` False.
    """
    if value is None or value == "":
        return LOCAL_PROXY_DEFAULT_TIMEOUT_SECONDS, True
    if isinstance(value, bool):
        # bool is an int subclass; reject it rather than reading it as 1.0/0.0.
        return LOCAL_PROXY_DEFAULT_TIMEOUT_SECONDS, False
    try:
        parsed = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: float() of an int too large for a double
        # (e.g. a huge JSON integer) -- treat as invalid, not a crash.
        return LOCAL_PROXY_DEFAULT_TIMEOUT_SECONDS, False
    return parsed, 0.1 <= parsed <= LOCAL_PROXY_MAX_TIMEOUT_SECONDS
2567
-
2568
-
2569
def read_local_proxy_payload(args: argparse.Namespace) -> tuple[dict[str, Any], dict[str, Any]]:
    """Load the optional local-proxy JSON input and describe it.

    Returns ``(envelope, input_meta)``. Without ``args.input`` the envelope is
    empty and the metadata is labelled ``cli_flags``. Truncated, blank, or
    non-object inputs also produce an empty envelope. Otherwise the envelope
    is the ``local_proxy`` member when present, else the top-level object.

    Raises:
        RegistryError: when the file cannot be read or its JSON is invalid,
            nested too deeply, or contains a non-finite number.
    """

    def describe(label: Any, size: int, digest: Any, *, truncated: bool = False, ignored: Any = ()) -> dict[str, Any]:
        # Single place that fixes the key set and key order of the metadata.
        return {
            "source_label": label,
            "bytes": size,
            "sha256": digest,
            "truncated": truncated,
            "ignored_keys": list(ignored),
        }

    if not args.input:
        return {}, describe("cli_flags", 0, None)

    path = Path(args.input)
    safe_path = sanitize_local_proxy_value(path)
    try:
        loaded = read_bounded_regular_file(path, max_bytes=MAX_SELF_HOSTED_METRICS_INPUT_BYTES, label=f"local-proxy input: {safe_path}")
    except RegistryError as exc:
        raise RegistryError(f"could not read local-proxy input: {safe_path}: {exc}") from exc
    assert loaded is not None
    raw, was_truncated = loaded
    digest = hashlib.sha256(raw).hexdigest()

    if was_truncated:
        # The reported size is the read cap, not the on-disk size.
        return {}, describe(safe_path, MAX_SELF_HOSTED_METRICS_INPUT_BYTES, digest, truncated=True)
    if not raw.strip():
        return {}, describe(safe_path, len(raw), digest)

    text = raw.decode("utf-8", errors="replace")
    try:
        payload = json.loads(text, parse_constant=reject_non_finite_json_constant)
    except json.JSONDecodeError as exc:
        raise RegistryError(f"could not parse local-proxy JSON: {exc.msg}") from exc
    except ValueError as exc:
        raise RegistryError(f"could not parse local-proxy JSON: {exc}") from exc
    except RecursionError as exc:
        raise RegistryError("could not parse local-proxy JSON: nesting too deep") from exc
    if has_non_finite_json_number(payload):
        raise RegistryError("could not parse local-proxy JSON: non-finite JSON number")
    if not isinstance(payload, dict):
        return {}, describe(safe_path, len(raw), digest, ignored=["input_not_object"])

    ignored: list[str] = []
    envelope = payload.get("local_proxy", payload)
    if not isinstance(envelope, dict):
        envelope = {}
        ignored.append("local_proxy_not_object")

    allowed = {
        "bind_host",
        "bind_port",
        "target_host",
        "target_port",
        "upstream_url",
        "ledger_jsonl",
        "proxy_label",
        "api_key",
        "authorization_header",
        "persist_api_key",
        "external_forwarding_intent",
        "runtime_gate_ack",
        "forwarding_gate_ack",
        "once",
        "max_request_bytes",
        "max_response_bytes",
        "timeout_seconds",
        "diagnostic_ledger_jsonl",
    }
    # Unknown keys are reported (sanitized) but still passed through in the envelope.
    for key in envelope:
        if key not in allowed:
            ignored.append(sanitize_self_hosted_ignored_key(key))
    return dict(envelope), describe(safe_path, len(raw), digest, ignored=sorted(set(ignored)))
2654
-
2655
-
2656
def coalesce_local_proxy_value(args: argparse.Namespace, payload: dict[str, Any], attr: str, key: str) -> Any:
    """Prefer the CLI attribute ``attr``; fall back to ``payload[key]`` when it is ``None``.

    Only ``None`` (or a missing attribute) triggers the fallback, so falsy CLI
    values such as ``0`` or ``""`` still win over the payload.
    """
    cli_value = getattr(args, attr, None)
    if cli_value is None:
        return payload.get(key)
    return cli_value
2659
-
2660
-
2661
def parse_local_proxy_json_bool(value: Any) -> tuple[bool, bool]:
    """Interpret a JSON-sourced flag as ``(flag, is_valid)``.

    ``None`` reads as a valid False. Real booleans pass through. Strings are
    trimmed, lower-cased, and matched against ``LOCAL_PROXY_TRUE_VALUES`` and
    ``LOCAL_PROXY_FALSE_VALUES``. Integers are accepted only as 0 or 1.
    Everything else is ``(False, False)``.
    """
    if value is None:
        return False, True
    if isinstance(value, bool):
        return value, True
    if isinstance(value, str):
        token = value.strip().lower()
        if token in LOCAL_PROXY_TRUE_VALUES:
            return True, True
        # An unrecognized string is an invalid False; a recognized one is valid.
        return False, token in LOCAL_PROXY_FALSE_VALUES
    # bool was handled above, so any int reaching here is a plain integer.
    if isinstance(value, int) and value in (0, 1):
        return value == 1, True
    return False, False
2679
-
2680
-
2681
def coalesce_local_proxy_bool(args: argparse.Namespace, payload: dict[str, Any], attr: str, key: str) -> tuple[bool, bool]:
    """Resolve a flag from the CLI first, then from the JSON payload.

    A truthy CLI attribute wins outright as ``(True, True)``; otherwise the
    payload value is parsed with ``parse_local_proxy_json_bool``.
    """
    cli_flag = getattr(args, attr, False)
    if not cli_flag:
        return parse_local_proxy_json_bool(payload.get(key))
    return True, True
2685
-
2686
-
2687
def local_proxy_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the dry-run local-proxy plan payload.

    Each setting is taken from the CLI attribute when it is not ``None`` and
    from the JSON input otherwise. The function validates hosts, ports, the
    optional upstream URL, and the boolean flags, collects readiness blockers
    in a fixed order, and returns a payload whose network-action flags are all
    hard-coded to ``False``.
    """
    file_values, input_meta = read_local_proxy_payload(args)

    def pick(name: str) -> Any:
        return coalesce_local_proxy_value(args, file_values, name, name)

    def pick_flag(name: str) -> tuple[bool, bool]:
        return coalesce_local_proxy_bool(args, file_values, name, name)

    bind_host_raw = pick("bind_host")
    bind_port_raw = pick("bind_port")
    target_host_raw = pick("target_host")
    target_port_raw = pick("target_port")
    upstream_url_raw = pick("upstream_url")
    ledger_jsonl_raw = pick("ledger_jsonl")
    proxy_label_raw = pick("proxy_label")
    api_key_raw = pick("api_key")
    authorization_raw = pick("authorization_header")
    persist_api_key, persist_api_key_valid = pick_flag("persist_api_key")
    external_forwarding_intent, external_forwarding_intent_valid = pick_flag("external_forwarding_intent")
    runtime_gate_ack, runtime_gate_ack_valid = pick_flag("runtime_gate_ack")

    # Upstream URL: optional; when given it may supply the target host/port.
    upstream_url = sanitize_local_proxy_value(upstream_url_raw) if upstream_url_raw else None
    upstream_host = None
    upstream_url_valid = True
    upstream_localhost = True
    upstream_secret_like = False
    if upstream_url_raw:
        upstream_secret_like = local_proxy_secret_like(upstream_url_raw)
        try:
            parsed = urlparse(str(upstream_url_raw))
            upstream_host = parsed.hostname
        except ValueError:
            # Unparseable URL: flagged invalid; the localhost flag is left untouched.
            upstream_url_valid = False
            upstream_host = None
        else:
            if upstream_host:
                upstream_localhost = is_localhost_host(upstream_host)
            else:
                upstream_url_valid = False
                upstream_localhost = False
            try:
                upstream_port = parsed.port
            except ValueError:
                upstream_url_valid = False
                upstream_port = None
            # Explicit target settings take precedence over the URL's parts.
            if upstream_port is not None and target_port_raw is None:
                target_port_raw = upstream_port
            if upstream_host and target_host_raw is None:
                target_host_raw = upstream_host

    bind_host, bind_localhost, bind_secret_like = normalize_local_proxy_host(
        bind_host_raw,
        default=LOCAL_PROXY_DEFAULT_BIND_HOST,
    )
    target_host, target_localhost, target_secret_like = normalize_local_proxy_host(
        target_host_raw,
        default=LOCAL_PROXY_DEFAULT_TARGET_HOST,
    )
    bind_port, bind_port_valid = normalize_local_proxy_port(bind_port_raw, default=LOCAL_PROXY_DEFAULT_BIND_PORT)
    target_port, target_port_valid = normalize_local_proxy_port(target_port_raw, default=LOCAL_PROXY_DEFAULT_TARGET_PORT)
    ledger_jsonl = sanitize_local_proxy_value(ledger_jsonl_raw) if ledger_jsonl_raw else None
    proxy_label = sanitize_local_proxy_value(proxy_label_raw) if proxy_label_raw else "local-proxy-dry-run"
    api_key_provided = api_key_raw is not None and str(api_key_raw).strip() != ""
    authorization_header_provided = authorization_raw is not None and str(authorization_raw).strip() != ""

    # Record which raw inputs look secret-like, by field name only.
    raw_by_field = (
        ("bind_host", bind_host_raw),
        ("bind_port", bind_port_raw),
        ("target_host", target_host_raw),
        ("target_port", target_port_raw),
        ("upstream_url", upstream_url_raw),
        ("ledger_jsonl", ledger_jsonl_raw),
        ("proxy_label", proxy_label_raw),
        ("api_key", api_key_raw),
        ("authorization_header", authorization_raw),
    )
    secret_like_fields: list[str] = [
        field for field, raw in raw_by_field if raw is not None and local_proxy_secret_like(raw)
    ]
    for field, flagged in (
        ("bind_host", bind_secret_like),
        ("target_host", target_secret_like),
        ("upstream_url", upstream_secret_like),
    ):
        if flagged and field not in secret_like_fields:
            secret_like_fields.append(field)

    # Blocker ids are emitted in this fixed order.
    checks = (
        (input_meta["truncated"], "input_truncated"),
        (not persist_api_key_valid, "invalid_persist_api_key"),
        (not external_forwarding_intent_valid, "invalid_external_forwarding_intent"),
        (not runtime_gate_ack_valid, "invalid_runtime_gate_ack"),
        (not bind_port_valid, "invalid_bind_port"),
        (not target_port_valid, "invalid_target_port"),
        (upstream_url_raw and not upstream_url_valid, "invalid_upstream_url"),
        (not bind_localhost, "non_localhost_bind_host"),
        (not target_localhost, "non_localhost_target_host"),
        (upstream_url_raw and not upstream_localhost, "non_localhost_upstream_url"),
        (api_key_provided or authorization_header_provided, "api_key_material_provided"),
        (persist_api_key, "api_key_persistence_requested"),
        (external_forwarding_intent, "external_forwarding_intent_not_allowed"),
        (not runtime_gate_ack, "missing_runtime_gate_ack"),
        (secret_like_fields, "secret_like_proxy_metadata"),
    )
    blockers: list[str] = [name for failed, name in checks if failed]
    blockers = list(dict.fromkeys(blockers))
    ready = not blockers

    return {
        "tool": TOOL_NAME,
        "schema_version": CONFIG_SCHEMA_VERSION,
        "experiment_id": "local-proxy",
        "mode": "dry_run",
        "status": "ready_for_runtime_review" if ready else "blocked_until_local_proxy_constraints",
        "input": input_meta,
        "policy": {
            "default_off": True,
            "dry_run_only": True,
            "localhost_only": True,
            "runtime_gate_required_before_forwarding": True,
            "runtime_gate_acknowledged": runtime_gate_ack,
            "stable_runtime_behavior_changed": False,
        },
        "bind": {
            "host": bind_host,
            "port": bind_port,
            "localhost_only": bind_localhost,
        },
        "target": {
            "host": target_host,
            "port": target_port,
            "upstream_url": upstream_url,
            "localhost_only": target_localhost,
        },
        "network_actions": {
            "listener_started": False,
            "outbound_forwarding_attempted": False,
            "dns_lookup_attempted": False,
            "external_services_called": False,
        },
        "api_key_persistence": {
            "api_key_material_provided": api_key_provided,
            "authorization_header_provided": authorization_header_provided,
            "requested": persist_api_key,
            "performed": False,
            "allowed_by_default": False,
        },
        "ledger_preview": {
            "schema_version": LOCAL_PROXY_SCHEMA_VERSION,
            "ledger_jsonl": ledger_jsonl,
            "ledger_write_performed": False,
            "proxy_label": proxy_label,
            "claim_boundary": "local_proxy_advisory_only_not_hosted_token_or_cost_savings",
        },
        "forwarding": {
            "external_forwarding_intent": external_forwarding_intent,
            "hidden_external_forwarding": False,
            "runtime_gate_acknowledged": runtime_gate_ack,
            "future_runtime_gate_required": True,
        },
        "redaction": {
            "secret_like_fields": sorted(set(secret_like_fields)),
            "raw_api_key_output": False,
        },
        "review_plan": {
            "readiness_blockers": blockers,
            "next_steps": [
                "Keep any real proxy runtime behind a separate future runtime gate.",
                "Use localhost-only bind and target defaults for advisory review.",
                "Do not persist API keys or forward externally from this dry-run planner.",
            ],
        },
        "claim_boundary": (
            "Dry-run local proxy advisory preview only; no listener, forwarding, API-key persistence, ledger write, "
            "or hosted API token/cost savings claim is performed."
        ),
    }
2884
-
2885
-
2886
def command_plan_local_proxy(args: argparse.Namespace) -> int:
    """Render the dry-run local-proxy plan.

    Emits JSON when ``args.json`` is truthy; otherwise prints the status, the
    bind and target endpoints, any readiness blockers, and the claim boundary.
    The exit code is always 0.
    """
    payload = local_proxy_plan_payload(args)
    if args.json:
        emit_json(payload)
        return 0

    print("ContextGuard local proxy plan (dry-run only)")
    print("No listener was started, no traffic was forwarded, no API key was persisted, and no ledger was written.")
    print(f"Status: {payload['status']}")
    print(f"Bind: {payload['bind']['host']}:{payload['bind']['port']} localhost_only={payload['bind']['localhost_only']}")
    print(
        f"Target: {payload['target']['host']}:{payload['target']['port']} "
        f"localhost_only={payload['target']['localhost_only']}"
    )
    if payload["review_plan"]["readiness_blockers"]:
        print(f"Readiness blockers: {', '.join(payload['review_plan']['readiness_blockers'])}")
    print(payload["claim_boundary"])
    return 0
2903
-
2904
-
2905
def local_proxy_gate_row(payload: dict[str, Any]) -> dict[str, Any]:
    """Build the ledger row for a local-proxy runtime-gate record.

    Copies the bind, target, network-action, API-key, and forwarding sections
    from ``payload`` and stamps the row with the current UTC time. All
    listener/forwarding/persistence flags in the row's policy are fixed to
    ``False``.
    """
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    gate_policy = {
        "localhost_only": True,
        "runtime_gate_acknowledged": payload["policy"]["runtime_gate_acknowledged"],
        "listener_started": False,
        "traffic_forwarded": False,
        "dns_lookup_attempted": False,
        "api_key_persisted": False,
        "hidden_external_forwarding": False,
    }
    claim_boundary = {
        "id": "local_proxy_runtime_gate_not_hosted_savings",
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
        "requires_provider_measured_matched_tasks_for_hosted_claims": True,
        "reason": "This row records a local proxy runtime gate only; it starts no listener and forwards no traffic.",
    }
    return {
        "schema_version": LOCAL_PROXY_GATE_SCHEMA_VERSION,
        "date": timestamp,
        "experiment_id": "local-proxy",
        "proxy_label": payload["ledger_preview"]["proxy_label"],
        "bind": payload["bind"],
        "target": payload["target"],
        "policy": gate_policy,
        "network_actions": payload["network_actions"],
        "api_key_persistence": payload["api_key_persistence"],
        "forwarding": payload["forwarding"],
        "claim_boundary": claim_boundary,
        "shifted_cost_accounting_required": True,
    }
2934
-
2935
-
2936
def local_proxy_record_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the record-mode payload and append a runtime-gate row when unblocked.

    Starts from the dry-run plan payload, switches it to ``record`` mode, and
    re-checks the runtime-gate acknowledgement. One JSONL row is appended to
    ``args.ledger_jsonl`` only when no readiness blockers remain; otherwise the
    status becomes ``blocked_until_local_proxy_gate_ready`` and nothing is
    written.
    """
    result = local_proxy_plan_payload(args)
    result["mode"] = "record"
    result["claim_boundary"] = (
        "Explicit local proxy runtime-gate record only; no listener, forwarding, DNS lookup, API-key persistence, "
        "external service call, or hosted API token/cost savings claim is performed."
    )
    # Fresh policy dict: existing keys keep their position, new keys follow.
    policy = {
        **result["policy"],
        "dry_run_only": False,
        "runtime_gate_record_only": True,
        "runtime_gate_recorded": False,
        "listener_started": False,
        "traffic_forwarded": False,
        "stable_runtime_behavior_changed": False,
    }
    result["policy"] = policy
    result["ledger_record"] = None
    ledger_info = {
        "path": sanitize_local_proxy_value(args.ledger_jsonl),
        "write_performed": False,
        "bytes_written": 0,
    }
    result["ledger_jsonl"] = ledger_info

    review_plan = result["review_plan"]
    blockers = list(review_plan["readiness_blockers"])
    if not policy["runtime_gate_acknowledged"]:
        blockers.append("missing_runtime_gate_ack")
    blockers = list(dict.fromkeys(blockers))
    review_plan["readiness_blockers"] = blockers

    preview = result["ledger_preview"]
    preview["schema_version"] = LOCAL_PROXY_GATE_SCHEMA_VERSION
    preview["ledger_jsonl"] = sanitize_local_proxy_value(args.ledger_jsonl)
    preview["ledger_write_performed"] = False

    if blockers:
        result["status"] = "blocked_until_local_proxy_gate_ready"
        return result

    gate_row = local_proxy_gate_row(result)
    written = append_jsonl_no_follow(Path(args.ledger_jsonl), gate_row, label="local proxy runtime gate ledger")
    result["status"] = "recorded"
    result["ledger_preview"] = gate_row
    result["ledger_record"] = gate_row
    ledger_info["write_performed"] = True
    ledger_info["bytes_written"] = written
    policy["runtime_gate_recorded"] = True
    review_plan["next_steps"] = [
        "Use this JSONL row only as a local proxy runtime-gate record.",
        "Keep any actual proxy listener or forwarding implementation behind a separate reviewed runtime.",
        "Do not persist API keys or claim hosted token/cost savings from this gate record.",
    ]
    return result
2984
-
2985
-
2986
def command_record_local_proxy_runtime_gate(args: argparse.Namespace) -> int:
    """CLI entry point: record the local proxy runtime gate and report the outcome.

    Emits the payload as JSON when ``args.json`` is set; otherwise prints a
    short human-readable summary followed by the claim boundary.

    Returns:
        ``0`` when the ledger row was recorded, ``1`` otherwise.
    """
    report = local_proxy_record_payload(args)
    recorded = report["status"] == "recorded"

    if args.json:
        emit_json(report)
    elif recorded:
        ledger = report["ledger_jsonl"]
        print("ContextGuard local proxy runtime-gate record written")
        print(f"Ledger: {ledger['path']} bytes={ledger['bytes_written']}")
        print(report["claim_boundary"])
    else:
        print("ContextGuard local proxy runtime-gate record blocked")
        print(f"Status: {report['status']}")
        blockers = report["review_plan"]["readiness_blockers"]
        if blockers:
            print(f"Readiness blockers: {', '.join(blockers)}")
        print(report["claim_boundary"])

    return 0 if recorded else 1
3001
-
3002
-
3003
def local_proxy_forward_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the "serve" payload for the loopback forwarding MVP and list its blockers.

    The plan payload is extended with forwarding policy flags, runtime limits
    and diagnostic-ledger bookkeeping. Nothing is bound or forwarded here; the
    function only decides whether serving may start.

    Returns:
        The payload dict with ``status`` set to ``"ready_to_serve"`` when no
        readiness blocker was found, else
        ``"blocked_until_local_proxy_forwarding_ready"``. The private key
        ``_diagnostic_ledger_write_path`` carries the unsanitized ledger path.
    """
    report = local_proxy_plan_payload(args)
    overrides, _unused_meta = read_local_proxy_payload(args)

    def requested(name: str) -> Any:
        # The CLI attribute and the input-payload key share one name here.
        return coalesce_local_proxy_value(args, overrides, name, name)

    gate_ack, gate_ack_ok = coalesce_local_proxy_bool(
        args, overrides, "forwarding_gate_ack", "forwarding_gate_ack"
    )
    once, once_ok = coalesce_local_proxy_bool(args, overrides, "once", "once")
    request_cap, request_cap_ok = normalize_local_proxy_int_limit(
        requested("max_request_bytes"),
        default=LOCAL_PROXY_DEFAULT_MAX_REQUEST_BYTES,
        maximum=LOCAL_PROXY_MAX_FORWARD_BYTES,
    )
    response_cap, response_cap_ok = normalize_local_proxy_int_limit(
        requested("max_response_bytes"),
        default=LOCAL_PROXY_DEFAULT_MAX_RESPONSE_BYTES,
        maximum=LOCAL_PROXY_MAX_FORWARD_BYTES,
    )
    timeout_seconds, timeout_ok = normalize_local_proxy_timeout(requested("timeout_seconds"))

    ledger_raw = requested("diagnostic_ledger_jsonl")
    ledger_display = sanitize_local_proxy_value(ledger_raw) if ledger_raw else None
    ledger_write_target = str(ledger_raw) if ledger_raw else None
    if ledger_raw:
        ledger_digest = hashlib.sha256(str(ledger_raw).encode("utf-8", errors="replace")).hexdigest()
    else:
        ledger_digest = None

    bind_is_literal = is_loopback_ip_literal(report["bind"]["host"])
    target_is_literal = is_loopback_ip_literal(report["target"]["host"])

    upstream_url = report["target"].get("upstream_url")
    scheme = ""
    if upstream_url:
        try:
            scheme = urlparse(str(upstream_url)).scheme.lower()
        except ValueError:
            scheme = "invalid"

    report["mode"] = "serve"
    report["schema_version"] = LOCAL_PROXY_FORWARD_SCHEMA_VERSION
    report["claim_boundary"] = (
        "Explicit local proxy forwarding MVP only; binds and forwards literal loopback IPs, blocks credential "
        "material, persists no API keys, performs no DNS lookup, calls no external services, and makes no hosted "
        "API token/cost savings claim."
    )

    policy = dict(report["policy"])
    policy.update({
        "dry_run_only": False,
        "forwarding_runtime": True,
        "forwarding_gate_acknowledged": gate_ack,
        "once_required": True,
        "once": once,
        "literal_loopback_ip_only": True,
        "listener_started": False,
        "traffic_forwarded": False,
        "stable_runtime_behavior_changed": False,
    })
    report["policy"] = policy

    forwarding = dict(report["forwarding"])
    forwarding.update({
        "actual_local_forwarding_runtime": True,
        "forwarding_gate_acknowledged": gate_ack,
        "external_forwarding_allowed": False,
        "connect_tunneling_allowed": False,
        "https_mitm_allowed": False,
    })
    report["forwarding"] = forwarding

    report["runtime_limits"] = {
        "once": once,
        "max_request_bytes": request_cap,
        "max_response_bytes": response_cap,
        "timeout_seconds": timeout_seconds,
    }
    report["diagnostic_ledger"] = {
        "schema_version": LOCAL_PROXY_DIAGNOSTIC_SCHEMA_VERSION,
        "path": ledger_display,
        "path_sha256": ledger_digest,
        "write_requested": bool(ledger_raw),
        "write_performed": False,
        "bytes_written": 0,
        "reason": None if ledger_raw else "not_requested",
    }
    report["_diagnostic_ledger_write_path"] = ledger_write_target
    report["forward_result"] = None

    # Ordered (condition, blocker) table; order determines the reported sequence.
    checks = (
        (ledger_raw is not None and local_proxy_secret_like(ledger_raw), "secret_like_diagnostic_ledger_path"),
        (not policy["runtime_gate_acknowledged"], "missing_runtime_gate_ack"),
        (not gate_ack_ok, "invalid_forwarding_gate_ack"),
        (not once_ok, "invalid_once"),
        (not gate_ack, "missing_forwarding_gate_ack"),
        (not once, "once_required_for_forwarding_mvp"),
        (report["bind"]["port"] <= 0, "bind_port_required_for_listener"),
        (report["target"]["port"] <= 0, "target_port_required_for_forwarding"),
        (not bind_is_literal, "bind_host_must_be_loopback_ip_literal"),
        (not target_is_literal, "target_host_must_be_loopback_ip_literal"),
        (bool(scheme) and scheme != "http", "unsupported_upstream_url_scheme"),
        (not request_cap_ok, "invalid_max_request_bytes"),
        (not response_cap_ok, "invalid_max_response_bytes"),
        (not timeout_ok, "invalid_timeout_seconds"),
    )
    pending = list(report["review_plan"]["readiness_blockers"])
    pending.extend(name for failed, name in checks if failed)
    # Deduplicate while preserving first-seen order.
    pending = list(dict.fromkeys(pending))

    report["review_plan"]["readiness_blockers"] = pending
    report["review_plan"]["next_steps"] = [
        "Use this MVP only for local loopback HTTP forwarding.",
        "Keep external forwarding, CONNECT tunneling, credential persistence, and hosted savings claims behind later gates.",
        "Use --once plus byte/time limits for bounded operation.",
    ]
    report["status"] = "blocked_until_local_proxy_forwarding_ready" if pending else "ready_to_serve"
    return report
3129
-
3130
-
3131
def local_proxy_forward_diagnostic_row(payload: dict[str, Any]) -> dict[str, Any]:
    """Assemble the diagnostic JSONL row describing one forwarded loopback request.

    Request/response figures come from ``payload["forward_result"]`` (treated
    as empty when missing); the row stores sizes, a target hash and status
    codes, and marks headers and bodies as not persisted.

    Returns:
        A new dict stamped with the current UTC time.
    """
    outcome = payload.get("forward_result") or {}
    target = payload["target"]

    request_section = {
        "method": outcome.get("request_method"),
        "target_sha256": outcome.get("request_target_sha256"),
        "target_bytes": outcome.get("request_target_bytes", 0),
        "body_bytes": outcome.get("inbound_request_bytes", 0),
        "headers_persisted": False,
        "body_persisted": False,
        "credential_material_forwarded": False,
    }
    response_section = {
        "upstream_status": outcome.get("upstream_status"),
        "upstream_response_bytes": outcome.get("upstream_response_bytes", 0),
        "body_persisted": False,
    }
    policy_section = {
        "localhost_only": True,
        "literal_loopback_ip_only": True,
        "forwarded": bool(outcome.get("forwarded")),
        "api_key_persisted": False,
        "hidden_external_forwarding": False,
        "external_services_called": False,
        "dns_lookup_attempted": False,
        "connect_tunneling_allowed": False,
        "https_mitm_allowed": False,
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }
    accounting_section = {
        "required": True,
        "local_proxy_request": True,
        "diagnostic_only": True,
        "provider_measured_matched_tasks_required_for_hosted_claims": True,
    }
    claim_section = {
        "id": "local_proxy_forward_diagnostic_not_hosted_savings",
        "reason": "This row records one explicit literal-loopback forwarded request as shifted-cost diagnostic evidence only.",
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }

    return {
        "schema_version": LOCAL_PROXY_DIAGNOSTIC_SCHEMA_VERSION,
        "date": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "experiment_id": "local-proxy",
        "mode": "serve",
        "proxy_label": payload["ledger_preview"]["proxy_label"],
        "bind": payload["bind"],
        # Only host/port/localhost_only are copied from the target section.
        "target": {
            "host": target["host"],
            "port": target["port"],
            "localhost_only": target["localhost_only"],
        },
        "request": request_section,
        "response": response_section,
        "runtime_limits": payload["runtime_limits"],
        "network_actions": payload["network_actions"],
        "policy": policy_section,
        "shifted_cost_accounting": accounting_section,
        "claim_boundary": claim_section,
    }
3187
-
3188
-
3189
def maybe_write_local_proxy_forward_diagnostic(payload: dict[str, Any]) -> None:
    """Append a diagnostic ledger row when a request was actually forwarded.

    Does nothing unless ``payload["diagnostic_ledger"]`` is a dict with
    ``write_requested`` set. When the request was not served and forwarded,
    the ledger's ``reason`` becomes ``"not_forwarded"`` unless it already says
    ``"preflight_failed"``. On a successful append the ledger dict is updated
    in place with the byte count and a preview of the row.
    """
    ledger = payload.get("diagnostic_ledger")
    if not (isinstance(ledger, dict) and ledger.get("write_requested")):
        return

    outcome = payload.get("forward_result") or {}
    forwarded_once = payload.get("status") == "served_once" and outcome.get("forwarded")
    if not forwarded_once:
        # Keep an earlier preflight failure visible rather than overwriting it.
        if ledger.get("reason") != "preflight_failed":
            ledger["reason"] = "not_forwarded"
        return

    diagnostic_row = local_proxy_forward_diagnostic_row(payload)
    destination = payload.get("_diagnostic_ledger_write_path")
    if not destination:
        ledger["reason"] = "not_requested"
        return

    written = append_jsonl_no_follow(
        Path(str(destination)),
        diagnostic_row,
        label="local proxy forwarding diagnostic ledger",
    )
    ledger["write_performed"] = True
    ledger["bytes_written"] = written
    ledger["reason"] = None
    ledger["row_preview"] = diagnostic_row
3207
-
3208
-
3209
def local_proxy_has_sensitive_headers(headers: Any) -> list[str]:
    """Return the sorted, de-duplicated labels of request headers considered sensitive.

    A header is reported under its lower-cased name when that name is in
    ``LOCAL_PROXY_SENSITIVE_HEADER_NAMES`` or when its value looks secret-like.
    When the header *name* itself looks secret-like, the placeholder
    ``"redacted_sensitive_header"`` is reported instead of the name.
    """
    flagged: set[str] = set()
    for name, value in headers.items():
        key = str(name).lower()
        if key in LOCAL_PROXY_SENSITIVE_HEADER_NAMES:
            flagged.add(key)
        elif local_proxy_secret_like(name):
            # Do not echo a secret-looking header name back to the caller.
            flagged.add("redacted_sensitive_header")
        elif local_proxy_secret_like(value):
            flagged.add(key)
    return sorted(flagged)
3220
-
3221
-
3222
def local_proxy_safe_forward_headers(headers: Any, *, target_host: str, target_port: int) -> dict[str, str]:
    """Return the fixed header set sent to the upstream loopback target.

    No inbound header is copied: ``headers`` is accepted for interface
    compatibility but intentionally ignored, so only ``Host`` and
    ``Connection: close`` reach the upstream.

    Args:
        headers: Inbound request headers (unused).
        target_host: Upstream host; an IP literal such as ``127.0.0.1`` or ``::1``.
        target_port: Upstream TCP port.

    Returns:
        A new dict with the ``Host`` and ``Connection`` headers.
    """
    host = str(target_host)
    # An IPv6 literal must be bracketed in Host, otherwise "::1:8080" is
    # ambiguous/malformed. Already-bracketed input is left untouched.
    if ":" in host and not host.startswith("["):
        host = f"[{host}]"
    return {
        "Host": f"{host}:{target_port}",
        "Connection": "close",
    }
3227
-
3228
-
3229
def local_proxy_response_headers(headers: Any) -> list[tuple[str, str]]:
    """Select the upstream response headers that may be relayed downstream.

    Only ``Content-Type`` survives, and only when it is neither listed as a
    sensitive or hop-by-hop header nor secret-like in name or value.

    Returns:
        ``(name, value)`` string pairs in the order the headers were iterated.
    """
    relayed: list[tuple[str, str]] = []
    for name, value in headers.items():
        key = str(name).lower()
        denied = key in LOCAL_PROXY_SENSITIVE_HEADER_NAMES or key in LOCAL_PROXY_HOP_BY_HOP_HEADERS
        if denied or key != "content-type":
            continue
        if local_proxy_secret_like(name) or local_proxy_secret_like(value):
            continue
        relayed.append((str(name), str(value)))
    return relayed
3241
-
3242
-
3243
def write_local_proxy_ready_file(path: str | None, *, bind_host: str, bind_port: int) -> None:
    """Write a one-line JSON "listener ready" marker, if a path was given.

    The marker records the schema version, this process id and the bind
    address. It is written through ``write_regular_file_no_follow_exclusive``
    with mode ``0o600``. A falsy ``path`` makes this a no-op.
    """
    if not path:
        return
    marker = {
        "schema_version": LOCAL_PROXY_READY_SCHEMA_VERSION,
        "experiment_id": "local-proxy",
        "mode": "serve",
        "status": "listener_ready",
        "diagnostic_only": True,
        "pid": os.getpid(),
        "bind": {"host": bind_host, "port": bind_port},
    }
    encoded = json.dumps(marker, sort_keys=True).encode("utf-8") + b"\n"
    write_regular_file_no_follow_exclusive(
        Path(path),
        encoded,
        label="local proxy ready file",
        mode=0o600,
    )
3260
-
3261
-
3262
def serve_local_proxy_once(payload: dict[str, Any], *, ready_file: str | None = None) -> dict[str, Any]:
    """Bind the listener, handle at most one request, and report what happened.

    GET and HEAD requests without a body are forwarded to the configured
    target over plain HTTP with a fixed header set; CONNECT and the other
    listed verbs are answered with 405. Request targets, headers and upstream
    bodies that look sensitive are blocked instead of relayed.

    Args:
        payload: Serve payload providing ``bind``, ``target`` and
            ``runtime_limits``.
        ready_file: Optional path of a marker file written once the listener
            is bound.

    Returns:
        A result dict (``served_once``, ``forwarded``, ``blocked_reason``,
        byte counts, status codes, ...).

    Raises:
        RegistryError: If the listener socket cannot be created.
    """
    listen_host = payload["bind"]["host"]
    listen_port = int(payload["bind"]["port"])
    upstream_host = payload["target"]["host"]
    upstream_port = int(payload["target"]["port"])
    caps = payload["runtime_limits"]
    request_cap = int(caps["max_request_bytes"])
    response_cap = int(caps["max_response_bytes"])
    io_timeout = float(caps["timeout_seconds"])

    outcome: dict[str, Any] = {
        "served_once": False,
        "forwarded": False,
        "blocked_reason": None,
        "forward_attempted": False,
        "request_method": None,
        "request_target_sha256": None,
        "request_target_bytes": 0,
        "inbound_request_bytes": 0,
        "upstream_status": None,
        "upstream_response_bytes": 0,
        "downstream_status": None,
        "sensitive_headers_blocked": [],
        "listener_started": False,
        "ready_file_written": False,
    }

    def reject(
        handler: BaseHTTPRequestHandler,
        status_code: int,
        reason: str,
        *,
        sensitive: list[str] | None = None,
    ) -> None:
        """Record a blocked request and answer it with a small JSON body."""
        outcome.update({
            "served_once": True,
            "forwarded": False,
            "blocked_reason": reason,
            "downstream_status": status_code,
            "sensitive_headers_blocked": sorted(set(sensitive or [])),
        })
        message = json.dumps({"status": "blocked", "reason": reason}, sort_keys=True).encode("utf-8")
        handler.send_response(status_code)
        handler.send_header("Content-Type", "application/json")
        handler.send_header("Content-Length", str(len(message)))
        handler.send_header("Connection", "close")
        handler.end_headers()
        if handler.command != "HEAD":
            handler.wfile.write(message)

    class LocalProxyHandler(BaseHTTPRequestHandler):
        server_version = "ContextGuardLocalProxy/0"
        protocol_version = "HTTP/1.1"

        def log_message(self, format: str, *args: Any) -> None:  # noqa: A002 - BaseHTTPRequestHandler API.
            # Suppress the default per-request logging.
            return

        def _note_request(self, method: str) -> None:
            outcome["request_method"] = method
            outcome.update(local_proxy_request_target_meta(self.path))

        def do_CONNECT(self) -> None:
            self._note_request("CONNECT")
            reject(self, 405, "connect_tunneling_not_allowed")

        def block_method(self) -> None:
            self._note_request(self.command)
            reject(self, 405, "method_not_allowed")

        def forward_request(self) -> None:
            self._note_request(self.command)

            if local_proxy_secret_like(self.path):
                reject(self, 400, "secret_like_request_target")
                return
            target_parts = urlparse(self.path)
            if target_parts.scheme or target_parts.netloc:
                reject(self, 400, "absolute_proxy_url_not_allowed")
                return
            if str(self.headers.get("Transfer-Encoding", "")).strip():
                reject(self, 400, "transfer_encoding_not_allowed")
                return
            flagged = local_proxy_has_sensitive_headers(self.headers)
            if flagged:
                reject(self, 403, "sensitive_request_headers_blocked", sensitive=flagged)
                return

            declared = self.headers.get("Content-Length")
            try:
                content_length = int(declared) if declared else 0
            except ValueError:
                reject(self, 400, "invalid_content_length")
                return
            if content_length < 0 or content_length > request_cap:
                reject(self, 413, "request_body_exceeds_limit")
                return
            if content_length:
                reject(self, 400, "request_body_not_allowed_for_forwarding_mvp")
                return

            # content_length is zero on every path that reaches this point.
            body = self.rfile.read(content_length) if content_length else b""
            outcome["inbound_request_bytes"] = len(body)
            upstream_path = self.path if self.path.startswith("/") else f"/{self.path}"

            conn = http.client.HTTPConnection(upstream_host, upstream_port, timeout=io_timeout)
            try:
                outcome["forward_attempted"] = True
                conn.request(
                    self.command,
                    upstream_path,
                    body=body,
                    headers=local_proxy_safe_forward_headers(
                        self.headers,
                        target_host=upstream_host,
                        target_port=upstream_port,
                    ),
                )
                response = conn.getresponse()
                # Read one byte past the cap so an oversized body is detectable.
                response_body = response.read(response_cap + 1)
                if len(response_body) > response_cap:
                    reject(self, 502, "upstream_response_exceeds_limit")
                    return
                if local_proxy_bytes_secret_like(response_body):
                    reject(self, 502, "upstream_response_sensitive_content_blocked")
                    return

                self.send_response(response.status, response.reason)
                for header_name, header_value in local_proxy_response_headers(response.headers):
                    self.send_header(header_name, header_value)
                self.send_header("Content-Length", str(len(response_body)))
                self.send_header("Connection", "close")
                self.end_headers()
                if self.command != "HEAD":
                    self.wfile.write(response_body)
                outcome.update({
                    "served_once": True,
                    "forwarded": True,
                    "blocked_reason": None,
                    "upstream_status": response.status,
                    "upstream_response_bytes": len(response_body),
                    "downstream_status": response.status,
                })
            except (OSError, http.client.HTTPException, TimeoutError) as exc:
                reject(self, 502, "upstream_forward_error")
                outcome["error"] = sanitize_local_proxy_value(str(exc))
            finally:
                conn.close()

        # Verbs that are forwarded.
        do_HEAD = forward_request
        do_GET = forward_request
        # Verbs that are always refused with 405.
        do_POST = block_method
        do_PUT = block_method
        do_PATCH = block_method
        do_DELETE = block_method
        do_OPTIONS = block_method
        do_TRACE = block_method

    class LocalProxyHTTPServer(HTTPServer):
        # A colon in the bind host selects the IPv6 socket family.
        address_family = socket.AF_INET6 if ":" in listen_host else socket.AF_INET

        def server_bind(self) -> None:
            # Calls TCPServer.server_bind directly (not HTTPServer's) and
            # derives the server name from the bound address.
            TCPServer.server_bind(self)
            host, port = self.server_address[:2]
            self.server_name = str(host)
            self.server_port = int(port)

        def get_request(self) -> tuple[Any, Any]:
            connection, peer = super().get_request()
            connection.settimeout(io_timeout)
            return connection, peer

    try:
        httpd = LocalProxyHTTPServer((listen_host, listen_port), LocalProxyHandler)
    except OSError as exc:
        raise RegistryError(f"could not start local proxy listener: {os_error_detail(exc)}") from exc
    httpd.timeout = io_timeout

    try:
        try:
            write_local_proxy_ready_file(ready_file, bind_host=listen_host, bind_port=listen_port)
            outcome["ready_file_written"] = bool(ready_file)
            outcome["listener_started"] = True
        except RegistryError as exc:
            outcome.update({
                "served_once": False,
                "forwarded": False,
                "blocked_reason": "ready_file_write_failed",
                "downstream_status": None,
                "error": sanitize_local_proxy_value(str(exc)),
            })
            return outcome

        httpd.handle_request()
        if not outcome["served_once"]:
            outcome.update({
                "blocked_reason": "timeout_waiting_for_request",
                "downstream_status": None,
            })
    finally:
        httpd.server_close()
    return outcome
3459
-
3460
-
3461
def command_serve_local_proxy(args: argparse.Namespace) -> int:
    """CLI entry point: serve one loopback request through the forwarding MVP.

    Steps: build the forward payload, preflight the diagnostic ledger when one
    was requested, serve a single request if still ready, write the diagnostic
    row if applicable, then report as JSON (``args.json``) or plain text.

    Returns:
        ``0`` when one request was forwarded (``status == "served_once"``),
        ``1`` otherwise.
    """
    blocked_status = "blocked_until_local_proxy_forwarding_ready"
    report = local_proxy_forward_payload(args)

    ledger_candidate = report.get("diagnostic_ledger")
    ledger = ledger_candidate if isinstance(ledger_candidate, dict) else {}

    if report["status"] == "ready_to_serve" and ledger.get("write_requested"):
        # Check the ledger is appendable before opening any listener.
        try:
            preflight_append_jsonl_no_follow(
                Path(str(report.get("_diagnostic_ledger_write_path"))),
                label="local proxy forwarding diagnostic ledger",
            )
        except RegistryError as exc:
            report["status"] = blocked_status
            report["review_plan"]["readiness_blockers"].append("diagnostic_ledger_preflight_failed")
            ledger["reason"] = "preflight_failed"
            ledger["error"] = sanitize_local_proxy_value(str(exc))

    if report["status"] == "ready_to_serve":
        outcome = serve_local_proxy_once(report, ready_file=args.ready_file)
        report["forward_result"] = outcome

        listener_started = bool(outcome.get("listener_started"))
        actions = report["network_actions"]
        actions["listener_started"] = listener_started
        actions["outbound_forwarding_attempted"] = bool(outcome["forward_attempted"])
        actions["dns_lookup_attempted"] = False
        actions["external_services_called"] = False
        report["policy"]["listener_started"] = listener_started
        report["policy"]["traffic_forwarded"] = bool(outcome["forwarded"])

        if outcome["forwarded"]:
            report["status"] = "served_once"
        elif outcome.get("blocked_reason") == "ready_file_write_failed":
            report["status"] = blocked_status
            report["review_plan"]["readiness_blockers"].append("ready_file_write_failed")
        else:
            report["status"] = "blocked_request"

    maybe_write_local_proxy_forward_diagnostic(report)
    # The raw ledger path is internal and must not appear in the output.
    report.pop("_diagnostic_ledger_write_path", None)

    served = report["status"] == "served_once"
    if args.json:
        emit_json(report)
    elif served:
        print("ContextGuard local proxy served one loopback request")
        print(report["claim_boundary"])
    else:
        print("ContextGuard local proxy serve blocked")
        print(f"Status: {report['status']}")
        blockers = report["review_plan"]["readiness_blockers"]
        if blockers:
            print(f"Readiness blockers: {', '.join(blockers)}")
        forward_result = report.get("forward_result")
        if forward_result and forward_result.get("blocked_reason"):
            print(f"Request blocker: {forward_result['blocked_reason']}")
        print(report["claim_boundary"])

    return 0 if served else 1
3507
-
3508
-
3509
# --- Signal patterns for the learned-compression checks ---------------------
# Each pattern is counted with ``findall`` by ``learned_signal_counts``.

# Markdown fence opener/closer at the start of a line (``` or ~~~).
LEARNED_CODE_FENCE_RE = re.compile(r"(?m)^\s*(?:```|~~~)")
# Diff headers, hunk markers, or any line beginning with "+" / "-".
LEARNED_DIFF_RE = re.compile(r"(?m)^\s*(diff --git |@@\s+-|--- |\+\+\+ |[+-].*)")
# snake_case, camelCase, PascalCase, dotted names and UPPER_CASE tokens.
LEARNED_IDENTIFIER_RE = re.compile(
    r"\b(?:"
    r"_*[A-Za-z]+_[A-Za-z0-9_]*"
    r"|_*[a-z]+[A-Z][A-Za-z0-9]*"
    r"|_*[A-Z][a-z]+[A-Z][A-Za-z0-9]*"
    r"|_*[A-Za-z_][A-Za-z0-9_]*(?:\.[A-Za-z_][A-Za-z0-9_]*)+"
    r"|_*[A-Z][A-Z0-9_]{2,}"
    r")\b"
)
# Absolute POSIX paths, Windows drive paths, relative paths, and bare file
# names carrying one of the listed extensions.
LEARNED_PATH_RE = re.compile(
    r"(?x)(?:"
    r"(?<![\w.-])/(?:[A-Za-z0-9._@%+=:-]+/)*[A-Za-z0-9._@%+=:-]+"
    r"|"
    r"\b[A-Za-z]:\\(?:[^\\\s:\"'<>|]+\\)*[^\\\s:\"'<>|]+"
    r"|"
    r"(?<![\w.-])(?:\.{1,2}/)+[A-Za-z0-9._@%+=:-]+(?:/[A-Za-z0-9._@%+=:-]+)*\b"
    r"|"
    r"\b(?:\.{1,2}/)?(?:[A-Za-z0-9._@%+=:-]+/)+[A-Za-z0-9._@%+=:-]+\b"
    r"|"
    r"\b[A-Za-z0-9._-]+\.(?:py|js|ts|tsx|jsx|go|rs|java|kt|swift|json|ya?ml|toml|md|txt|log|sh|bash|zsh|sql|html|css)\b"
    r")"
)
# "sha256:"-prefixed digests or bare runs of 7-64 hex characters.
LEARNED_HASH_RE = re.compile(r"\b(?:sha256:[0-9a-fA-F]{32,64}|[0-9a-fA-F]{7,64})\b")
# Python traceback frames (File "...", line N, in f) and "at f (file:line)" frames.
LEARNED_STACK_FRAME_RE = re.compile(
    r"(?m)^\s*(?:File\s+\"[^\"]+\",\s+line\s+\d+,\s+in\s+\S+|at\s+\S+.*\([^)]*:\d+(?::\d+)?\))"
)
# A quoted string immediately followed by a colon.
LEARNED_JSON_KEY_RE = re.compile(r"""(?x)"(?:[^"\\]|\\.)*"\s*:|'(?:[^'\\]|\\.)*'\s*:""")
# Triple-quoted and single-line quoted strings, either quote character.
LEARNED_QUOTED_STRING_RE = re.compile(
    r'''(?x)"""(?:.|\n)*?"""|''' + r"""'''(?:.|\n)*?'''|"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'"""
)
# Version-like / decimal numbers and hexadecimal literals.
LEARNED_NUMERIC_CONSTANT_RE = re.compile(
    r"(?<![\w.])(?:[vV]?\d+(?:\.\d+)*|[-+]?0x[0-9A-Fa-f]+)(?![\w.])"
)
# Phrases that read like instructions addressed to a model.
LEARNED_PROMPT_LIKE_RE = re.compile(
    r"(?imx)(?:"
    r"\b(?:ignore|disregard|forget)\s+(?:all\s+)?(?:the\s+)?(?:above|earlier|previous|prior)\s+instructions?\b"
    r"|^\s*(?:system|developer|user|assistant)\s*:"
    r"|\b(?:system|developer|user|assistant)\s+instructions?\b"
    r"|\b(?:system|developer)\s+message\b"
    r"|\byou\s+are\s+(?:now\s+)?(?:chatgpt|a\s+\w+|\w+)\b"
    r"|\bact\s+as\b"
    r"|\bjailbreak\b"
    r"|\bdo\s+not\s+follow\b"
    r"|\boverride\s+instructions\b"
    r")"
)
# http(s) URLs or dotted host names ending in an alphabetic TLD.
LEARNED_URL_RE = re.compile(
    r"(?i)\b(?:https?://|(?:[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?\.)+[A-Za-z]{2,24})(?:/|\b)"
)
# Lines that look like source code, shell commands, or markup tags.
LEARNED_CODE_LIKE_RE = re.compile(
    r"(?mx)^\s*(?:"
    r"(?:from\s+\S+\s+import\s+\S+|import\s+\S+|def\s+[A-Za-z_]\w*\s*\(|class\s+[A-Za-z_]\w*\s*(?:\(|:)|"
    r"function\s+[A-Za-z_$][\w$]*\s*\(|(?:const|let|var)\s+[A-Za-z_$][\w$]*\s*=)"
    r"|(?:if|elif|else|for|while|try|except|finally|with)\b.*:"
    r"|(?:print|raise|return|yield|assert)\b(?:\s*\(|\s+\S+)"
    r"|[A-Za-z_][A-Za-z0-9_]*\s*(?:=|==|!=|<=|>=|\+=|-=|\*=|/=)\s*\S+"
    r"|.*[{};]\s*$"
    r"|(?:ls|cp|mv|rm|sudo|curl|wget|chmod|chown|git|npm|npx|pnpm|yarn|python3?|pip|node|bash|sh|zsh|cat|grep|sed|awk|make|cargo|pytest|tox|uv|ruff|mypy|pyright|docker|kubectl)(?:\s+(?:-\S+|\S+))*"
    r"|<[/!]?[A-Za-z][A-Za-z0-9-]*(?:\s+[^<>]*)?>"
    r")"
)
# Single-line `inline code` spans.
LEARNED_INLINE_CODE_RE = re.compile(r"`[^`\n]+`")
# Control characters (other than tab/LF/CR) and U+FFFD replacement characters.
LEARNED_NON_TEXT_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f\ufffd]")
# Word-ish tokens; dots and hyphens are allowed inside a token.
LEARNED_WORD_RE = re.compile(r"\b[\w.-]+\b")
# Artifact / receipt ids: 16-64 lowercase hex characters.
LEARNED_ARTIFACT_ID_RE = re.compile(r"^[a-f0-9]{16,64}$")
3576
-
3577
-
3578
def read_learned_input(args: argparse.Namespace) -> tuple[str, dict[str, Any]]:
    """Load the learned-compression input from ``args.input`` or standard input.

    At most ``MAX_LEARNED_COMPRESSION_INPUT_BYTES`` bytes are kept. Bytes are
    decoded as UTF-8 with undecodable sequences replaced.

    Returns:
        ``(text, metadata)`` where ``metadata`` holds the source label, byte
        and line counts, the SHA-256 of the retained bytes (``None`` when
        empty), the truncation flag and the byte cap.
    """
    cap = MAX_LEARNED_COMPRESSION_INPUT_BYTES
    label = args.source_label

    if args.input:
        input_path = Path(args.input)
        label = label or input_path.name
        loaded = read_bounded_regular_file(input_path, max_bytes=cap, label="learned-compression input")
        assert loaded is not None
        data, was_truncated = loaded
    else:
        label = label or "stdin"
        # Read one byte beyond the cap so truncation can be detected.
        data = sys.stdin.buffer.read(cap + 1)
        was_truncated = len(data) > cap
        data = data[:cap]

    text = data.decode("utf-8", errors="replace")
    digest = hashlib.sha256(data).hexdigest() if data else None
    return text, {
        "source_label": label,
        "bytes": len(data),
        "lines": len(text.splitlines()),
        "sha256": digest,
        "truncated": was_truncated,
        "max_bytes": cap,
    }
3601
-
3602
-
3603
def learned_content_type(text: str, counts: dict[str, int]) -> str:
    """Classify ``text`` as empty, non_text, json, diff, code or prose.

    Whitespace-only text is ``"empty"``. Otherwise the first non-zero signal
    wins, checked in this order: non-text characters, JSON keys, diff lines.
    Text with a code fence, a code-like line, or at least three identifiers is
    ``"code"``; everything else is ``"prose"``.

    Args:
        text: The input text.
        counts: Signal counts keyed by signal name.
    """
    if not text.strip():
        return "empty"

    ordered_signals = (
        ("non_text_input", "non_text"),
        ("protected_json_key", "json"),
        ("protected_diff", "diff"),
    )
    for signal, label in ordered_signals:
        if counts[signal]:
            return label

    looks_like_code = (
        counts["protected_code_fence"]
        or counts["protected_code_like"]
        or counts["protected_identifier"] >= 3
    )
    return "code" if looks_like_code else "prose"
3616
-
3617
-
3618
def learned_signal_counts(text: str) -> dict[str, int]:
    """Count how often each learned-compression signal pattern matches ``text``.

    ``protected_code_like`` is the sum of code-like lines and inline-code
    spans. ``numeric_density_high`` is ``1`` when there are at least three
    numeric constants and they make up at least 20% of the word tokens,
    otherwise ``0``.
    """

    def tally(pattern: Any) -> int:
        return len(pattern.findall(text))

    word_total = tally(LEARNED_WORD_RE)
    numeric_total = tally(LEARNED_NUMERIC_CONSTANT_RE)
    code_like_total = tally(LEARNED_CODE_LIKE_RE) + tally(LEARNED_INLINE_CODE_RE)

    # word_total is checked first so the ratio never divides by zero.
    if word_total and numeric_total >= 3 and numeric_total / word_total >= 0.20:
        dense_numbers = 1
    else:
        dense_numbers = 0

    return {
        "protected_code_fence": tally(LEARNED_CODE_FENCE_RE),
        "protected_diff": tally(LEARNED_DIFF_RE),
        "protected_identifier": tally(LEARNED_IDENTIFIER_RE),
        "protected_path": tally(LEARNED_PATH_RE),
        "protected_hash": tally(LEARNED_HASH_RE),
        "protected_stack_frame": tally(LEARNED_STACK_FRAME_RE),
        "protected_json_key": tally(LEARNED_JSON_KEY_RE),
        "protected_numeric_constant": numeric_total,
        "protected_quoted_string": tally(LEARNED_QUOTED_STRING_RE),
        "prompt_like_instruction": tally(LEARNED_PROMPT_LIKE_RE),
        "url_or_endpoint": tally(LEARNED_URL_RE),
        "protected_code_like": code_like_total,
        "non_text_input": tally(LEARNED_NON_TEXT_RE),
        "numeric_density_high": dense_numbers,
    }
3639
-
3640
-
3641
def valid_learned_reexpand_command(receipt_id: str | None, command: str | None) -> tuple[bool, str | None]:
    """Check that ``command`` is one of the two accepted exact re-expand commands.

    Accepted forms, after shell-style splitting:
    ``context-guard-artifact get <receipt_id> --full`` and
    ``context-guard artifact get <receipt_id> --full``.

    Returns:
        ``(True, None)`` when valid, otherwise ``(False, reason)`` with reason
        ``"missing_exact_fallback"`` (either argument falsy) or
        ``"invalid_reexpand_command"``.
    """
    rejected = (False, "invalid_reexpand_command")

    if not (receipt_id and command):
        return False, "missing_exact_fallback"
    if LEARNED_ARTIFACT_ID_RE.fullmatch(receipt_id) is None:
        return rejected
    # Refuse shell metacharacters and line breaks outright.
    if set(command) & set(";|&><`$\n\r"):
        return rejected

    try:
        argv = shlex.split(command)
    except ValueError:
        return rejected
    if len(argv) < 4:
        return rejected

    accepted_forms = (
        ["context-guard-artifact", "get", receipt_id, "--full"],
        ["context-guard", "artifact", "get", receipt_id, "--full"],
    )
    if argv in accepted_forms:
        return True, None
    return rejected
3659
-
3660
-
3661
def verify_learned_fallback_artifact(
    receipt_id: str | None,
    *,
    expected_sha256: str,
    expected_bytes: int,
) -> tuple[bool, str | None, dict[str, Any]]:
    """Verify that a stored fallback artifact exists and matches the expected content.

    Each artifact read directory is searched in order. The first directory
    holding either the content or the metadata file decides the outcome: both
    files must be present and untruncated, the metadata must be a JSON object
    naming ``receipt_id`` and agreeing with the content's SHA-256 and size,
    and the content must equal the expected digest and length.

    Returns:
        ``(ok, reason, details)``. ``reason`` is ``None`` on success, else one
        of ``"invalid_reexpand_command"``, ``"fallback_receipt_invalid"``,
        ``"fallback_content_mismatch"`` or ``"fallback_receipt_not_found"``.
    """
    if not receipt_id or not LEARNED_ARTIFACT_ID_RE.fullmatch(receipt_id):
        return False, "invalid_reexpand_command", {"checked": False, "read_directories": []}

    search_dirs = context_diff_artifact_read_dirs()
    details: dict[str, Any] = {
        "checked": True,
        "read_directories": [str(path) for path in search_dirs],
        "matched_directory": None,
        "content_sha256": None,
        "content_bytes": None,
    }
    invalid = (False, "fallback_receipt_invalid", details)

    for directory in search_dirs:
        content_path, meta_path = context_diff_artifact_paths(directory, receipt_id)
        meta_loaded = read_bounded_regular_file(
            meta_path,
            max_bytes=MAX_LEARNED_COMPRESSION_ARTIFACT_METADATA_BYTES,
            label="learned-compression fallback metadata",
            missing_ok=True,
        )
        content_loaded = read_bounded_regular_file(
            content_path,
            max_bytes=max(MAX_LEARNED_COMPRESSION_INPUT_BYTES, expected_bytes),
            label="learned-compression fallback content",
            missing_ok=True,
        )

        if meta_loaded is None and content_loaded is None:
            # Nothing stored here; try the next directory.
            continue
        if meta_loaded is None or content_loaded is None:
            return invalid

        meta_blob, meta_cut = meta_loaded
        content_blob, content_cut = content_loaded
        if meta_cut or content_cut:
            return invalid

        try:
            record = json.loads(meta_blob.decode("utf-8"))
        except (UnicodeDecodeError, json.JSONDecodeError):
            return invalid
        if not isinstance(record, dict) or record.get("artifact_id") != receipt_id:
            return invalid

        stored = record.get("stored_output")
        if isinstance(stored, dict):
            recorded_sha = stored.get("sha256")
            recorded_size = stored.get("bytes")
        else:
            recorded_sha = None
            recorded_size = None

        content_sha = hashlib.sha256(content_blob).hexdigest()
        content_size = len(content_blob)
        details.update({
            "matched_directory": str(directory),
            "content_sha256": content_sha,
            "content_bytes": content_size,
        })

        # The metadata must describe the bytes actually on disk...
        if recorded_sha != content_sha or recorded_size != content_size:
            return invalid
        # ...and those bytes must be the ones the caller expects.
        if content_sha != expected_sha256 or content_size != expected_bytes:
            return False, "fallback_content_mismatch", details
        return True, None, details

    return False, "fallback_receipt_not_found", details
3721
-
3722
-
3723
def read_learned_candidate_replacement(args: argparse.Namespace) -> tuple[str | None, dict[str, Any]]:
    """Load the caller-supplied candidate replacement and describe it.

    The candidate comes from ``args.replacement_text`` (inline) or from
    ``args.replacement_file`` (read through ``read_bounded_regular_file``);
    supplying both is rejected. Inline text is cut to
    ``MAX_LEARNED_COMPRESSION_REPLACEMENT_BYTES`` UTF-8 bytes, and the same
    limit is passed to the file loader as ``max_bytes``.

    Returns:
        ``(text, meta)``. ``text`` is the decoded candidate, or ``None`` when
        no candidate was supplied. ``meta`` carries ``source_label``,
        ``bytes``, ``lines``, ``sha256``, ``truncated`` and ``max_bytes``.

    Raises:
        RegistryError: If both sources are given, or if the file loader
            returns no data.
    """
    if args.replacement_text is not None and args.replacement_file:
        raise RegistryError("learned-compression emit accepts only one of --replacement-text or --replacement-file")

    if args.replacement_text is not None:
        text = str(args.replacement_text)
        raw = text.encode("utf-8")
        truncated = len(raw) > MAX_LEARNED_COMPRESSION_REPLACEMENT_BYTES
        raw = raw[:MAX_LEARNED_COMPRESSION_REPLACEMENT_BYTES]
        # The byte cut may land inside a multi-byte character, hence "replace".
        text = raw.decode("utf-8", errors="replace")
        source_label = "inline"
    elif args.replacement_file:
        path = Path(args.replacement_file)
        loaded = read_bounded_regular_file(
            path,
            max_bytes=MAX_LEARNED_COMPRESSION_REPLACEMENT_BYTES,
            label="learned-compression candidate replacement",
        )
        # Explicit check instead of ``assert``: assertions are removed under
        # ``python -O``, which would turn this into an opaque unpacking error.
        if loaded is None:
            raise RegistryError("learned-compression candidate replacement could not be read")
        raw, truncated = loaded
        text = raw.decode("utf-8", errors="replace")
        source_label = path.name
    else:
        text = None
        raw = b""
        truncated = False
        source_label = None

    # ``bytes`` and ``sha256`` describe the raw (possibly truncated) bytes,
    # not the decoded ``text``.
    return text, {
        "source_label": source_label,
        "bytes": len(raw),
        "lines": len(text.splitlines()) if text is not None else 0,
        "sha256": hashlib.sha256(raw).hexdigest() if text is not None else None,
        "truncated": truncated,
        "max_bytes": MAX_LEARNED_COMPRESSION_REPLACEMENT_BYTES,
    }
3757
-
3758
-
3759
def learned_compression_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the dry-run report for the learned-compression safety gate.

    The input is read via ``read_learned_input``, scanned with
    ``learned_signal_counts`` / ``learned_content_type``, and every failed
    precondition is collected as a readiness blocker. The report status is
    ``"ready_for_human_review"`` only when no blocker was recorded.

    Returns:
        The report dictionary; ``candidate_replacement`` is always ``None``.
    """
    text, input_meta = read_learned_input(args)

    receipt_id = None
    if args.exact_fallback_receipt:
        receipt_id = args.exact_fallback_receipt.strip()
    reexpand_command = None
    if args.reexpand_command:
        reexpand_command = args.reexpand_command.strip()
    reexpand_valid, fallback_blocker = valid_learned_reexpand_command(receipt_id, reexpand_command)

    counts = learned_signal_counts(text)
    content_type = learned_content_type(text, counts)
    flagged_signals = [name for name, hits in counts.items() if hits]
    has_text = bool(text.strip())

    # (failed?, blocker name) pairs, listed in the order they are reported.
    gate_checks = [
        (not has_text, "missing_input"),
        (input_meta["truncated"], "input_truncated"),
        (not args.sanitized, "missing_sanitized_assertion"),
        (not args.trusted_source, "untrusted_input"),
        (fallback_blocker, fallback_blocker),
        (content_type != "prose" and has_text, "non_prose_input"),
    ]
    blockers = [name for failed, name in gate_checks if failed]
    # Every protected signal that was seen is itself a blocker.
    blockers.extend(flagged_signals)
    # De-duplicate while keeping first-seen order.
    blockers = list(dict.fromkeys(blockers))
    ready = not blockers

    status = "ready_for_human_review" if ready else "blocked_until_safe_input"
    policy = {
        "deny_by_default": True,
        "runtime_compression_allowed": False,
        "eligible_for_human_review": ready,
        "human_review_required": True,
        "stable_runtime_behavior_changed": False,
    }
    sanitization = {
        "required": True,
        "caller_asserted": bool(args.sanitized),
        "verified": False,
    }
    trust = {
        "required": True,
        "caller_asserted": bool(args.trusted_source),
        "verified": False,
    }
    exact_fallback = {
        "required": True,
        "available": bool(receipt_id and reexpand_command and reexpand_valid),
        "receipt_id": receipt_id,
        "cli": reexpand_command,
        "verified": False,
    }
    review_plan = {
        "readiness_blockers": blockers,
        "protected_signals": flagged_signals,
        "next_steps": [
            "Keep exact fallback receipt and re-expand command available before considering any future summary.",
            "Reject learned compression for protected, prompt-like, untrusted, or non-prose input.",
            "Do not claim hosted token/cost savings from this dry-run policy check.",
        ],
    }
    claim_boundary = (
        "Dry-run learned-compression policy check only; no hosted token/cost savings claim without "
        "provider-measured matched successful tasks."
    )

    return {
        "tool": TOOL_NAME,
        "schema_version": CONFIG_SCHEMA_VERSION,
        "experiment_id": "learned-compression",
        "mode": "dry_run",
        "status": status,
        "input": input_meta,
        "policy": policy,
        "sanitization": sanitization,
        "trust": trust,
        "exact_fallback": exact_fallback,
        "protected_signal_scan": {
            "content_type": content_type,
            "counts": counts,
        },
        "review_plan": review_plan,
        "claim_boundary": claim_boundary,
        "candidate_replacement": None,
    }
3835
-
3836
-
3837
def command_plan_learned_compression(args: argparse.Namespace) -> int:
    """Run the learned-compression dry-run gate and report the result.

    With ``args.json`` set the report is passed to ``emit_json``; otherwise a
    short text summary is printed. Always returns exit code ``0``.
    """
    payload = learned_compression_plan_payload(args)
    if args.json:
        emit_json(payload)
        return 0

    source = payload["input"]
    blockers = payload["review_plan"]["readiness_blockers"]
    lines = [
        "ContextGuard learned/synthetic compression gate (dry-run only)",
        "No learned compressor/model/provider was called and no replacement text was emitted.",
        f"Status: {payload['status']}",
        f"Input: {source['source_label']} lines={source['lines']} sha256={source['sha256']}",
    ]
    if blockers:
        lines.append(f"Readiness blockers: {', '.join(blockers)}")
    lines.append(payload["claim_boundary"])
    for line in lines:
        print(line)
    return 0
3850
-
3851
-
3852
def learned_compression_emit_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the emit-mode report for a caller-supplied compression candidate.

    Starts from the dry-run report of ``learned_compression_plan_payload``,
    verifies the exact fallback artifact when the re-expand command has a
    valid shape, scans the candidate like the input, and adds candidate
    blockers. The candidate text is included under ``candidate_replacement``
    only when no blocker remains; otherwise that key is removed.

    Returns:
        The report dictionary with ``mode`` set to ``"emit"``.
    """
    payload = learned_compression_plan_payload(args)

    receipt_id = None
    if args.exact_fallback_receipt:
        receipt_id = args.exact_fallback_receipt.strip()
    reexpand_command = None
    if args.reexpand_command:
        reexpand_command = args.reexpand_command.strip()
    # The shape blocker is already part of the dry-run blockers; only the
    # validity flag is needed here.
    reexpand_valid, _shape_blocker = valid_learned_reexpand_command(receipt_id, reexpand_command)

    fallback_verification: dict[str, Any]
    if reexpand_valid:
        fallback_verified, fallback_blocker, fallback_verification = verify_learned_fallback_artifact(
            receipt_id,
            expected_sha256=payload["input"]["sha256"],
            expected_bytes=payload["input"]["bytes"],
        )
    else:
        fallback_verified, fallback_blocker = False, None
        fallback_verification = {"checked": False, "read_directories": []}

    candidate_text, candidate_meta = read_learned_candidate_replacement(args)
    scan_text = candidate_text or ""
    candidate_counts = learned_signal_counts(scan_text)
    candidate_content_type = learned_content_type(scan_text, candidate_counts)
    candidate_signals = [name for name, hits in candidate_counts.items() if hits]

    has_candidate = candidate_text is not None
    candidate_blank = not has_candidate or not candidate_text.strip()
    was_truncated = candidate_meta["truncated"]

    blockers = list(payload["review_plan"]["readiness_blockers"])
    if fallback_blocker:
        blockers.append(fallback_blocker)
    if candidate_blank:
        blockers.append("missing_candidate_replacement")
    if was_truncated:
        blockers.append("candidate_replacement_truncated")
    if has_candidate and not was_truncated and candidate_meta["bytes"] >= payload["input"]["bytes"]:
        blockers.append("candidate_not_smaller_than_input")
    if not candidate_blank and candidate_content_type != "prose":
        blockers.append("candidate_non_prose_input")
    blockers.extend(f"candidate_{name}" for name in candidate_signals)
    # De-duplicate while keeping first-seen order.
    blockers = list(dict.fromkeys(blockers))
    ready = not blockers

    payload.update({
        "mode": "emit",
        "status": "candidate_emitted" if ready else "blocked_until_candidate_ready",
        # Fresh dict: the dry-run policy plus the emit-specific flags.
        "policy": {
            **payload["policy"],
            "runtime_compression_allowed": False,
            "caller_supplied_candidate_required": True,
            "caller_supplied_candidate_allowed": ready,
            "lossy_replacement_allowed": ready,
            "learned_compressor_called": False,
            "embedding_or_reranker_called": False,
            "model_call_allowed": False,
            "subprocess_allowed": False,
        },
        "exact_fallback": {
            "required": True,
            "available": bool(receipt_id and reexpand_command and reexpand_valid and fallback_verified),
            "receipt_id": receipt_id,
            "cli": reexpand_command,
            "verified": fallback_verified,
            "valid_command_shape": reexpand_valid,
            "verification": fallback_verification,
            "note": "Emit mode validates exact local fallback command shape and verifies local artifact content matches the input prose.",
        },
        "candidate_scan": {
            "content_type": candidate_content_type,
            "counts": candidate_counts,
            "protected_signals": candidate_signals,
        },
        "replacement": candidate_meta,
    })

    review_plan = payload["review_plan"]
    review_plan["readiness_blockers"] = blockers
    review_plan["protected_signals"] = [
        name for name, hits in payload["protected_signal_scan"]["counts"].items() if hits
    ]
    review_plan["candidate_protected_signals"] = list(candidate_signals)
    review_plan["next_steps"] = [
        "Human-review the caller-supplied candidate against the exact fallback before using it.",
        "Reject candidates that omit protected facts, prompt-like text, paths, code, diffs, identifiers, or numeric constants.",
        "Treat byte reduction as local proxy evidence only; do not claim hosted token/cost savings.",
    ]

    payload["claim_boundary"] = (
        "Explicit local learned-compression candidate emission only; ContextGuard does not run a learned compressor, "
        "model, embedding, reranker, subprocess, or external service, and byte reduction is not hosted API token or cost evidence."
    )

    bytes_before = payload["input"]["bytes"]
    bytes_after = candidate_meta["bytes"] if has_candidate else 0
    payload["compression_evidence"] = {
        "bytes_before": bytes_before,
        "bytes_after": bytes_after,
        "byte_reduction": max(0, bytes_before - bytes_after),
        "byte_reduction_proxy_only": True,
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }

    if not (ready and has_candidate):
        # Blocked: make sure no candidate text is present in the report.
        payload.pop("candidate_replacement", None)
        return payload

    payload["candidate_replacement"] = {
        "text": candidate_text,
        "bytes": candidate_meta["bytes"],
        "lines": candidate_meta["lines"],
        "sha256": candidate_meta["sha256"],
        "source_label": candidate_meta["source_label"],
        "caller_supplied": True,
    }
    return payload
3956
-
3957
-
3958
def command_emit_learned_compression(args: argparse.Namespace) -> int:
    """Emit a learned-compression candidate report and return an exit code.

    With ``args.json`` set the report is passed to ``emit_json``; otherwise a
    text summary is printed. Returns ``0`` when the report status is
    ``"candidate_emitted"`` and ``1`` otherwise.
    """
    payload = learned_compression_emit_payload(args)
    emitted = payload["status"] == "candidate_emitted"
    exit_code = 0 if emitted else 1

    if args.json:
        emit_json(payload)
        return exit_code

    if emitted:
        replacement = payload["replacement"]
        lines = [
            "ContextGuard learned-compression candidate emitted",
            f"Candidate: bytes={replacement['bytes']} sha256={replacement['sha256']}",
            f"Exact fallback: {payload['exact_fallback']['cli']}",
        ]
    else:
        lines = [
            "ContextGuard learned-compression candidate blocked",
            f"Status: {payload['status']}",
        ]
        blockers = payload["review_plan"]["readiness_blockers"]
        if blockers:
            lines.append(f"Readiness blockers: {', '.join(blockers)}")
    lines.append(payload["claim_boundary"])
    for line in lines:
        print(line)
    return exit_code
3977
-
3978
-
3979
def add_common_args(parser: argparse.ArgumentParser) -> None:
    """Register the ``--root``, ``--config`` and ``--json`` options on *parser*."""
    shared_options = (
        ("--root", {"help": "Project root for default project-local experiment config (default: cwd)."}),
        (
            "--config",
            {"help": "Project-local config path. Relative paths resolve under --root; absolute paths must stay inside --root."},
        ),
        ("--json", {"action": "store_true", "help": "Emit JSON output."}),
    )
    for flag, options in shared_options:
        parser.add_argument(flag, **options)
3983
-
3984
-
3985
def load_args_context(args: argparse.Namespace) -> tuple[Path, Path, dict[str, Any]]:
    """Resolve the project root and config path from *args* and load the config.

    Returns:
        ``(root, config_path, config)`` as produced by ``resolve_root``,
        ``resolve_config_path`` and ``load_config`` respectively.
    """
    project_root = resolve_root(args.root)
    config_file = resolve_config_path(project_root, args.config)
    config = load_config(config_file)
    return project_root, config_file, config
3989
-
3990
-
3991
- def build_parser() -> argparse.ArgumentParser:
3992
- parser = argparse.ArgumentParser(
3993
- prog=TOOL_NAME,
3994
- description="Inspect and manage default-off ContextGuard experimental feature opt-ins.",
3995
- )
3996
- sub = parser.add_subparsers(dest="command", required=True)
3997
-
3998
- list_parser = sub.add_parser("list", help="List known experiments and metadata.")
3999
- add_common_args(list_parser)
4000
- list_parser.set_defaults(func=command_list)
4001
-
4002
- status_parser = sub.add_parser("status", help="Show project-local experiment enablement status.")
4003
- add_common_args(status_parser)
4004
- status_parser.set_defaults(func=command_status)
4005
-
4006
- enable_parser = sub.add_parser("enable", help="Enable one experiment in project-local config.")
4007
- enable_parser.add_argument("experiment_id")
4008
- add_common_args(enable_parser)
4009
- enable_parser.set_defaults(func=command_enable)
4010
-
4011
- disable_parser = sub.add_parser("disable", help="Disable one experiment in project-local config.")
4012
- disable_parser.add_argument("experiment_id")
4013
- add_common_args(disable_parser)
4014
- disable_parser.set_defaults(func=command_disable)
4015
-
4016
- plan_parser = sub.add_parser("plan", help="Run read-only dry-run planners for experimental lanes.")
4017
- plan_sub = plan_parser.add_subparsers(dest="plan_command", required=True)
4018
-
4019
- context_diff = plan_sub.add_parser(
4020
- "context-diff-compaction",
4021
- help="Dry-run a reviewable context-diff compaction plan without emitting a replacement.",
4022
- )
4023
- context_diff.add_argument("--input", help="Read diff text from a file instead of stdin.")
4024
- context_diff.add_argument("--source-label", help="Safe label to use for the input source in reports.")
4025
- context_diff.add_argument("--receipt-id", help="User-supplied exact receipt/artifact id for human review readiness.")
4026
- context_diff.add_argument("--reexpand-command", help="User-supplied exact re-expand command for human review readiness.")
4027
- context_diff.add_argument("--json", action="store_true", help="Emit JSON output.")
4028
- context_diff.set_defaults(func=command_plan_context_diff_compaction)
4029
-
4030
- visual_ocr = plan_sub.add_parser(
4031
- "visual-crop-ocr",
4032
- help="Dry-run visual crop/OCR evidence metadata without calling OCR or image services.",
4033
- )
4034
- visual_ocr.add_argument("--full-evidence-receipt", help="User-supplied receipt/id for the original full visual evidence.")
4035
- visual_ocr.add_argument("--full-evidence-label", help="Safe label for the full visual evidence.")
4036
- visual_ocr.add_argument("--crop-label", help="Safe label for the cropped region or crop fixture.")
4037
- visual_ocr.add_argument("--crop-bounds", help="Crop bounds as x,y,width,height integers.")
4038
- visual_ocr.add_argument("--image-size", help="Original image size as width,height integers.")
4039
- visual_ocr.add_argument("--ocr-text", help="Bounded OCR fixture text supplied inline.")
4040
- visual_ocr.add_argument("--ocr-text-file", help="Read bounded OCR fixture text from a UTF-8 text file.")
4041
- visual_ocr.add_argument("--ocr-source-label", help="Safe label for OCR text source; defaults to inline or file basename.")
4042
- visual_ocr.add_argument("--ocr-confidence", help="OCR confidence as a finite decimal from 0.0 to 1.0.")
4043
- visual_ocr.add_argument("--ocr-error-note", action="append", help="Known OCR error/uncertainty note. Repeatable.")
4044
- visual_ocr.add_argument("--missed-context-note", action="append", help="Potential context outside crop/OCR text. Repeatable.")
4045
- visual_ocr.add_argument("--json", action="store_true", help="Emit JSON output.")
4046
- visual_ocr.set_defaults(func=command_plan_visual_crop_ocr)
4047
-
4048
- self_hosted = plan_sub.add_parser(
4049
- "self-hosted-metrics-ledger",
4050
- help="Dry-run self-hosted/local metrics ledger sidecar evidence without writing a ledger.",
4051
- )
4052
- self_hosted.add_argument("--input", help="Read an explicit self_hosted_metrics JSON envelope from a file instead of stdin.")
4053
- self_hosted.add_argument("--source-label", help="Safe label to use for the input source in reports.")
4054
- self_hosted.add_argument("--latency-ms", type=float, default=None, help="Local/model-server latency in milliseconds.")
4055
- self_hosted.add_argument("--peak-memory-mb", type=float, default=None, help="Peak local/model-server memory in MiB/MB.")
4056
- self_hosted.add_argument("--quality-score", type=float, default=None, help="Quality score from 0.0 to 1.0.")
4057
- self_hosted.add_argument("--energy-wh", type=float, default=None, help="Diagnostic local energy use in watt-hours.")
4058
- self_hosted.add_argument("--local-cost-usd", type=float, default=None, help="Diagnostic local/self-hosted cost in USD.")
4059
- self_hosted.add_argument("--tokens-per-second", type=float, default=None, help="Diagnostic local throughput.")
4060
- self_hosted.add_argument("--model-server", help="Sanitized label for local model server/runtime.")
4061
- self_hosted.add_argument("--optimization", help="Sanitized label for the local optimization under test.")
4062
- self_hosted.add_argument("--quality-metric", help="Sanitized label for quality metric.")
4063
- self_hosted.add_argument("--hardware", help="Sanitized local hardware label.")
4064
- self_hosted.add_argument("--runtime", help="Sanitized local runtime label.")
4065
- self_hosted.add_argument("--dataset", help="Sanitized dataset label.")
4066
- self_hosted.add_argument("--json", action="store_true", help="Emit JSON output.")
4067
- self_hosted.set_defaults(func=command_plan_self_hosted_metrics_ledger)
4068
-
4069
- local_proxy = plan_sub.add_parser(
4070
- "local-proxy",
4071
- help="Dry-run a localhost-only local proxy advisory plan without starting a proxy.",
4072
- )
4073
- local_proxy.add_argument("--input", help="Read a local_proxy JSON envelope from a file instead of CLI flags.")
4074
- local_proxy.add_argument("--bind-host", help="Advisory bind host; must be localhost/loopback.")
4075
- local_proxy.add_argument("--bind-port", default=None, help="Advisory bind port; 0 means unspecified/ephemeral.")
4076
- local_proxy.add_argument("--target-host", help="Advisory target host; must be localhost/loopback.")
4077
- local_proxy.add_argument("--target-port", default=None, help="Advisory target port; 0 means unspecified.")
4078
- local_proxy.add_argument("--upstream-url", help="Advisory upstream URL; host must be localhost/loopback.")
4079
- local_proxy.add_argument("--ledger-jsonl", help="Advisory ledger path preview; dry-run only, not written.")
4080
- local_proxy.add_argument("--proxy-label", help="Safe label for this local proxy plan.")
4081
- local_proxy.add_argument("--api-key", help="Blocked/redacted API key material; never persisted or emitted raw.")
4082
- local_proxy.add_argument("--authorization-header", help="Blocked/redacted Authorization header; never persisted or emitted raw.")
4083
- local_proxy.add_argument("--persist-api-key", action="store_true", help="Declare API-key persistence intent; blocked by default.")
4084
- local_proxy.add_argument(
4085
- "--external-forwarding-intent",
4086
- action="store_true",
4087
- help="Declare future external forwarding intent; blocked in this dry-run planner.",
4088
- )
4089
- local_proxy.add_argument(
4090
- "--runtime-gate-ack",
4091
- action="store_true",
4092
- help="Acknowledge that any future forwarding needs a separate runtime gate.",
4093
- )
4094
- local_proxy.add_argument("--json", action="store_true", help="Emit JSON output.")
4095
- local_proxy.set_defaults(func=command_plan_local_proxy)
4096
-
4097
- external_proxy = plan_sub.add_parser(
4098
- "local-proxy-external-forwarding",
4099
- help="Dry-run an external-forwarding opt-in design gate without forwarding traffic.",
4100
- )
4101
- external_proxy.add_argument(
4102
- "--external-forwarding-intent",
4103
- action="store_true",
4104
- help="Acknowledge intent to design a future external-forwarding proxy surface.",
4105
- )
4106
- external_proxy.add_argument(
4107
- "--external-forwarding-design-ack",
4108
- action="store_true",
4109
- help="Acknowledge this command is design-only and does not enable external forwarding.",
4110
- )
4111
- external_proxy.add_argument("--allow-host", action="append", help="Explicit non-wildcard public host allowed by the future design. Repeatable.")
4112
- external_proxy.add_argument("--allow-scheme", action="append", help="Allowed scheme for the future design; HTTPS is required. Repeatable.")
4113
- external_proxy.add_argument("--threat-model-note", action="append", help="Threat-model note for the future external-forwarding design. Repeatable.")
4114
- external_proxy.add_argument(
4115
- "--credential-redaction-policy",
4116
- help=f"Required policy: {LOCAL_PROXY_EXTERNAL_CREDENTIAL_REDACTION_POLICY}.",
4117
- )
4118
- external_proxy.add_argument(
4119
- "--provider-evidence-boundary",
4120
- help=f"Required policy: {LOCAL_PROXY_EXTERNAL_PROVIDER_EVIDENCE_BOUNDARY}.",
4121
- )
4122
- external_proxy.add_argument("--json", action="store_true", help="Emit JSON output.")
4123
- external_proxy.set_defaults(func=command_plan_local_proxy_external_forwarding)
4124
-
4125
- learned = plan_sub.add_parser(
4126
- "learned-compression",
4127
- help="Dry-run a deny-by-default learned/synthetic compression safety gate.",
4128
- )
4129
- learned.add_argument("--input", help="Read candidate prose from a text file instead of stdin.")
4130
- learned.add_argument("--source-label", help="Safe label to use for the input source in reports.")
4131
- learned.add_argument("--sanitized", action="store_true", help="Assert input is already sanitized.")
4132
- learned.add_argument("--trusted-source", action="store_true", help="Assert input came from a trusted source.")
4133
- learned.add_argument("--exact-fallback-receipt", help="Local exact fallback receipt id for the original text.")
4134
- learned.add_argument("--reexpand-command", help="Local exact re-expand command bound to the receipt id.")
4135
- learned.add_argument("--json", action="store_true", help="Emit JSON output.")
4136
- learned.set_defaults(func=command_plan_learned_compression)
4137
-
4138
- emit_parser = sub.add_parser("emit", help="Emit explicit local runtime outputs for experimental lanes.")
4139
- emit_sub = emit_parser.add_subparsers(dest="emit_command", required=True)
4140
- emit_context_diff = emit_sub.add_parser(
4141
- "context-diff-compaction",
4142
- help="Emit a caller-supplied compact diff replacement only with exact retrieval metadata.",
4143
- )
4144
- emit_context_diff.add_argument("--input", help="Read original diff text from a file instead of stdin.")
4145
- emit_context_diff.add_argument("--source-label", help="Safe label to use for the diff input source in reports.")
4146
- emit_context_diff.add_argument("--receipt-id", required=True, help="Exact local artifact receipt id for the original diff.")
4147
- emit_context_diff.add_argument("--reexpand-command", required=True, help="Exact command that restores the original diff.")
4148
- replacement_group = emit_context_diff.add_mutually_exclusive_group(required=True)
4149
- replacement_group.add_argument("--replacement-text", help="Caller-supplied compact replacement text to emit.")
4150
- replacement_group.add_argument("--replacement-file", help="Read caller-supplied compact replacement text from a file.")
4151
- emit_context_diff.add_argument("--json", action="store_true", help="Emit JSON output.")
4152
- emit_context_diff.set_defaults(func=command_emit_context_diff_compaction)
4153
-
4154
- emit_visual_ocr = emit_sub.add_parser(
4155
- "visual-crop-ocr",
4156
- help="Emit a caller-supplied visual crop/OCR evidence pack without image/OCR services.",
4157
- )
4158
- emit_visual_ocr.add_argument("--full-evidence-receipt", help="User-supplied receipt/id for the original full visual evidence.")
4159
- emit_visual_ocr.add_argument("--full-evidence-label", help="Safe label for the full visual evidence.")
4160
- emit_visual_ocr.add_argument("--crop-label", help="Safe label for the cropped region or crop fixture.")
4161
- emit_visual_ocr.add_argument("--crop-bounds", help="Crop bounds as x,y,width,height integers.")
4162
- emit_visual_ocr.add_argument("--image-size", help="Original image size as width,height integers.")
4163
- emit_visual_ocr.add_argument("--ocr-text", help="Bounded OCR fixture text supplied inline.")
4164
- emit_visual_ocr.add_argument("--ocr-text-file", help="Read bounded OCR fixture text from a UTF-8 text file.")
4165
- emit_visual_ocr.add_argument("--ocr-source-label", help="Safe label for OCR text source; defaults to inline or file basename.")
4166
- emit_visual_ocr.add_argument("--ocr-confidence", help="OCR confidence as a finite decimal from 0.0 to 1.0.")
4167
- emit_visual_ocr.add_argument("--ocr-error-note", action="append", help="Known OCR error/uncertainty note. Repeatable.")
4168
- emit_visual_ocr.add_argument("--missed-context-note", action="append", help="Potential context outside crop/OCR text. Repeatable.")
4169
- emit_visual_ocr.add_argument("--json", action="store_true", help="Emit JSON output.")
4170
- emit_visual_ocr.set_defaults(func=command_emit_visual_crop_ocr)
4171
-
4172
- emit_learned = emit_sub.add_parser(
4173
- "learned-compression",
4174
- help="Emit a caller-supplied compact prose candidate only with verified exact fallback.",
4175
- )
4176
- emit_learned.add_argument("--input", help="Read original prose text from a file instead of stdin.")
4177
- emit_learned.add_argument("--source-label", help="Safe label to use for the input source in reports.")
4178
- emit_learned.add_argument("--sanitized", action="store_true", help="Assert input is already sanitized.")
4179
- emit_learned.add_argument("--trusted-source", action="store_true", help="Assert input came from a trusted source.")
4180
- emit_learned.add_argument("--exact-fallback-receipt", required=True, help="Local exact fallback receipt id for the original text.")
4181
- emit_learned.add_argument("--reexpand-command", required=True, help="Local exact re-expand command bound to the receipt id.")
4182
- learned_replacement_group = emit_learned.add_mutually_exclusive_group(required=True)
4183
- learned_replacement_group.add_argument("--replacement-text", help="Caller-supplied compact prose candidate to emit.")
4184
- learned_replacement_group.add_argument("--replacement-file", help="Read caller-supplied compact prose candidate from a file.")
4185
- emit_learned.add_argument("--json", action="store_true", help="Emit JSON output.")
4186
- emit_learned.set_defaults(func=command_emit_learned_compression)
4187
-
4188
- record_parser = sub.add_parser("record", help="Run explicit local runtime recorders for experimental lanes.")
4189
- record_sub = record_parser.add_subparsers(dest="record_command", required=True)
4190
- record_self_hosted = record_sub.add_parser(
4191
- "self-hosted-metrics-ledger",
4192
- help="Append one self-hosted/local metrics sidecar row to a JSONL ledger.",
4193
- )
4194
- record_self_hosted.add_argument("--ledger-jsonl", required=True, help="Local JSONL ledger path to append.")
4195
- record_self_hosted.add_argument("--input", help="Read an explicit self_hosted_metrics JSON envelope from a file instead of stdin.")
4196
- record_self_hosted.add_argument("--source-label", help="Safe label to use for the input source in reports.")
4197
- record_self_hosted.add_argument("--latency-ms", type=float, default=None, help="Local/model-server latency in milliseconds.")
4198
- record_self_hosted.add_argument("--peak-memory-mb", type=float, default=None, help="Peak local/model-server memory in MiB/MB.")
4199
- record_self_hosted.add_argument("--quality-score", type=float, default=None, help="Quality score from 0.0 to 1.0.")
4200
- record_self_hosted.add_argument("--energy-wh", type=float, default=None, help="Diagnostic local energy use in watt-hours.")
4201
- record_self_hosted.add_argument("--local-cost-usd", type=float, default=None, help="Diagnostic local/self-hosted cost in USD.")
4202
- record_self_hosted.add_argument("--tokens-per-second", type=float, default=None, help="Diagnostic local throughput.")
4203
- record_self_hosted.add_argument("--model-server", help="Sanitized label for local model server/runtime.")
4204
- record_self_hosted.add_argument("--optimization", help="Sanitized label for the local optimization under test.")
4205
- record_self_hosted.add_argument("--quality-metric", help="Sanitized label for quality metric.")
4206
- record_self_hosted.add_argument("--hardware", help="Sanitized local hardware label.")
4207
- record_self_hosted.add_argument("--runtime", help="Sanitized local runtime label.")
4208
- record_self_hosted.add_argument("--dataset", help="Sanitized dataset label.")
4209
- record_self_hosted.add_argument("--task-id", default="self-hosted-metrics-manual", help="Sanitized task id for the ledger row.")
4210
- record_self_hosted.add_argument("--variant", default="self-hosted-metrics-ledger", help="Sanitized variant label for the ledger row.")
4211
- record_self_hosted.add_argument(
4212
- "--success",
4213
- choices=("true", "false", "unknown"),
4214
- default="unknown",
4215
- help="Optional success value for the local run; unknown writes JSON null.",
4216
- )
4217
- record_self_hosted.add_argument(
4218
- "--notes",
4219
- default="explicit self-hosted metrics record; no hosted API savings claim",
4220
- help="Sanitized note for the ledger row.",
4221
- )
4222
- record_self_hosted.add_argument("--json", action="store_true", help="Emit JSON output.")
4223
- record_self_hosted.set_defaults(func=command_record_self_hosted_metrics_ledger)
4224
-
4225
- record_local_proxy = record_sub.add_parser(
4226
- "local-proxy-runtime-gate",
4227
- help="Append one localhost-only local proxy runtime-gate row without starting a proxy.",
4228
- )
4229
- record_local_proxy.add_argument("--input", help="Read a local_proxy JSON envelope from a file instead of CLI flags.")
4230
- record_local_proxy.add_argument("--bind-host", help="Advisory bind host; must be localhost/loopback.")
4231
- record_local_proxy.add_argument("--bind-port", default=None, help="Advisory bind port; 0 means unspecified/ephemeral.")
4232
- record_local_proxy.add_argument("--target-host", help="Advisory target host; must be localhost/loopback.")
4233
- record_local_proxy.add_argument("--target-port", default=None, help="Advisory target port; 0 means unspecified.")
4234
- record_local_proxy.add_argument("--upstream-url", help="Advisory upstream URL; host must be localhost/loopback.")
4235
- record_local_proxy.add_argument("--ledger-jsonl", required=True, help="Local JSONL ledger path to append the gate row.")
4236
- record_local_proxy.add_argument("--proxy-label", help="Safe label for this local proxy gate record.")
4237
- record_local_proxy.add_argument("--api-key", help="Blocked/redacted API key material; never persisted or emitted raw.")
4238
- record_local_proxy.add_argument("--authorization-header", help="Blocked/redacted Authorization header; never persisted or emitted raw.")
4239
- record_local_proxy.add_argument("--persist-api-key", action="store_true", help="Declare API-key persistence intent; blocked.")
4240
- record_local_proxy.add_argument(
4241
- "--external-forwarding-intent",
4242
- action="store_true",
4243
- help="Declare future external forwarding intent; blocked in this gate recorder.",
4244
- )
4245
- record_local_proxy.add_argument(
4246
- "--runtime-gate-ack",
4247
- action="store_true",
4248
- help="Acknowledge this is only a local gate record and any forwarding needs a separate runtime gate.",
4249
- )
4250
- record_local_proxy.add_argument("--json", action="store_true", help="Emit JSON output.")
4251
- record_local_proxy.set_defaults(func=command_record_local_proxy_runtime_gate)
4252
-
4253
- serve_parser = sub.add_parser("serve", help="Run explicit bounded local servers for experimental lanes.")
4254
- serve_sub = serve_parser.add_subparsers(dest="serve_command", required=True)
4255
- serve_local_proxy = serve_sub.add_parser(
4256
- "local-proxy",
4257
- help="Serve one bounded localhost-only HTTP forwarding request.",
4258
- )
4259
- serve_local_proxy.add_argument("--input", help="Read a local_proxy JSON envelope from a file instead of CLI flags.")
4260
- serve_local_proxy.add_argument("--bind-host", help="Bind host; actual serving requires a literal loopback IP.")
4261
- serve_local_proxy.add_argument("--bind-port", default=None, help="Bind port; must be a nonzero explicit port for serving.")
4262
- serve_local_proxy.add_argument("--target-host", help="Target host; actual forwarding requires a literal loopback IP.")
4263
- serve_local_proxy.add_argument("--target-port", default=None, help="Target port; must be a nonzero explicit port for forwarding.")
4264
- serve_local_proxy.add_argument("--upstream-url", help="Optional upstream URL; host must be a literal loopback IP for serving.")
4265
- serve_local_proxy.add_argument("--proxy-label", help="Safe label for this local proxy serve run.")
4266
- serve_local_proxy.add_argument(
4267
- "--diagnostic-ledger-jsonl",
4268
- help="Append one shifted-cost diagnostic JSONL row only after a successful loopback forwarded request.",
4269
- )
4270
- serve_local_proxy.add_argument("--api-key", help="Blocked/redacted API key material; never persisted or emitted raw.")
4271
- serve_local_proxy.add_argument("--authorization-header", help="Blocked/redacted Authorization header; never persisted or emitted raw.")
4272
- serve_local_proxy.add_argument("--persist-api-key", action="store_true", help="Declare API-key persistence intent; blocked.")
4273
- serve_local_proxy.add_argument(
4274
- "--external-forwarding-intent",
4275
- action="store_true",
4276
- help="Declare external forwarding intent; blocked in this local-only runtime.",
4277
- )
4278
- serve_local_proxy.add_argument(
4279
- "--runtime-gate-ack",
4280
- action="store_true",
4281
- help="Acknowledge this is an explicit experimental runtime.",
4282
- )
4283
- serve_local_proxy.add_argument(
4284
- "--forwarding-gate-ack",
4285
- action="store_true",
4286
- help="Acknowledge this starts a loopback-only forwarding listener for one bounded request.",
4287
- )
4288
- serve_local_proxy.add_argument("--once", action="store_true", help="Serve exactly one accepted or blocked request; required for this MVP.")
4289
- serve_local_proxy.add_argument(
4290
- "--max-request-bytes",
4291
- default=None,
4292
- help=f"Maximum request body bytes, 1..{LOCAL_PROXY_MAX_FORWARD_BYTES}.",
4293
- )
4294
- serve_local_proxy.add_argument(
4295
- "--max-response-bytes",
4296
- default=None,
4297
- help=f"Maximum upstream response bytes, 1..{LOCAL_PROXY_MAX_FORWARD_BYTES}.",
4298
- )
4299
- serve_local_proxy.add_argument(
4300
- "--timeout-seconds",
4301
- default=None,
4302
- help=f"Listener/upstream timeout seconds, 0.1..{LOCAL_PROXY_MAX_TIMEOUT_SECONDS}.",
4303
- )
4304
- serve_local_proxy.add_argument("--ready-file", help=argparse.SUPPRESS)
4305
- serve_local_proxy.add_argument("--json", action="store_true", help="Emit JSON output after the single request completes.")
4306
- serve_local_proxy.set_defaults(func=command_serve_local_proxy)
4307
-
4308
- return parser
4309
-
4310
-
4311
- def normalize_negative_csv_option_values(argv: list[str] | None) -> list[str] | None:
4312
- """Keep negative comma-separated option values portable across Python versions.
4313
-
4314
- Python 3.11/3.12 argparse treats a value such as ``-1,0,20,10`` after an
4315
- option as another option token rather than as the option's value. Python
4316
- 3.14 accepts the same test input, so normalize the small set of CSV-valued
4317
- options that intentionally accepts negative numbers for validation.
4318
- """
4319
- if argv is None:
4320
- argv = sys.argv[1:]
4321
- normalized: list[str] = []
4322
- pending_csv_option: str | None = None
4323
- csv_options = {"--crop-bounds"}
4324
- for token in argv:
4325
- if pending_csv_option is not None:
4326
- normalized.append(f"{pending_csv_option}={token}")
4327
- pending_csv_option = None
4328
- continue
4329
- if token in csv_options:
4330
- pending_csv_option = token
4331
- continue
4332
- normalized.append(token)
4333
- if pending_csv_option is not None:
4334
- normalized.append(pending_csv_option)
4335
- return normalized
4336
-
4337
-
4338
def main(argv: list[str] | None = None) -> int:
    """Parse the command line and dispatch to the selected subcommand.

    Args:
        argv: Command-line tokens without the program name, or ``None`` to
            use the process arguments.

    Returns:
        The subcommand handler's result coerced to ``int``, used as the
        process exit status by the ``__main__`` guard.
    """
    parser = build_parser()
    args = parser.parse_args(normalize_negative_csv_option_values(argv))
    try:
        # Each subparser registers its handler via set_defaults(func=...).
        return int(args.func(args))
    except RegistryError as exc:
        fail(str(exc))
    # `fail` is defined outside this view. If it ever returns instead of
    # exiting, report failure explicitly: falling through would return None,
    # and SystemExit(None) exits with status 0.
    return 1
4345
-
4346
-
4347
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())