@ictechgy/context-guard 0.4.8 → 0.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/CHANGELOG.md +29 -0
  2. package/README.ko.md +92 -37
  3. package/README.md +111 -37
  4. package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
  5. package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
  6. package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
  7. package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
  8. package/docs/distribution.md +10 -7
  9. package/docs/experimental-benchmark-fixtures.md +8 -1
  10. package/package.json +3 -6
  11. package/packaging/homebrew/context-guard.rb.template +1 -1
  12. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  13. package/plugins/context-guard/README.ko.md +9 -6
  14. package/plugins/context-guard/README.md +27 -12
  15. package/plugins/context-guard/bin/context-guard +113 -26
  16. package/plugins/context-guard/bin/context-guard-artifact +542 -46
  17. package/plugins/context-guard/bin/context-guard-cache-score +380 -0
  18. package/plugins/context-guard/bin/context-guard-compress +146 -1
  19. package/plugins/context-guard/bin/context-guard-cost +783 -4
  20. package/plugins/context-guard/bin/context-guard-experiments +2211 -121
  21. package/plugins/context-guard/bin/context-guard-failed-nudge +3 -0
  22. package/plugins/context-guard/bin/context-guard-filter +163 -7
  23. package/plugins/context-guard/bin/context-guard-guard-read +3 -0
  24. package/plugins/context-guard/bin/context-guard-pack +602 -43
  25. package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
  26. package/plugins/context-guard/bin/context-guard-setup +165 -31
  27. package/plugins/context-guard/bin/context-guard-statusline +490 -283
  28. package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
  29. package/plugins/context-guard/bin/context-guard-tool-prune +241 -1
  30. package/plugins/context-guard/lib/context_guard_commands.py +206 -0
  31. package/plugins/context-guard/skills/setup/SKILL.md +1 -0
  32. package/context-guard-kit/README.md +0 -91
  33. package/context-guard-kit/benchmark_runner.py +0 -2401
  34. package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
  35. package/context-guard-kit/context_compress.py +0 -695
  36. package/context-guard-kit/context_escrow.py +0 -935
  37. package/context-guard-kit/context_filter.py +0 -637
  38. package/context-guard-kit/context_guard_cli.py +0 -325
  39. package/context-guard-kit/context_guard_diet.py +0 -1711
  40. package/context-guard-kit/context_pack.py +0 -2713
  41. package/context-guard-kit/cost_guard.py +0 -2349
  42. package/context-guard-kit/experimental_registry.py +0 -2339
  43. package/context-guard-kit/failed_attempt_nudge.py +0 -567
  44. package/context-guard-kit/guard_large_read.py +0 -690
  45. package/context-guard-kit/hook_secret_patterns.py +0 -43
  46. package/context-guard-kit/read_symbol.py +0 -483
  47. package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
  48. package/context-guard-kit/sanitize_output.py +0 -725
  49. package/context-guard-kit/settings.example.json +0 -67
  50. package/context-guard-kit/setup_wizard.py +0 -2515
  51. package/context-guard-kit/statusline.sh +0 -362
  52. package/context-guard-kit/statusline_merged.sh +0 -157
  53. package/context-guard-kit/tool_schema_pruner.py +0 -837
  54. package/context-guard-kit/trim_command_output.py +0 -1449
@@ -11,6 +11,8 @@ from __future__ import annotations
11
11
  import argparse
12
12
  from dataclasses import asdict, dataclass
13
13
  from datetime import datetime, timezone
14
+ import http.client
15
+ from http.server import BaseHTTPRequestHandler, HTTPServer
14
16
  import hashlib
15
17
  import ipaddress
16
18
  import json
@@ -19,9 +21,12 @@ import os
19
21
  import re
20
22
  import secrets
21
23
  import shlex
24
+ import socket
25
+ from socketserver import TCPServer
22
26
  from pathlib import Path
23
27
  import stat
24
28
  import sys
29
+ import time
25
30
  from typing import Any, NoReturn
26
31
  import unicodedata
27
32
  from urllib.parse import urlparse
@@ -31,8 +36,14 @@ CONFIG_SCHEMA_VERSION = "contextguard.experiments.v1"
31
36
  DEFAULT_CONFIG = Path(".context-guard") / "experiments.json"
32
37
  MAX_CONFIG_BYTES = 64_000
33
38
  MAX_CONTEXT_DIFF_INPUT_BYTES = 256_000
39
+ MAX_CONTEXT_DIFF_REPLACEMENT_BYTES = 128_000
40
+ MAX_CONTEXT_DIFF_ARTIFACT_METADATA_BYTES = 64_000
41
+ DEFAULT_CONTEXT_DIFF_ARTIFACT_DIR = Path(".context-guard") / "artifacts"
42
+ LEGACY_CONTEXT_DIFF_ARTIFACT_DIR = Path(".claude-token-optimizer") / "artifacts"
34
43
  MAX_VISUAL_OCR_TEXT_BYTES = 64_000
35
44
  MAX_LEARNED_COMPRESSION_INPUT_BYTES = 128_000
45
+ MAX_LEARNED_COMPRESSION_REPLACEMENT_BYTES = 64_000
46
+ MAX_LEARNED_COMPRESSION_ARTIFACT_METADATA_BYTES = 64_000
36
47
  MAX_SELF_HOSTED_METRICS_INPUT_BYTES = 64_000
37
48
  SELF_HOSTED_METRICS_SCHEMA_VERSION = "contextguard.bench.self-hosted-metrics.v1"
38
49
  SELF_HOSTED_METRICS_KEY = "self_hosted_metrics"
@@ -48,11 +59,48 @@ TOKEN_PROXY_BYTES_PER_TOKEN = 4
48
59
  MAX_SELF_HOSTED_JSON_DEPTH = 100
49
60
  MAX_SELF_HOSTED_JSON_NODES = 10_000
50
61
  LOCAL_PROXY_SCHEMA_VERSION = "contextguard.experiments.local-proxy-plan.v1"
62
+ LOCAL_PROXY_GATE_SCHEMA_VERSION = "contextguard.experiments.local-proxy-gate.v1"
63
+ LOCAL_PROXY_FORWARD_SCHEMA_VERSION = "contextguard.experiments.local-proxy-forward.v1"
64
+ LOCAL_PROXY_DIAGNOSTIC_SCHEMA_VERSION = "contextguard.experiments.local-proxy-forward-diagnostic.v1"
65
+ LOCAL_PROXY_READY_SCHEMA_VERSION = "contextguard.experiments.local-proxy-ready.v1"
66
+ LOCAL_PROXY_EXTERNAL_DESIGN_SCHEMA_VERSION = "contextguard.experiments.local-proxy-external-forwarding-design.v1"
51
67
  LOCAL_PROXY_DEFAULT_BIND_HOST = "127.0.0.1"
52
68
  LOCAL_PROXY_DEFAULT_BIND_PORT = 0
53
69
  LOCAL_PROXY_DEFAULT_TARGET_HOST = "127.0.0.1"
54
70
  LOCAL_PROXY_DEFAULT_TARGET_PORT = 0
55
71
  LOCAL_PROXY_LOCALHOST_NAMES = {"localhost"}
72
# Spellings recognised as boolean true / false for local-proxy settings.
# NOTE(review): presumably compared against a normalised (lower-cased) value;
# confirm at the parsing site.
LOCAL_PROXY_TRUE_VALUES = {"1", "on", "true", "yes", "y"}
LOCAL_PROXY_FALSE_VALUES = {"", "0", "false", "n", "no", "off"}

# Byte budgets: 64 KiB request default, 256 KiB response default, 2 MiB ceiling.
LOCAL_PROXY_DEFAULT_MAX_REQUEST_BYTES = 64 * 1024
LOCAL_PROXY_DEFAULT_MAX_RESPONSE_BYTES = 256 * 1024
LOCAL_PROXY_MAX_FORWARD_BYTES = 2 * 1024 * 1024

# Timeout default and upper bound, in seconds.
LOCAL_PROXY_DEFAULT_TIMEOUT_SECONDS = 5.0
LOCAL_PROXY_MAX_TIMEOUT_SECONDS = 30.0

# Values used by the external-forwarding design plan.
LOCAL_PROXY_EXTERNAL_ALLOWED_SCHEMES = {"https"}
LOCAL_PROXY_EXTERNAL_CREDENTIAL_REDACTION_POLICY = "strip-sensitive-headers"
LOCAL_PROXY_EXTERNAL_PROVIDER_EVIDENCE_BOUNDARY = "diagnostic-only-provider-measured-required"

# Lower-case header names that carry credential material.
LOCAL_PROXY_SENSITIVE_HEADER_NAMES = {
    "authorization",
    "proxy-authorization",
    "x-api-key",
    "api-key",
    "x-anthropic-api-key",
    "x-openai-api-key",
    "openai-api-key",
    "cookie",
    "set-cookie",
}

# Header name used for the proxy nonce.
LOCAL_PROXY_NONCE_HEADER = "X-ContextGuard-Proxy-Nonce"

# Lower-case hop-by-hop header names.
LOCAL_PROXY_HOP_BY_HOP_HEADERS = {
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailer",
    "transfer-encoding",
    "upgrade",
}
56
104
  ALLOWED_FIRST_COMPONENT_SYMLINKS = {
57
105
  "tmp": Path("/private/tmp"),
58
106
  "var": Path("/private/var"),
@@ -145,37 +193,51 @@ EXPERIMENTS: tuple[Experiment, ...] = (
145
193
  Experiment(
146
194
  id="context-diff-compaction",
147
195
  name="Reviewable context-diff compaction",
148
- summary="Dry-run advisory lane for human-reviewable compaction plans with stable exact handles.",
196
+ summary="Explicit receipt-backed runtime for caller-supplied compact diff replacements with stable exact handles.",
149
197
  stability="experimental",
150
198
  default_enabled=False,
151
199
  risk_level="medium",
152
200
  claim_boundary="Smaller local diffs are proxy evidence only; hosted savings require provider-measured matched tasks.",
153
201
  gate_requirements=("explicit opt-in", "human-reviewable diff", "local receipt", "exact re-expand handle"),
154
- runtime_status="available-dry-run",
155
- commands=("context-guard experiments plan context-diff-compaction",),
156
- opt_in_flags=("plan context-diff-compaction", "--receipt-id", "--reexpand-command"),
202
+ runtime_status="available-explicit-runtime",
203
+ commands=(
204
+ "context-guard experiments plan context-diff-compaction",
205
+ "context-guard experiments emit context-diff-compaction --receipt-id <id> --reexpand-command <cmd>",
206
+ ),
207
+ opt_in_flags=(
208
+ "plan context-diff-compaction",
209
+ "emit context-diff-compaction",
210
+ "--receipt-id",
211
+ "--reexpand-command",
212
+ "--replacement-text|--replacement-file",
213
+ ),
157
214
  config_effect=(
158
- "Registry enablement records project-local intent only; context-diff compaction remains a dry-run plan "
159
- "unless a future story adds an explicit replacement command."
215
+ "Registry enablement records project-local intent only; context-diff replacement emits only through the "
216
+ "explicit emit command with exact retrieval metadata and caller-supplied compact text."
160
217
  ),
161
218
  evidence_contract=(
162
- "Dry-run plans require human-reviewable hunks plus user-supplied exact receipt and re-expand handles before "
163
- "any future lossy replacement can be reviewed."
219
+ "Emitted replacements require human-reviewable hunks, caller-supplied compact text, and exact local "
220
+ "artifact content that matches the input diff plus re-expand metadata; smaller local diffs remain proxy "
221
+ "evidence only."
164
222
  ),
165
223
  ),
166
224
  Experiment(
167
225
  id="visual-crop-ocr",
168
- name="Visual crop/OCR evidence planning",
169
- summary="Dry-run fixture lane for comparing full visual evidence with cropped or OCR-derived evidence.",
226
+ name="Visual crop/OCR evidence pack",
227
+ summary="Explicit local runtime for caller-supplied visual crop/OCR evidence packs.",
170
228
  stability="experimental",
171
229
  default_enabled=False,
172
230
  risk_level="medium",
173
231
  claim_boundary="Image/OCR byte reductions are proxy evidence until provider image/text token fields are measured.",
174
232
  gate_requirements=("explicit opt-in", "original evidence preserved", "confidence/error notes", "missed-context guardrail"),
175
- runtime_status="available-dry-run",
176
- commands=("context-guard experiments plan visual-crop-ocr",),
233
+ runtime_status="available-explicit-runtime",
234
+ commands=(
235
+ "context-guard experiments plan visual-crop-ocr",
236
+ "context-guard experiments emit visual-crop-ocr",
237
+ ),
177
238
  opt_in_flags=(
178
239
  "plan visual-crop-ocr",
240
+ "emit visual-crop-ocr",
179
241
  "--full-evidence-receipt",
180
242
  "--crop-bounds",
181
243
  "--image-size",
@@ -185,48 +247,64 @@ EXPERIMENTS: tuple[Experiment, ...] = (
185
247
  "--missed-context-note",
186
248
  ),
187
249
  config_effect=(
188
- "Registry enablement records project-local intent only; visual crop/OCR planning remains a dry-run "
189
- "metadata surface and does not run OCR, crop images, call providers, or change stable behavior."
250
+ "Registry enablement records project-local intent only; visual crop/OCR evidence packs emit only through "
251
+ "the explicit emit command and do not run OCR, crop images, call providers, write files, or change stable behavior."
190
252
  ),
191
253
  evidence_contract=(
192
- "Dry-run plans require retrievable full visual evidence plus crop/OCR confidence, error, and "
193
- "missed-context guardrails before human review."
254
+ "Emitted evidence packs require the full visual evidence receipt plus caller-supplied crop/OCR evidence, "
255
+ "OCR confidence/error notes when OCR is present, and missed-context guardrails before human review."
194
256
  ),
195
257
  ),
196
258
  Experiment(
197
259
  id="learned-compression",
198
- name="Learned/synthetic compression safe gate",
199
- summary="Deny-by-default dry-run safety gate for already-sanitized unprotected prose only.",
260
+ name="Learned/synthetic compression candidate gate",
261
+ summary="Explicit local runtime for caller-supplied compact prose candidates with verified exact fallback.",
200
262
  stability="experimental",
201
263
  default_enabled=False,
202
264
  risk_level="high",
203
265
  claim_boundary="Semantic compression cannot claim savings or correctness without matched-task quality and provider token evidence.",
204
266
  gate_requirements=("explicit opt-in", "sanitized unprotected prose only", "protected-zone denial", "exact fallback or receipt"),
205
- runtime_status="available-dry-run",
206
- commands=("context-guard experiments plan learned-compression",),
207
- opt_in_flags=("plan learned-compression", "--sanitized", "--trusted-source", "--exact-fallback-receipt", "--reexpand-command"),
267
+ runtime_status="available-explicit-runtime",
268
+ commands=(
269
+ "context-guard experiments plan learned-compression",
270
+ "context-guard experiments emit learned-compression --exact-fallback-receipt <id> --reexpand-command <cmd>",
271
+ ),
272
+ opt_in_flags=(
273
+ "plan learned-compression",
274
+ "emit learned-compression",
275
+ "--sanitized",
276
+ "--trusted-source",
277
+ "--exact-fallback-receipt",
278
+ "--reexpand-command",
279
+ "--replacement-text|--replacement-file",
280
+ ),
208
281
  config_effect=(
209
- "Registry enablement records project-local intent only; learned compression remains a dry-run policy check "
210
- "and does not run learned compressors, embeddings, model calls, or replacements."
282
+ "Registry enablement records project-local intent only; learned-compression candidates emit only through "
283
+ "the explicit emit command and do not run learned compressors, embeddings, rerankers, model calls, subprocesses, or external services."
211
284
  ),
212
285
  evidence_contract=(
213
- "Dry-run eligibility requires caller-asserted sanitized trusted prose, exact local fallback handles, and "
214
- "denial of protected or prompt-like signals."
286
+ "Emitted candidates require caller-asserted sanitized trusted prose, verified exact local fallback content, "
287
+ "a smaller caller-supplied prose candidate, and denial of protected or prompt-like signals."
215
288
  ),
216
289
  ),
217
290
  Experiment(
218
291
  id="self-hosted-metrics-ledger",
219
292
  name="Self-hosted metrics ledger",
220
- summary="Dry-run checker for self-hosted/local metrics ledger sidecars kept separate from hosted API claims.",
293
+ summary="Explicit local ledger runtime for self-hosted/local metrics sidecars kept separate from hosted API claims.",
221
294
  stability="experimental",
222
295
  default_enabled=False,
223
296
  risk_level="low",
224
297
  claim_boundary="Self-hosted memory/latency metrics must stay separate from hosted API token/cost claims.",
225
298
  gate_requirements=("explicit opt-in", "separate ledger fields", "shifted-cost accounting"),
226
- runtime_status="available-dry-run",
227
- commands=("context-guard experiments plan self-hosted-metrics-ledger",),
299
+ runtime_status="available-explicit-runtime",
300
+ commands=(
301
+ "context-guard experiments plan self-hosted-metrics-ledger",
302
+ "context-guard experiments record self-hosted-metrics-ledger --ledger-jsonl <path>",
303
+ ),
228
304
  opt_in_flags=(
229
305
  "plan self-hosted-metrics-ledger",
306
+ "record self-hosted-metrics-ledger",
307
+ "--ledger-jsonl",
230
308
  "--input",
231
309
  "--latency-ms",
232
310
  "--peak-memory-mb",
@@ -238,43 +316,70 @@ EXPERIMENTS: tuple[Experiment, ...] = (
238
316
  "--optimization",
239
317
  ),
240
318
  config_effect=(
241
- "Registry enablement records project-local intent only; self-hosted metrics planning remains a dry-run "
242
- "ledger-preview surface and does not write ledgers or alter benchmark/report behavior."
319
+ "Registry enablement records project-local intent only; self-hosted metrics still write a ledger only "
320
+ "when the explicit record command is invoked with --ledger-jsonl."
243
321
  ),
244
322
  evidence_contract=(
245
- "Real evidence belongs in context-guard-bench JSONL ledger sidecars; self-hosted metrics remain separate "
246
- "from hosted API token/cost savings."
323
+ "The explicit record command writes context-guard-bench JSONL ledger sidecars; self-hosted metrics "
324
+ "remain separate from hosted API token/cost savings."
247
325
  ),
248
326
  ),
249
327
  Experiment(
250
328
  id="local-proxy",
251
- name="Local proxy advisory lane",
252
- summary="Dry-run localhost-only proxy advisory plan with no hidden forwarding or API-key persistence.",
329
+ name="Local proxy runtime gate",
330
+ summary="Explicit local gate-record runtime for localhost-only proxy experiments with no hidden forwarding or API-key persistence.",
253
331
  stability="experimental",
254
332
  default_enabled=False,
255
333
  risk_level="high",
256
334
  claim_boundary="Proxy metrics are diagnostic only; no hosted savings claim without provider-measured evidence.",
257
335
  gate_requirements=("explicit opt-in", "localhost-only default", "no API-key persistence", "no hidden external forwarding"),
258
- runtime_status="available-dry-run",
259
- commands=("context-guard experiments plan local-proxy",),
336
+ runtime_status="available-explicit-runtime",
337
+ commands=(
338
+ "context-guard experiments plan local-proxy",
339
+ "context-guard experiments plan local-proxy-external-forwarding",
340
+ "context-guard experiments record local-proxy-runtime-gate --ledger-jsonl <path>",
341
+ "context-guard experiments serve local-proxy --bind-host 127.0.0.1 --bind-port <port> --target-host 127.0.0.1 --target-port <port> --runtime-gate-ack --forwarding-gate-ack --once --ready-file <path>",
342
+ "context-guard experiments serve local-proxy --ready-file <ready-file> --diagnostic-ledger-jsonl <path> ...",
343
+ ),
260
344
  opt_in_flags=(
261
345
  "plan local-proxy",
346
+ "plan local-proxy-external-forwarding",
347
+ "record local-proxy-runtime-gate",
348
+ "serve local-proxy",
262
349
  "--bind-host",
263
350
  "--bind-port",
264
351
  "--target-host",
265
352
  "--target-port",
266
353
  "--upstream-url",
354
+ "--ledger-jsonl",
267
355
  "--runtime-gate-ack",
356
+ "--forwarding-gate-ack",
357
+ "--once",
358
+ "--max-request-bytes",
359
+ "--max-response-bytes",
360
+ "--diagnostic-ledger-jsonl",
361
+ "--ready-file",
268
362
  "--external-forwarding-intent",
363
+ "--external-forwarding-design-ack",
364
+ "--allow-host",
365
+ "--allow-scheme",
366
+ "--threat-model-note",
367
+ "--credential-redaction-policy",
368
+ "--provider-evidence-boundary",
269
369
  "--persist-api-key",
270
370
  ),
271
371
  config_effect=(
272
- "Registry enablement records project-local intent only; local proxy planning remains a dry-run advisory "
273
- "surface and does not bind sockets, forward traffic, persist API keys, or write ledgers."
372
+ "Registry enablement records project-local intent only; local proxy record/serve runtimes run only through "
373
+ "explicit commands. Serve binds and forwards only literal loopback addresses, blocks credential material, "
374
+ "and never persists API keys or calls non-local services; external-forwarding planning is design-only."
274
375
  ),
275
376
  evidence_contract=(
276
- "Dry-run plans require localhost-only bind/target metadata, explicit runtime gate acknowledgement before "
277
- "any future forwarding, and no raw API-key persistence."
377
+ "Gate rows require localhost-only bind/target metadata and explicit runtime gate acknowledgement. Serve "
378
+ "evidence requires loopback-only bind/target IPs, a private ready-file nonce handoff, explicit forwarding "
379
+ "acknowledgement, no credential forwarding or persistence, bounded bytes/timeouts, and optional diagnostic "
380
+ "ledger rows that remain shifted-cost evidence only. External-forwarding design plans require threat model "
381
+ "notes, explicit allowlists, credential redaction policy, and provider-evidence boundaries before any future "
382
+ "runtime."
278
383
  ),
279
384
  ),
280
385
  )
@@ -337,6 +442,18 @@ def _temp_file_open_flags(*, label: str) -> int:
337
442
  return flags
338
443
 
339
444
 
445
def _append_file_open_flags(*, label: str) -> int:
    """Return the ``os.open`` flag mask for appending to a file.

    The mask always contains ``O_WRONLY | O_CREAT | O_APPEND`` plus whatever
    ``_no_follow_flag`` returns for *label*. ``O_CLOEXEC``, ``O_NONBLOCK`` and
    ``O_NOCTTY`` are added only on platforms where ``os`` defines them.
    """
    mask = os.O_WRONLY | os.O_CREAT | os.O_APPEND | _no_follow_flag(label=label)
    # A missing platform flag contributes 0, which leaves the mask unchanged.
    for optional_flag in ("O_CLOEXEC", "O_NONBLOCK", "O_NOCTTY"):
        mask |= getattr(os, optional_flag, 0)
    return mask
455
+
456
+
340
457
  def _leaf_name(path: Path, *, label: str) -> str:
341
458
  name = path.name
342
459
  if name in {"", ".", ".."}:
@@ -591,6 +708,149 @@ def write_regular_file_no_follow(path: Path, data: bytes, *, label: str) -> None
591
708
  pass
592
709
 
593
710
 
711
+ def _reject_parent_traversal(path: Path, *, label: str) -> None:
712
+ if any(part == ".." for part in path.parts):
713
+ raise RegistryError(f"{label} must not contain parent traversal")
714
+
715
+
716
def write_regular_file_no_follow_exclusive(path: Path, data: bytes, *, label: str, mode: int = 0o600) -> None:
    """Create *path* as a new regular file and write *data* into it.

    The path is rejected when it contains ``..``, then normalized, and the
    leaf is opened relative to a descriptor for its parent directory. A leaf
    that already exists is refused. If a step fails after the leaf has been
    created, the leaf is unlinked again (best effort).

    Args:
        path: Destination file path.
        data: Bytes to write.
        label: Name of the target used in error messages.
        mode: Permission bits passed to ``os.open`` and re-applied with
            ``os.fchmod`` on a best-effort basis.

    Raises:
        RegistryError: If the path contains parent traversal, the parent
            cannot be inspected, the leaf already exists, the opened
            descriptor is not a regular file, or an ``OSError`` occurs while
            creating or writing the file.
    """

    def _ignore_os_error(action) -> None:
        # Best-effort step: an OSError here is deliberately swallowed.
        try:
            action()
        except OSError:
            pass

    _reject_parent_traversal(path, label=label)
    path = normalize_local_path(path)
    parent_fd = open_directory_no_follow(path.parent, label=f"{label} parent")
    if parent_fd is None:  # pragma: no cover - missing_ok is not enabled.
        raise RegistryError(f"could not inspect {label} parent")

    file_fd = -1
    leaf_created = False
    completed = False
    try:
        leaf = _leaf_name(path, label=label)
        if _precheck_regular_leaf(parent_fd, leaf, label=label, missing_ok=True):
            raise RegistryError(f"{label} must not already exist")
        # NOTE(review): the FileExistsError handler below suggests these flags
        # request exclusive creation; confirm in _temp_file_open_flags.
        file_fd = os.open(leaf, _temp_file_open_flags(label=label), mode, dir_fd=parent_fd)
        leaf_created = True
        if not stat.S_ISREG(os.fstat(file_fd).st_mode):
            raise RegistryError(f"{label} must be a regular file")
        _ignore_os_error(lambda: os.fchmod(file_fd, mode))
        write_all_fd(file_fd, data)
        _ignore_os_error(lambda: os.fsync(file_fd))
        completed = True
    except FileExistsError as exc:
        raise RegistryError(f"{label} must not already exist") from exc
    except OSError as exc:
        raise RegistryError(f"could not write {label}: {os_error_detail(exc)}") from exc
    finally:
        if file_fd >= 0:
            _ignore_os_error(lambda: os.close(file_fd))
        if leaf_created and not completed:
            # Remove the partially written file so a failed call leaves nothing behind.
            _ignore_os_error(lambda: os.unlink(_leaf_name(path, label=label), dir_fd=parent_fd))
        _ignore_os_error(lambda: os.fsync(parent_fd))
        _ignore_os_error(lambda: os.close(parent_fd))
768
+
769
+
770
def append_jsonl_no_follow(path: Path, payload: dict[str, Any], *, label: str) -> int:
    """Append *payload* to *path* as one JSON line and return the bytes written.

    The payload is serialized with sorted keys and ``ensure_ascii=False``,
    UTF-8 encoded, and terminated with a newline. The target is opened
    relative to a descriptor for its parent directory using the append flag
    set from ``_append_file_open_flags``.

    Args:
        path: JSONL file to append to.
        payload: JSON-serializable mapping written as a single line.
        label: Name of the target used in error messages.

    Returns:
        Number of bytes in the appended line, including the trailing newline.

    Raises:
        RegistryError: If the parent cannot be inspected, the opened
            descriptor is not a regular file, or an ``OSError`` occurs.
        TypeError, ValueError: Propagated unchanged from serialization when
            *payload* is not JSON-serializable or cannot be UTF-8 encoded
            (for example a lone surrogate).
    """
    # Serialize before touching the filesystem, so a bad payload fails without
    # creating the parent directory or leaving an empty ledger file behind.
    data = json.dumps(payload, ensure_ascii=False, sort_keys=True).encode("utf-8") + b"\n"
    path = normalize_local_path(path)
    parent_fd = open_directory_no_follow(path.parent, label=f"{label} parent", create=True)
    if parent_fd is None:  # pragma: no cover - create=True never returns None.
        raise RegistryError(f"could not inspect {label} parent")
    fd = -1
    try:
        leaf = _leaf_name(path, label=label)
        _precheck_regular_leaf(parent_fd, leaf, label=label, missing_ok=True)
        fd = os.open(leaf, _append_file_open_flags(label=label), 0o600, dir_fd=parent_fd)
        # Re-check on the opened descriptor; the pre-check alone could race.
        if not stat.S_ISREG(os.fstat(fd).st_mode):
            raise RegistryError(f"{label} must be a regular file")
        write_all_fd(fd, data)
        try:
            os.fsync(fd)
        except OSError:
            # Durability is best effort; the append itself already succeeded.
            pass
        return len(data)
    except OSError as exc:
        raise RegistryError(f"could not append {label}: {os_error_detail(exc)}") from exc
    finally:
        if fd >= 0:
            try:
                os.close(fd)
            except OSError:
                pass
        try:
            os.fsync(parent_fd)
        except OSError:
            pass
        try:
            os.close(parent_fd)
        except OSError:
            pass
805
+
806
+
807
def preflight_append_jsonl_no_follow(path: Path, *, label: str) -> None:
    """Check that *path* can be opened for a no-follow JSONL append.

    Nothing is appended. When the leaf already exists it is opened with the
    append flag set and verified to be a regular file. Otherwise a uniquely
    named ``.preflight`` probe file is created in the parent directory and
    removed again before returning.

    NOTE(review): the parent directory is opened with ``create=True``, so it
    is presumably created when missing; confirm in open_directory_no_follow.

    Raises:
        RegistryError: If the parent cannot be inspected, the target or probe
            is not a regular file, no probe file could be created, or an
            ``OSError`` occurs.
    """
    path = normalize_local_path(path)
    parent_fd = open_directory_no_follow(path.parent, label=f"{label} parent", create=True)
    if parent_fd is None:  # pragma: no cover - create=True never returns None.
        raise RegistryError(f"could not inspect {label} parent")

    probe_fd = -1
    probe_leaf: str | None = None
    probe_label = f"{label} preflight"
    try:
        leaf = _leaf_name(path, label=label)
        if _precheck_regular_leaf(parent_fd, leaf, label=label, missing_ok=True):
            # Existing target: opening it for append is the whole check.
            probe_fd = os.open(leaf, _append_file_open_flags(label=label), 0o600, dir_fd=parent_fd)
            if not stat.S_ISREG(os.fstat(probe_fd).st_mode):
                raise RegistryError(f"{label} must be a regular file")
        else:
            # Missing target: create a throwaway sibling, retrying on name collisions.
            for _ in range(20):
                candidate = _leaf_name(
                    Path(f".{leaf}.{os.getpid()}.{secrets.token_hex(8)}.preflight"),
                    label=probe_label,
                )
                try:
                    probe_fd = os.open(candidate, _temp_file_open_flags(label=probe_label), 0o600, dir_fd=parent_fd)
                except FileExistsError:
                    continue
                probe_leaf = candidate
                break
            if probe_fd < 0 or probe_leaf is None:
                raise RegistryError(f"could not create temporary {label} preflight")
            if not stat.S_ISREG(os.fstat(probe_fd).st_mode):
                raise RegistryError(f"{label} preflight temp must be a regular file")
    except OSError as exc:
        raise RegistryError(f"could not append {label}: {os_error_detail(exc)}") from exc
    finally:
        if probe_fd >= 0:
            try:
                os.close(probe_fd)
            except OSError:
                pass
        if probe_leaf is not None:
            # Only the probe file is removed; an existing target is left alone.
            try:
                os.unlink(probe_leaf, dir_fd=parent_fd)
            except OSError:
                pass
        try:
            os.close(parent_fd)
        except OSError:
            pass
852
+
853
+
594
854
  def resolve_root(raw_root: str | None) -> Path:
595
855
  root = Path(raw_root) if raw_root else Path.cwd()
596
856
  try:
@@ -762,6 +1022,7 @@ def command_disable(args: argparse.Namespace) -> int:
762
1022
 
763
1023
  DIFF_GIT_RE = re.compile(r"^diff --git (?P<old>\S+) (?P<new>\S+)$")
764
1024
  HUNK_RE = re.compile(r"^@@\s+-(?P<old_start>\d+)(?:,(?P<old_count>\d+))?\s+\+(?P<new_start>\d+)(?:,(?P<new_count>\d+))?\s+@@(?P<section>.*)$")
1025
+ CONTEXT_DIFF_ARTIFACT_ID_RE = re.compile(r"^[a-f0-9]{16,64}$")
765
1026
 
766
1027
 
767
1028
  def read_bounded_input(args: argparse.Namespace) -> tuple[str, dict[str, Any]]:
@@ -800,13 +1061,16 @@ def strip_diff_prefix(path: str) -> str:
800
1061
  def summarize_diff(text: str, *, max_files: int = 50, max_hunks: int = 200) -> dict[str, Any]:
801
1062
  files: list[dict[str, Any]] = []
802
1063
  current: dict[str, Any] | None = None
1064
+ current_hunk: dict[str, Any] | None = None
803
1065
  total_hunks = 0
1066
+ summarized_hunks = 0
804
1067
  lines = text.splitlines()
805
1068
  diff_header_count = 0
806
1069
  for line_number, line in enumerate(lines, start=1):
807
1070
  match = DIFF_GIT_RE.match(line)
808
1071
  if match:
809
1072
  diff_header_count += 1
1073
+ current_hunk = None
810
1074
  if len(files) >= max_files:
811
1075
  current = None
812
1076
  continue
@@ -823,28 +1087,199 @@ def summarize_diff(text: str, *, max_files: int = 50, max_hunks: int = 200) -> d
823
1087
  total_hunks += 1
824
1088
  if current is None:
825
1089
  if len(files) >= max_files:
1090
+ current_hunk = None
826
1091
  continue
827
1092
  current = {"old_path": None, "new_path": None, "diff_header_line": None, "hunks": []}
828
1093
  files.append(current)
829
1094
  if len(current["hunks"]) < max_hunks:
830
- current["hunks"].append(
831
- {
832
- "line": line_number,
833
- "old_start": int(hunk.group("old_start")),
834
- "old_count": int(hunk.group("old_count") or "1"),
835
- "new_start": int(hunk.group("new_start")),
836
- "new_count": int(hunk.group("new_count") or "1"),
837
- "section": hunk.group("section").strip()[:120],
838
- }
839
- )
1095
+ current_hunk = {
1096
+ "line": line_number,
1097
+ "old_start": int(hunk.group("old_start")),
1098
+ "old_count": int(hunk.group("old_count") or "1"),
1099
+ "new_start": int(hunk.group("new_start")),
1100
+ "new_count": int(hunk.group("new_count") or "1"),
1101
+ "section": hunk.group("section").strip()[:120],
1102
+ "added_lines": 0,
1103
+ "removed_lines": 0,
1104
+ "context_lines": 0,
1105
+ "body_lines": 0,
1106
+ "reviewable": False,
1107
+ }
1108
+ current["hunks"].append(current_hunk)
1109
+ summarized_hunks += 1
1110
+ else:
1111
+ current_hunk = None
1112
+ continue
1113
+ if current_hunk is not None:
1114
+ changed = False
1115
+ if line.startswith("+") and not line.startswith("+++"):
1116
+ current_hunk["added_lines"] += 1
1117
+ changed = True
1118
+ elif line.startswith("-") and not line.startswith("---"):
1119
+ current_hunk["removed_lines"] += 1
1120
+ changed = True
1121
+ elif line.startswith(" "):
1122
+ current_hunk["context_lines"] += 1
1123
+ else:
1124
+ continue
1125
+ current_hunk["body_lines"] += 1
1126
+ reviewable_hunks = 0
1127
+ malformed_hunks = 0
1128
+ for file_summary in files:
1129
+ for hunk_summary in file_summary["hunks"]:
1130
+ old_body_lines = hunk_summary["removed_lines"] + hunk_summary["context_lines"]
1131
+ new_body_lines = hunk_summary["added_lines"] + hunk_summary["context_lines"]
1132
+ has_changes = bool(hunk_summary["added_lines"] or hunk_summary["removed_lines"])
1133
+ well_formed = (
1134
+ old_body_lines == hunk_summary["old_count"]
1135
+ and new_body_lines == hunk_summary["new_count"]
1136
+ )
1137
+ hunk_summary["old_body_lines"] = old_body_lines
1138
+ hunk_summary["new_body_lines"] = new_body_lines
1139
+ hunk_summary["has_changes"] = has_changes
1140
+ hunk_summary["well_formed"] = well_formed
1141
+ hunk_summary["reviewable"] = bool(has_changes and well_formed)
1142
+ if hunk_summary["reviewable"]:
1143
+ reviewable_hunks += 1
1144
+ elif not well_formed:
1145
+ malformed_hunks += 1
840
1146
  return {
841
1147
  "file_count": len(files),
842
1148
  "hunk_count": total_hunks,
1149
+ "summarized_hunk_count": summarized_hunks,
1150
+ "reviewable_hunk_count": reviewable_hunks,
1151
+ "malformed_hunk_count": malformed_hunks,
843
1152
  "truncated_files": max(0, diff_header_count - len(files)),
1153
+ "truncated_hunks": max(0, total_hunks - summarized_hunks),
844
1154
  "files": files,
845
1155
  }
846
1156
 
847
1157
 
1158
def valid_context_diff_reexpand_command(receipt_id: str | None, command: str | None) -> tuple[bool, str | None]:
    """Check that *command* is an exact artifact re-expand command for *receipt_id*.

    Returns:
        ``(True, None)`` when the command tokenizes to exactly
        ``context-guard-artifact get <receipt_id> --full`` or
        ``context-guard artifact get <receipt_id> --full``. Otherwise
        ``(False, reason)``, where reason is
        ``"missing_exact_receipt_or_reexpand_command"`` if either argument is
        empty or ``None``, and ``"invalid_reexpand_command"`` in every other case.
    """
    if not receipt_id or not command:
        return False, "missing_exact_receipt_or_reexpand_command"

    rejected = (False, "invalid_reexpand_command")
    if CONTEXT_DIFF_ARTIFACT_ID_RE.fullmatch(receipt_id) is None:
        return rejected
    # Refuse shell metacharacters and line breaks before tokenizing.
    if any(forbidden in command for forbidden in ";|&><`$\n\r"):
        return rejected
    try:
        argv = shlex.split(command)
    except ValueError:
        return rejected

    accepted_forms = (
        ["context-guard-artifact", "get", receipt_id, "--full"],
        ["context-guard", "artifact", "get", receipt_id, "--full"],
    )
    if argv in accepted_forms:
        return True, None
    return rejected
1174
+
1175
+
1176
def context_diff_artifact_read_dirs() -> list[Path]:
    """Return the artifact directories to read, default location before legacy."""
    search_order = [DEFAULT_CONTEXT_DIFF_ARTIFACT_DIR]
    search_order.append(LEGACY_CONTEXT_DIFF_ARTIFACT_DIR)
    return search_order
1178
+
1179
+
1180
def context_diff_artifact_paths(directory: Path, receipt_id: str) -> tuple[Path, Path]:
    """Return ``(content_path, metadata_path)`` for *receipt_id* under *directory*.

    The content file is ``<receipt_id>.txt`` and the metadata file is
    ``<receipt_id>.json``.
    """
    content_path = directory / f"{receipt_id}.txt"
    metadata_path = directory / f"{receipt_id}.json"
    return content_path, metadata_path
1182
+
1183
+
1184
+ def verify_context_diff_artifact(
1185
+ receipt_id: str | None,
1186
+ *,
1187
+ expected_sha256: str,
1188
+ expected_bytes: int,
1189
+ ) -> tuple[bool, str | None, dict[str, Any]]:
1190
+ if not receipt_id or not CONTEXT_DIFF_ARTIFACT_ID_RE.fullmatch(receipt_id):
1191
+ return False, "invalid_reexpand_command", {"checked": False, "read_directories": []}
1192
+ read_dirs = context_diff_artifact_read_dirs()
1193
+ details: dict[str, Any] = {
1194
+ "checked": True,
1195
+ "read_directories": [str(path) for path in read_dirs],
1196
+ "matched_directory": None,
1197
+ "content_sha256": None,
1198
+ "content_bytes": None,
1199
+ }
1200
+ for directory in read_dirs:
1201
+ content_path, meta_path = context_diff_artifact_paths(directory, receipt_id)
1202
+ meta_loaded = read_bounded_regular_file(
1203
+ meta_path,
1204
+ max_bytes=MAX_CONTEXT_DIFF_ARTIFACT_METADATA_BYTES,
1205
+ label="context-diff artifact metadata",
1206
+ missing_ok=True,
1207
+ )
1208
+ content_loaded = read_bounded_regular_file(
1209
+ content_path,
1210
+ max_bytes=max(MAX_CONTEXT_DIFF_INPUT_BYTES, expected_bytes),
1211
+ label="context-diff artifact content",
1212
+ missing_ok=True,
1213
+ )
1214
+ if meta_loaded is None and content_loaded is None:
1215
+ continue
1216
+ if meta_loaded is None or content_loaded is None:
1217
+ return False, "artifact_receipt_invalid", details
1218
+ meta_raw, meta_truncated = meta_loaded
1219
+ content_raw, content_truncated = content_loaded
1220
+ if meta_truncated or content_truncated:
1221
+ return False, "artifact_receipt_invalid", details
1222
+ try:
1223
+ metadata = json.loads(meta_raw.decode("utf-8"))
1224
+ except (UnicodeDecodeError, json.JSONDecodeError):
1225
+ return False, "artifact_receipt_invalid", details
1226
+ if not isinstance(metadata, dict) or metadata.get("artifact_id") != receipt_id:
1227
+ return False, "artifact_receipt_invalid", details
1228
+ stored = metadata.get("stored_output")
1229
+ stored_sha = stored.get("sha256") if isinstance(stored, dict) else None
1230
+ stored_bytes = stored.get("bytes") if isinstance(stored, dict) else None
1231
+ actual_sha = hashlib.sha256(content_raw).hexdigest()
1232
+ actual_bytes = len(content_raw)
1233
+ details.update({
1234
+ "matched_directory": str(directory),
1235
+ "content_sha256": actual_sha,
1236
+ "content_bytes": actual_bytes,
1237
+ })
1238
+ if stored_sha != actual_sha or stored_bytes != actual_bytes:
1239
+ return False, "artifact_receipt_invalid", details
1240
+ if actual_sha != expected_sha256 or actual_bytes != expected_bytes:
1241
+ return False, "artifact_content_mismatch", details
1242
+ return True, None, details
1243
+ return False, "artifact_receipt_not_found", details
1244
+
1245
+
1246
def read_context_diff_replacement(args: argparse.Namespace) -> tuple[str | None, dict[str, Any]]:
    """Load the caller-supplied compacted replacement, inline or from a file.

    Returns ``(text, metadata)``. ``text`` is ``None`` when no replacement was
    supplied. ``metadata`` records the source label, byte and line counts, the
    SHA-256 of the (possibly truncated) raw bytes, the truncation flag and the
    byte limit.

    Raises:
        RegistryError: if both ``--replacement-text`` and ``--replacement-file`` are given.
    """
    inline_text = args.replacement_text
    file_argument = args.replacement_file
    if inline_text is not None and file_argument:
        raise RegistryError("context-diff-compaction emit accepts only one of --replacement-text or --replacement-file")

    # Defaults describe the "no replacement supplied" case.
    text: str | None = None
    raw = b""
    truncated = False
    source_label: str | None = None

    if inline_text is not None:
        encoded = str(inline_text).encode("utf-8")
        truncated = len(encoded) > MAX_CONTEXT_DIFF_REPLACEMENT_BYTES
        raw = encoded[:MAX_CONTEXT_DIFF_REPLACEMENT_BYTES]
        # A cut in the middle of a multi-byte character decodes to U+FFFD.
        text = raw.decode("utf-8", errors="replace")
        source_label = "inline"
    elif file_argument:
        path = Path(file_argument)
        loaded = read_bounded_regular_file(
            path,
            max_bytes=MAX_CONTEXT_DIFF_REPLACEMENT_BYTES,
            label="context-diff replacement",
        )
        assert loaded is not None
        raw, truncated = loaded
        text = raw.decode("utf-8", errors="replace")
        source_label = str(path)

    supplied = text is not None
    metadata = {
        "source_label": source_label,
        "bytes": len(raw),
        "lines": len(text.splitlines()) if supplied else 0,
        "sha256": hashlib.sha256(raw).hexdigest() if supplied else None,
        "truncated": truncated,
        "max_bytes": MAX_CONTEXT_DIFF_REPLACEMENT_BYTES,
    }
    return text, metadata
1281
+
1282
+
848
1283
  def context_diff_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
849
1284
  text, input_meta = read_bounded_input(args)
850
1285
  summary = summarize_diff(text)
@@ -856,7 +1291,11 @@ def context_diff_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
856
1291
  readiness_blockers.append("missing_exact_receipt_or_reexpand_command")
857
1292
  if input_meta["truncated"]:
858
1293
  readiness_blockers.append("input_truncated")
859
- if summary["file_count"] == 0 or summary["hunk_count"] == 0:
1294
+ if summary.get("truncated_files", 0) or summary.get("truncated_hunks", 0):
1295
+ readiness_blockers.append("diff_summary_truncated")
1296
+ if summary.get("malformed_hunk_count", 0):
1297
+ readiness_blockers.append("malformed_diff_hunks")
1298
+ if summary["file_count"] == 0 or summary.get("reviewable_hunk_count", 0) == 0:
860
1299
  readiness_blockers.append("no_reviewable_diff_hunks")
861
1300
  status = (
862
1301
  "ready_for_human_review"
@@ -885,7 +1324,7 @@ def context_diff_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
885
1324
  "artifact_id": receipt_id,
886
1325
  "cli": reexpand_command,
887
1326
  "verified": False,
888
- "note": "G003 records user-supplied handles for human review only; it does not verify local receipt storage.",
1327
+ "note": "Dry-run planning records user-supplied handles for human review only; it does not verify local receipt storage.",
889
1328
  },
890
1329
  "review_plan": {
891
1330
  "summary": summary,
@@ -928,6 +1367,119 @@ def command_plan_context_diff_compaction(args: argparse.Namespace) -> int:
928
1367
  return 0
929
1368
 
930
1369
 
1370
def context_diff_emit_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the ``emit`` payload for a caller-supplied compacted diff replacement.

    Starts from the plan payload, validates the re-expand command shape,
    verifies the referenced artifact against the input diff's SHA-256 and byte
    count, and collects readiness blockers. The replacement text is embedded
    under ``compacted_replacement`` only when no blocker remains.
    """
    payload = context_diff_plan_payload(args)
    receipt_id = args.receipt_id.strip() if args.receipt_id else None
    reexpand_command = args.reexpand_command.strip() if args.reexpand_command else None
    reexpand_valid, reexpand_blocker = valid_context_diff_reexpand_command(receipt_id, reexpand_command)
    replacement_text, replacement_meta = read_context_diff_replacement(args)

    # Artifact storage is only consulted once the command shape has been accepted.
    if reexpand_valid:
        artifact_verified, artifact_blocker, artifact_verification = verify_context_diff_artifact(
            receipt_id,
            expected_sha256=payload["input"]["sha256"],
            expected_bytes=payload["input"]["bytes"],
        )
    else:
        artifact_verified = False
        artifact_blocker = None
        artifact_verification: dict[str, Any] = {"checked": False, "read_directories": []}

    input_bytes = payload["input"]["bytes"]
    has_replacement = replacement_text is not None
    was_truncated = replacement_meta["truncated"]

    emit_checks = (
        reexpand_blocker,
        artifact_blocker,
        None if has_replacement and replacement_text.strip() else "missing_compacted_replacement",
        "replacement_truncated" if was_truncated else None,
        (
            "replacement_not_smaller_than_input"
            if has_replacement and not was_truncated and replacement_meta["bytes"] >= input_bytes
            else None
        ),
    )
    blockers = list(payload["review_plan"]["readiness_blockers"])
    blockers.extend(name for name in emit_checks if name)
    # Drop duplicates while keeping first-seen order.
    blockers = list(dict.fromkeys(blockers))
    ready = not blockers

    replacement_record = None
    if ready and has_replacement:
        replacement_record = {
            "text": replacement_text,
            "bytes": replacement_meta["bytes"],
            "lines": replacement_meta["lines"],
            "sha256": replacement_meta["sha256"],
            "source_label": replacement_meta["source_label"],
        }

    payload["mode"] = "emit"
    payload["status"] = "replacement_emitted" if ready else "blocked_until_emit_ready"
    payload["transform_policy"] = {
        "automatic_compaction": False,
        "lossy_replacement_allowed": ready,
        "semantic_rewrite_allowed": False,
        "caller_supplied_replacement_required": True,
        "human_review_required": True,
        "stable_runtime_behavior_changed": False,
    }
    payload["exact_retrieval"] = {
        "required": True,
        "available": bool(receipt_id and reexpand_command and reexpand_valid and artifact_verified),
        "artifact_id": receipt_id,
        "cli": reexpand_command,
        "verified": artifact_verified,
        "valid_command_shape": reexpand_valid,
        "verification": artifact_verification,
        "note": "Emit mode validates exact local artifact command shape and verifies local artifact content matches the input diff.",
    }
    payload["replacement"] = replacement_meta

    review_plan = payload["review_plan"]
    review_plan["readiness_blockers"] = blockers
    review_plan["bounded_loss_disclosure"] = (
        "Compacted replacement is caller supplied and lossy; use exact_retrieval.cli to recover the original diff "
        "before relying on omitted details."
    )
    review_plan["next_steps"] = [
        "Human-review the compacted replacement against the original diff before use.",
        "Use exact_retrieval.cli to recover the original diff whenever omitted details matter.",
        "Treat bytes_before/bytes_after as local proxy evidence only; do not claim hosted token/cost savings.",
    ]

    payload["claim_boundary"] = (
        "Explicit local context-diff replacement emission only; smaller local diffs are proxy evidence and are not "
        "hosted API token or cost savings evidence."
    )
    bytes_after = replacement_meta["bytes"] if has_replacement else 0
    payload["compaction_evidence"] = {
        "bytes_before": input_bytes,
        "bytes_after": bytes_after,
        "byte_reduction": max(0, input_bytes - bytes_after),
        "byte_reduction_proxy_only": True,
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }
    payload["compacted_replacement"] = replacement_record
    return payload
1460
+
1461
+
1462
def command_emit_context_diff_compaction(args: argparse.Namespace) -> int:
    """Run context-diff emit mode and report the result as JSON or text.

    Returns exit status 0 when the replacement was emitted and 1 otherwise.
    """
    payload = context_diff_emit_payload(args)
    emitted = payload["status"] == "replacement_emitted"
    exit_code = 0 if emitted else 1

    if args.json:
        emit_json(payload)
        return exit_code

    if emitted:
        replacement = payload["replacement"]
        print("ContextGuard context-diff compact replacement emitted")
        print(f"Replacement: bytes={replacement['bytes']} sha256={replacement['sha256']}")
        print(f"Exact re-expand: {payload['exact_retrieval']['cli']}")
    else:
        print("ContextGuard context-diff compact replacement blocked")
        print(f"Status: {payload['status']}")

    blockers = payload["review_plan"]["readiness_blockers"]
    if blockers:
        print(f"Readiness blockers: {', '.join(blockers)}")
    print(payload["claim_boundary"])
    return exit_code
1481
+
1482
+
931
1483
  def clean_values(values: list[str] | None) -> list[str]:
932
1484
  return [value.strip() for value in values or [] if value.strip()]
933
1485
 
@@ -1016,6 +1568,7 @@ def read_visual_ocr_text(args: argparse.Namespace) -> dict[str, Any]:
1016
1568
  "truncated": truncated,
1017
1569
  "max_bytes": MAX_VISUAL_OCR_TEXT_BYTES,
1018
1570
  "valid_utf8": valid_encoding,
1571
+ "text": text,
1019
1572
  "text_preview": text,
1020
1573
  "has_text": bool(text.strip()),
1021
1574
  }
@@ -1178,6 +1731,93 @@ def command_plan_visual_crop_ocr(args: argparse.Namespace) -> int:
1178
1731
  return 0
1179
1732
 
1180
1733
 
1734
def visual_crop_ocr_evidence_pack_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the ``emit`` payload for a visual crop/OCR evidence pack.

    Reuses the plan payload and its readiness blockers. When there are no
    blockers, an ``evidence_pack`` entry bundling the full-evidence reference,
    the available crop/OCR evidence, missed-context notes, guardrails and
    proxy-only reduction figures is attached.
    """
    payload = visual_crop_ocr_plan_payload(args)
    ready = not list(payload["review_plan"]["readiness_blockers"])
    derived = payload["derived_evidence"]
    crop = derived["crop"]
    ocr = derived["ocr"]

    # Areas are only computable when both the crop bounds and the image size are known.
    image_area = None
    crop_area = None
    bounds = crop["bounds"]
    image_size = crop["image_size"]
    if bounds is not None and image_size is not None:
        image_area = image_size["width"] * image_size["height"]
        crop_area = bounds["width"] * bounds["height"]

    if crop["available"] and image_area is not None and crop_area is not None:
        area_reduction = image_area - crop_area
    else:
        area_reduction = None

    payload["mode"] = "emit"
    payload["status"] = "evidence_pack_emitted" if ready else "blocked_until_visual_evidence_pack_ready"
    # Copy before extending so the plan payload's guardrails object is not mutated in place.
    guardrails = dict(payload["guardrails"])
    guardrails.update({
        "candidate_replacement_allowed": False,
        "evidence_pack_allowed": ready,
        "runtime_writes_files": False,
        "external_services_called": False,
    })
    payload["guardrails"] = guardrails
    payload["claim_boundary"] = (
        "Explicit local visual crop/OCR evidence-pack emission only; image area and OCR byte reductions are proxy "
        "evidence and are not hosted API token or cost savings evidence."
    )
    payload["reduction_evidence"] = {
        "image_area_before": image_area,
        "crop_area_after": crop_area if crop["available"] else None,
        "crop_area_reduction": area_reduction,
        "ocr_text_bytes": ocr["metadata"]["bytes"] if ocr["available"] else None,
        "proxy_only": True,
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }
    payload["review_plan"]["next_steps"] = [
        "Human-review crop/OCR evidence against the full visual evidence receipt before using it as a substitute.",
        "Read missed-context notes before relying on omitted visual regions.",
        "Treat image area/OCR byte reductions as local proxy evidence only; do not claim hosted token/cost savings.",
    ]
    if not ready:
        return payload

    ocr_evidence = None
    if ocr["available"]:
        ocr_evidence = {
            "source_type": ocr["source_type"],
            "source_label": ocr["source_label"],
            "text": ocr["text_preview"],
            "metadata": ocr["metadata"],
            "confidence": ocr["confidence"],
            "error_notes": ocr["error_notes"],
        }
    payload["evidence_pack"] = {
        "schema_version": "contextguard.visual-evidence-pack.v1",
        "full_visual_evidence": payload["full_visual_evidence"],
        "crop_evidence": crop if crop["available"] else None,
        "ocr_evidence": ocr_evidence,
        "missed_context_notes": payload["review_plan"]["missed_context_notes"],
        "guardrails": payload["guardrails"],
        "reduction_evidence": payload["reduction_evidence"],
        "claim_boundary": payload["claim_boundary"],
    }
    return payload
1797
+
1798
+
1799
def command_emit_visual_crop_ocr(args: argparse.Namespace) -> int:
    """Run visual crop/OCR emit mode and report the result as JSON or text.

    Returns exit status 0 when the evidence pack was emitted and 1 otherwise.
    """
    payload = visual_crop_ocr_evidence_pack_payload(args)
    emitted = payload["status"] == "evidence_pack_emitted"
    exit_code = 0 if emitted else 1

    if args.json:
        emit_json(payload)
        return exit_code

    if emitted:
        derived = payload["derived_evidence"]
        print("ContextGuard visual crop/OCR evidence pack emitted")
        print(f"Full evidence receipt: {payload['full_visual_evidence']['receipt_id']}")
        print(f"Derived evidence: crop={derived['crop']['available']} ocr={derived['ocr']['available']}")
    else:
        print("ContextGuard visual crop/OCR evidence pack blocked")
        print(f"Status: {payload['status']}")

    blockers = payload["review_plan"]["readiness_blockers"]
    if blockers:
        print(f"Readiness blockers: {', '.join(blockers)}")
    print(payload["claim_boundary"])
    return exit_code
1819
+
1820
+
1181
1821
  SECRET_LABEL_KEY_RE = (
1182
1822
  r"[A-Za-z0-9_.-]*(?:"
1183
1823
  r"api[-_]?key|apikey|token|secret|password|passwd|pwd|client[-_]?secret|"
@@ -1434,6 +2074,70 @@ def select_self_hosted_envelope(payload: Any) -> tuple[Any, str | None, list[str
1434
2074
  return None, None, ignored
1435
2075
 
1436
2076
 
2077
+ def parse_optional_success(value: str | None) -> bool | None:
2078
+ if value is None or value == "unknown":
2079
+ return None
2080
+ return value == "true"
2081
+
2082
+
2083
def self_hosted_metrics_ledger_row(
    sidecar: dict[str, Any],
    *,
    task_id: str = "self-hosted-metrics-manual",
    variant: str = "self-hosted-metrics-ledger",
    success: bool | None = None,
    notes: str = "explicit self-hosted metrics record; no hosted API savings claim",
    claude_version: str = "manual",
    wall_time_seconds: float = 0.0,
) -> dict[str, Any]:
    """Build one benchmark-ledger row that carries *sidecar* as self-hosted metrics.

    All token, cost and byte measurements are written as unmeasured
    zero/``None`` values; only ``self_hosted_metrics`` is marked available.
    Free-text fields are passed through ``sanitize_self_hosted_text`` and fall
    back to their defaults when the sanitized value is empty.
    """
    recorded_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    safe_claude_version = sanitize_self_hosted_text(claude_version) or "manual"
    safe_task_id = sanitize_self_hosted_text(task_id) or "self-hosted-metrics-manual"
    safe_variant = sanitize_self_hosted_text(variant) or "self-hosted-metrics-ledger"
    safe_notes = (
        sanitize_self_hosted_text(notes)
        or "explicit self-hosted metrics record; no hosted API savings claim"
    )

    availability = {
        "primary_tokens": False,
        "primary_cost": False,
        "external_tokens": False,
        "external_cost": False,
        "shifted_cost": False,
        "provider_cache": False,
        "byte_metrics": False,
        "wall_time": False,
        "self_hosted_metrics": True,
    }
    proxy_metrics = {
        "byte_metrics_observed": False,
        "token_proxy": "chars_div_4",
        "bytes_per_token": TOKEN_PROXY_BYTES_PER_TOKEN,
        "claim_boundary": "proxy_only_not_hosted_token_savings",
    }
    return {
        "schema_version": BENCH_RUN_EVIDENCE_SCHEMA_VERSION,
        "date": recorded_at,
        "claude_version": safe_claude_version,
        "task_id": safe_task_id,
        "variant": safe_variant,
        "transform_id": "self-hosted-metrics-ledger",
        "success": success,
        "primary_tokens_measured": False,
        "primary_tokens": 0,
        "primary_cost_measured": False,
        "primary_cost_usd": 0.0,
        "provider_cached_tokens": None,
        "provider_cached_tokens_measured": False,
        "wall_time_seconds": wall_time_seconds,
        "external_tokens_measured": False,
        "external_tokens": 0,
        "external_cost_measured": False,
        "external_cost_usd": 0.0,
        "total_cost_with_shift_usd": None,
        "artifacts_used": 0,
        "bytes_before": 0,
        "bytes_after": 0,
        "hook_triggers": 0,
        "turns": 0,
        "notes": safe_notes,
        "measurement_availability": availability,
        "self_hosted_metrics": sidecar,
        "proxy_metrics": proxy_metrics,
    }
2139
+
2140
+
1437
2141
  def self_hosted_metrics_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
1438
2142
  cli_metrics = cli_self_hosted_metrics(args)
1439
2143
  if cli_metrics:
@@ -1493,51 +2197,12 @@ def self_hosted_metrics_plan_payload(args: argparse.Namespace) -> dict[str, Any]
1493
2197
  ready = not blockers
1494
2198
  ledger_preview = None
1495
2199
  if sidecar is not None:
1496
- ledger_preview = {
1497
- "schema_version": BENCH_RUN_EVIDENCE_SCHEMA_VERSION,
1498
- "date": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
1499
- "claude_version": "dry-run",
1500
- "task_id": "self-hosted-metrics-dry-run",
1501
- "variant": "self-hosted-metrics-ledger",
1502
- "transform_id": "self-hosted-metrics-ledger",
1503
- "success": None,
1504
- "primary_tokens_measured": False,
1505
- "primary_tokens": 0,
1506
- "primary_cost_measured": False,
1507
- "primary_cost_usd": 0.0,
1508
- "provider_cached_tokens": None,
1509
- "provider_cached_tokens_measured": False,
1510
- "wall_time_seconds": 0.0,
1511
- "external_tokens_measured": False,
1512
- "external_tokens": 0,
1513
- "external_cost_measured": False,
1514
- "external_cost_usd": 0.0,
1515
- "total_cost_with_shift_usd": None,
1516
- "artifacts_used": 0,
1517
- "bytes_before": 0,
1518
- "bytes_after": 0,
1519
- "hook_triggers": 0,
1520
- "turns": 0,
1521
- "notes": "dry-run preview; no ledger file written",
1522
- "measurement_availability": {
1523
- "primary_tokens": False,
1524
- "primary_cost": False,
1525
- "external_tokens": False,
1526
- "external_cost": False,
1527
- "shifted_cost": False,
1528
- "provider_cache": False,
1529
- "byte_metrics": False,
1530
- "wall_time": False,
1531
- "self_hosted_metrics": True,
1532
- },
1533
- "self_hosted_metrics": sidecar,
1534
- "proxy_metrics": {
1535
- "byte_metrics_observed": False,
1536
- "token_proxy": "chars_div_4",
1537
- "bytes_per_token": TOKEN_PROXY_BYTES_PER_TOKEN,
1538
- "claim_boundary": "proxy_only_not_hosted_token_savings",
1539
- },
1540
- }
2200
+ ledger_preview = self_hosted_metrics_ledger_row(
2201
+ sidecar,
2202
+ task_id="self-hosted-metrics-dry-run",
2203
+ notes="dry-run preview; no ledger file written",
2204
+ claude_version="dry-run",
2205
+ )
1541
2206
  return {
1542
2207
  "tool": TOOL_NAME,
1543
2208
  "schema_version": CONFIG_SCHEMA_VERSION,
@@ -1583,6 +2248,65 @@ def command_plan_self_hosted_metrics_ledger(args: argparse.Namespace) -> int:
1583
2248
  return 0
1584
2249
 
1585
2250
 
2251
def self_hosted_metrics_record_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the ``record`` payload and append one ledger row when metrics are ready.

    The payload starts from the plan payload. If no sidecar metrics were
    produced, or any readiness blocker exists, the status becomes
    ``blocked_until_metrics`` and nothing is written. Otherwise a ledger row is
    appended to ``args.ledger_jsonl`` and the payload records the write.
    """
    payload = self_hosted_metrics_plan_payload(args)
    payload["mode"] = "record"
    payload["claim_boundary"] = (
        "Explicit local self-hosted metrics ledger record only; local/model-server metrics are diagnostic sidecars "
        "and are not hosted API token or cost savings evidence."
    )
    policy = payload["policy"]
    policy["ledger_write_performed"] = False
    policy["stable_runtime_behavior_changed"] = False
    payload["ledger_record"] = None
    ledger_info = {
        "path": sanitize_self_hosted_text(args.ledger_jsonl),
        "write_performed": False,
        "bytes_written": 0,
    }
    payload["ledger_jsonl"] = ledger_info

    sidecar = payload["self_hosted_metrics"]
    if sidecar is None or payload["review_plan"]["readiness_blockers"]:
        payload["status"] = "blocked_until_metrics"
        return payload

    row = self_hosted_metrics_ledger_row(
        sidecar,
        task_id=args.task_id,
        variant=args.variant,
        success=parse_optional_success(args.success),
        notes=args.notes,
        claude_version="manual",
    )
    bytes_written = append_jsonl_no_follow(Path(args.ledger_jsonl), row, label="self-hosted metrics ledger")

    payload["status"] = "recorded"
    payload["ledger_preview"] = row
    payload["ledger_record"] = row
    policy["ledger_write_performed"] = True
    ledger_info["write_performed"] = True
    ledger_info["bytes_written"] = bytes_written
    payload["review_plan"]["next_steps"] = [
        "Use this JSONL row only as self-hosted/local diagnostic evidence.",
        "Keep hosted API token/cost savings claims behind provider-measured matched successful tasks.",
        "Compare this sidecar with benchmark rows only through explicit shifted-cost accounting.",
    ]
    return payload
2291
+
2292
+
2293
def command_record_self_hosted_metrics_ledger(args: argparse.Namespace) -> int:
    """Run self-hosted metrics record mode and report the result as JSON or text.

    Always returns exit status 0, whether the row was recorded or blocked.
    """
    # NOTE(review): the sibling emit commands return 1 when blocked; confirm that
    # returning 0 for a blocked record is intentional.
    payload = self_hosted_metrics_record_payload(args)
    if args.json:
        emit_json(payload)
        return 0

    if payload["status"] == "recorded":
        ledger = payload["ledger_jsonl"]
        print("ContextGuard self-hosted metrics ledger record written")
        print(f"Ledger: {ledger['path']} bytes={ledger['bytes_written']}")
    else:
        print("ContextGuard self-hosted metrics ledger record blocked")
        print(f"Status: {payload['status']}")

    blockers = payload["review_plan"]["readiness_blockers"]
    if blockers:
        print(f"Readiness blockers: {', '.join(blockers)}")
    print(payload["claim_boundary"])
    return 0
2308
+
2309
+
1586
2310
  def sanitize_local_proxy_value(value: Any) -> str:
1587
2311
  return sanitize_self_hosted_text(value)
1588
2312
 
@@ -1593,22 +2317,218 @@ def local_proxy_secret_like(value: Any) -> bool:
1593
2317
  return "[REDACTED]" in sanitize_local_proxy_value(value)
1594
2318
 
1595
2319
 
1596
- def is_localhost_host(value: Any) -> bool:
1597
- if not isinstance(value, str):
1598
- return False
1599
- host = value.strip().strip("[]").lower().rstrip(".")
1600
- if host in LOCAL_PROXY_LOCALHOST_NAMES:
1601
- return True
1602
- try:
1603
- return ipaddress.ip_address(host).is_loopback
1604
- except ValueError:
1605
- return False
2320
def local_proxy_bytes_secret_like(value: bytes) -> bool:
    """Report whether *value*, decoded as UTF-8 with replacement, looks secret-like."""
    decoded = value.decode("utf-8", errors="replace")
    return local_proxy_secret_like(decoded)
1606
2322
 
1607
2323
 
1608
- def normalize_local_proxy_host(value: Any, *, default: str) -> tuple[str, bool, bool]:
1609
- if value is None or str(value).strip() == "":
1610
- host = default
1611
- else:
2324
def local_proxy_request_target_meta(value: Any) -> dict[str, Any]:
    """Describe a request target by its SHA-256 digest and byte length only.

    ``None`` is treated as the empty string; any other value is stringified and
    UTF-8 encoded (unencodable characters are replaced) before hashing.
    """
    if value is None:
        encoded = b""
    else:
        encoded = str(value).encode("utf-8", errors="replace")
    digest = hashlib.sha256(encoded).hexdigest()
    return {
        "request_target_sha256": digest,
        "request_target_bytes": len(encoded),
    }
2331
+
2332
+
2333
def normalize_external_allow_host(value: Any) -> tuple[str, list[str]]:
    """Validate one external allowlist host entry.

    Returns ``(sanitized, blockers)``: the sanitized form of the stripped input
    and the list of readiness blockers it triggers (empty when acceptable).

    An entry is rejected when it is empty, looks secret-like, contains
    wildcard/path/userinfo/port characters or whitespace, exceeds 253
    characters, names localhost or a loopback address, is a non-global IP
    literal, or is not a multi-label DNS name. Names whose final label is
    purely numeric or ``0x``-prefixed hex (for example ``127.1`` or ``0x7f.1``)
    are also rejected, because many resolvers and URL parsers interpret them as
    IPv4 shorthand even though ``ipaddress`` does not parse them.
    """
    raw = "" if value is None else str(value).strip()
    sanitized = sanitize_local_proxy_value(raw)
    blockers: list[str] = []
    host = raw.strip().strip("[]").lower().rstrip(".")
    if not host:
        return sanitized, ["invalid_external_allow_host"]
    if "[REDACTED]" in sanitized:
        blockers.append("secret_like_external_forwarding_design_metadata")
    # ":" is refused to exclude ports; as a side effect only IPv4 literals can pass.
    if any(ch in host for ch in ("*", "/", "\\", "@", ":", " ")) or len(host) > 253:
        blockers.append("invalid_external_allow_host")
    elif is_localhost_host(host):
        blockers.append("localhost_external_allow_host_not_allowed")
    else:
        try:
            ip = ipaddress.ip_address(host)
        except ValueError:
            labels = host.split(".")
            label_re = re.compile(r"^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?$")
            # A numeric-looking last label means the host may be treated as an
            # IPv4 address downstream, bypassing the localhost/non-global checks.
            numeric_tail = re.fullmatch(r"[0-9]+|0x[0-9a-f]*", labels[-1]) is not None
            if (
                len(labels) < 2
                or numeric_tail
                or any(not label_re.fullmatch(label) for label in labels)
            ):
                blockers.append("invalid_external_allow_host")
        else:
            if not ip.is_global:
                blockers.append("non_global_external_allow_host_not_allowed")
    return sanitized, blockers
2358
+
2359
+
2360
def local_proxy_external_forwarding_design_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the external-forwarding design payload from CLI arguments.

    Collects readiness blockers for missing acknowledgements, allowlist hosts
    and schemes, threat-model notes, and the credential-redaction and
    provider-evidence policies, then returns a payload describing the design
    constraints. This function only validates and reports its inputs.
    """
    intent = bool(args.external_forwarding_intent)
    design_ack = bool(args.external_forwarding_design_ack)
    raw_hosts = args.allow_host or []
    raw_schemes = args.allow_scheme or []
    raw_notes = args.threat_model_note or []
    redaction_policy = sanitize_local_proxy_value(args.credential_redaction_policy)
    provider_boundary = sanitize_local_proxy_value(args.provider_evidence_boundary)
    secret_blocker = "secret_like_external_forwarding_design_metadata"

    blockers: list[str] = []
    acknowledgements = (
        (intent, "missing_external_forwarding_intent"),
        (design_ack, "missing_external_forwarding_design_ack"),
    )
    for acknowledged, blocker in acknowledgements:
        if not acknowledged:
            blockers.append(blocker)

    # Hosts: every entry is validated; non-empty sanitized forms are reported.
    if not raw_hosts:
        blockers.append("missing_external_allow_host")
    unique_hosts: set[str] = set()
    for raw_host in raw_hosts:
        host, host_blockers = normalize_external_allow_host(raw_host)
        if host:
            unique_hosts.add(host)
        blockers.extend(host_blockers)
    hosts = sorted(unique_hosts)

    # Schemes: lower-cased, sanitized, de-duplicated.
    schemes = sorted({
        sanitize_local_proxy_value(str(value).strip().lower())
        for value in raw_schemes
        if str(value).strip()
    })
    if not schemes:
        blockers.append("missing_external_allow_scheme")
    for scheme in schemes:
        if "[REDACTED]" in scheme:
            blockers.append(secret_blocker)
        elif scheme not in LOCAL_PROXY_EXTERNAL_ALLOWED_SCHEMES:
            blockers.append("https_only_external_allow_scheme_required")

    threat_model_notes = [sanitize_local_proxy_value(note) for note in clean_values(raw_notes)]
    if not threat_model_notes:
        blockers.append("missing_threat_model_note")
    if any(local_proxy_secret_like(note) for note in raw_notes):
        blockers.append(secret_blocker)

    policy_checks = (
        (
            redaction_policy,
            LOCAL_PROXY_EXTERNAL_CREDENTIAL_REDACTION_POLICY,
            "missing_credential_redaction_policy",
            "unsupported_credential_redaction_policy",
        ),
        (
            provider_boundary,
            LOCAL_PROXY_EXTERNAL_PROVIDER_EVIDENCE_BOUNDARY,
            "missing_provider_evidence_boundary",
            "unsupported_provider_evidence_boundary",
        ),
    )
    for supplied, required, missing_blocker, unsupported_blocker in policy_checks:
        if not supplied:
            blockers.append(missing_blocker)
        elif supplied != required:
            blockers.append(unsupported_blocker)
    if local_proxy_secret_like(redaction_policy) or local_proxy_secret_like(provider_boundary):
        blockers.append(secret_blocker)

    # Drop duplicates while keeping first-seen order.
    blockers = list(dict.fromkeys(blockers))
    ready = not blockers

    policy = {
        "default_off": True,
        "design_only": True,
        "external_forwarding_runtime_implemented": False,
        "external_forwarding_allowed": False,
        "hidden_external_forwarding": False,
        "api_key_persistence_allowed": False,
        "credential_material_forwarded": False,
        "stable_runtime_behavior_changed": False,
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }
    network_actions = {
        "listener_started": False,
        "outbound_forwarding_attempted": False,
        "dns_lookup_attempted": False,
        "external_services_called": False,
    }
    design = {
        "intent_acknowledged": intent,
        "design_acknowledged": design_ack,
        "allowlist_required": True,
        "allowlist": {
            "hosts": hosts,
            "schemes": schemes,
            "wildcards_allowed": False,
            "localhost_allowed": False,
            "non_global_ip_allowed": False,
        },
        "credential_redaction": {
            "policy": redaction_policy,
            "required_policy": LOCAL_PROXY_EXTERNAL_CREDENTIAL_REDACTION_POLICY,
            "blocked_header_names": sorted(LOCAL_PROXY_SENSITIVE_HEADER_NAMES),
            "raw_headers_persisted": False,
            "request_bodies_persisted": False,
            "response_bodies_persisted": False,
        },
        "threat_model": {
            "required": True,
            "notes": threat_model_notes,
            "future_review_required": True,
        },
        "provider_evidence_boundary": {
            "policy": provider_boundary,
            "required_policy": LOCAL_PROXY_EXTERNAL_PROVIDER_EVIDENCE_BOUNDARY,
            "diagnostic_only": True,
            "provider_measured_matched_tasks_required_for_hosted_claims": True,
            "hosted_api_token_savings_claim_allowed": False,
            "hosted_api_cost_savings_claim_allowed": False,
        },
        "future_runtime_requirements": [
            "separate future runtime gate and review",
            "explicit host/scheme allowlist enforcement before any network connection",
            "credential-bearing requests blocked or stripped before forwarding",
            "no CONNECT/TLS interception without a separate reviewed gate",
            "diagnostic shifted-cost accounting only unless provider-measured matched-task evidence exists",
        ],
    }
    review_plan = {
        "readiness_blockers": blockers,
        "next_steps": [
            "Treat this as design evidence only; do not forward external traffic from this command.",
            "Keep existing local-proxy serve runtime literal-loopback-only.",
            "Require a separate future runtime gate before any external forwarding implementation.",
        ],
    }
    if ready:
        status = "ready_for_external_forwarding_design_review"
    else:
        status = "blocked_until_external_forwarding_design_constraints"

    return {
        "tool": TOOL_NAME,
        "schema_version": LOCAL_PROXY_EXTERNAL_DESIGN_SCHEMA_VERSION,
        "experiment_id": "local-proxy",
        "mode": "external_forwarding_design",
        "status": status,
        "policy": policy,
        "network_actions": network_actions,
        "external_forwarding_design": design,
        "review_plan": review_plan,
        "claim_boundary": (
            "Dry-run external forwarding design gate only; no listener, DNS lookup, external service call, credential "
            "persistence, traffic forwarding, or hosted API token/cost savings claim is performed."
        ),
    }
2490
+
2491
+
2492
def command_plan_local_proxy_external_forwarding(args: argparse.Namespace) -> int:
    """Report the external-forwarding design payload as JSON or text; always returns 0."""
    payload = local_proxy_external_forwarding_design_payload(args)
    if args.json:
        emit_json(payload)
        return 0

    report_lines = [
        "ContextGuard local proxy external-forwarding design gate (dry-run only)",
        "No listener was started, no traffic was forwarded, no DNS lookup was performed, and no API key was persisted.",
        f"Status: {payload['status']}",
    ]
    blockers = payload["review_plan"]["readiness_blockers"]
    if blockers:
        report_lines.append(f"Readiness blockers: {', '.join(blockers)}")
    report_lines.append(payload["claim_boundary"])
    for line in report_lines:
        print(line)
    return 0
2504
+
2505
+
2506
def is_localhost_host(value: Any) -> bool:
    """Return ``True`` when *value* is a known localhost name or a loopback IP literal.

    Non-string values are never localhost. The string is stripped of
    whitespace, surrounding brackets and a trailing dot, and lower-cased,
    before it is compared.
    """
    if not isinstance(value, str):
        return False
    candidate = value.strip().strip("[]").lower().rstrip(".")
    if candidate in LOCAL_PROXY_LOCALHOST_NAMES:
        return True
    try:
        address = ipaddress.ip_address(candidate)
    except ValueError:
        # Neither a known name nor an IP literal.
        return False
    return address.is_loopback
2516
+
2517
+
2518
def is_loopback_ip_literal(value: Any) -> bool:
    """Return ``True`` only when *value* is a string holding a loopback IP literal.

    Host names such as ``localhost`` do not count. Whitespace, surrounding
    brackets and a trailing dot are removed before parsing.
    """
    if isinstance(value, str):
        candidate = value.strip().strip("[]").lower().rstrip(".")
        try:
            address = ipaddress.ip_address(candidate)
        except ValueError:
            return False
        return address.is_loopback
    return False
2526
+
2527
+
2528
def normalize_local_proxy_host(value: Any, *, default: str) -> tuple[str, bool, bool]:
    """Normalize a proxy host value.

    Returns ``(sanitized_host, is_localhost, looks_secret)``. A missing or
    blank value falls back to *default*; otherwise the value is stringified and
    stripped of whitespace and surrounding brackets.
    """
    is_blank = value is None or str(value).strip() == ""
    host = default if is_blank else str(value).strip().strip("[]")
    sanitized = sanitize_local_proxy_value(host)
    localhost = is_localhost_host(host)
    looks_secret = "[REDACTED]" in sanitized
    return sanitized, localhost, looks_secret
@@ -1626,6 +2546,30 @@ def normalize_local_proxy_port(value: Any, *, default: int) -> tuple[int, bool]:
1626
2546
  return port, 0 <= port <= 65535
1627
2547
 
1628
2548
 
2549
def normalize_local_proxy_int_limit(value: Any, *, default: int, maximum: int) -> tuple[int, bool]:
    """Parse an integer limit and report whether it is usable.

    Args:
        value: Raw setting from the CLI or the JSON envelope.
        default: Value returned when *value* is absent or cannot be parsed.
        maximum: Inclusive upper bound for a valid limit.

    Returns:
        ``(limit, valid)``. ``None`` and ``""`` mean "not supplied" and give
        ``(default, True)``. Booleans and unparseable values give
        ``(default, False)``. Anything that parses is returned as-is with
        ``valid`` set to ``1 <= limit <= maximum``.
    """
    if value is None or value == "":
        return default, True
    if isinstance(value, bool):
        # bool is an int subclass; True/False are not meaningful byte limits.
        return default, False
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")), reachable from JSON input such as
        # ``Infinity`` or ``1e999``. Treat it as invalid instead of crashing.
        return default, False
    return parsed, 1 <= parsed <= maximum
2559
+
2560
+
2561
def normalize_local_proxy_timeout(value: Any) -> tuple[float, bool]:
    """Parse a timeout in seconds and report whether it is usable.

    Returns ``(timeout, valid)``. ``None`` and ``""`` mean "not supplied" and
    give ``(LOCAL_PROXY_DEFAULT_TIMEOUT_SECONDS, True)``. Booleans, values that
    cannot be converted to ``float``, and non-finite results (NaN, +/-inf) give
    ``(LOCAL_PROXY_DEFAULT_TIMEOUT_SECONDS, False)``. Any other value is
    returned as parsed, with ``valid`` set to
    ``0.1 <= timeout <= LOCAL_PROXY_MAX_TIMEOUT_SECONDS``.
    """
    import math  # Function-scope import keeps this block self-contained.

    if value is None or value == "":
        return LOCAL_PROXY_DEFAULT_TIMEOUT_SECONDS, True
    if isinstance(value, bool):
        # bool is an int subclass; True/False are not meaningful timeouts.
        return LOCAL_PROXY_DEFAULT_TIMEOUT_SECONDS, False
    try:
        parsed = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: float() of an integer too large for a double, which
        # JSON input can carry as a long digit string.
        return LOCAL_PROXY_DEFAULT_TIMEOUT_SECONDS, False
    if not math.isfinite(parsed):
        # NaN/inf can never be in range; fall back to the default rather than
        # returning a value that serializes as non-standard JSON.
        return LOCAL_PROXY_DEFAULT_TIMEOUT_SECONDS, False
    return parsed, 0.1 <= parsed <= LOCAL_PROXY_MAX_TIMEOUT_SECONDS
2571
+
2572
+
1629
2573
  def read_local_proxy_payload(args: argparse.Namespace) -> tuple[dict[str, Any], dict[str, Any]]:
1630
2574
  if not args.input:
1631
2575
  return {}, {
@@ -1696,6 +2640,12 @@ def read_local_proxy_payload(args: argparse.Namespace) -> tuple[dict[str, Any],
1696
2640
  "persist_api_key",
1697
2641
  "external_forwarding_intent",
1698
2642
  "runtime_gate_ack",
2643
+ "forwarding_gate_ack",
2644
+ "once",
2645
+ "max_request_bytes",
2646
+ "max_response_bytes",
2647
+ "timeout_seconds",
2648
+ "diagnostic_ledger_jsonl",
1699
2649
  }
1700
2650
  ignored.extend(sanitize_self_hosted_ignored_key(key) for key in envelope if key not in allowed)
1701
2651
  return dict(envelope), {
@@ -1708,18 +2658,43 @@ def read_local_proxy_payload(args: argparse.Namespace) -> tuple[dict[str, Any],
1708
2658
 
1709
2659
 
1710
2660
def coalesce_local_proxy_value(args: argparse.Namespace, payload: dict[str, Any], attr: str, key: str) -> Any:
    """Return the CLI value for *attr*, falling back to ``payload[key]``.

    The CLI attribute wins whenever it is present and not ``None`` (falsy
    values such as ``0`` or ``""`` still win). Otherwise the payload entry is
    returned, or ``None`` when the payload has no such key.
    """
    cli_value = getattr(args, attr, None)
    if cli_value is None:
        return payload.get(key)
    return cli_value
1713
2663
 
1714
2664
 
1715
- def coalesce_local_proxy_bool(args: argparse.Namespace, payload: dict[str, Any], attr: str, key: str) -> bool:
1716
- if getattr(args, attr):
1717
- return True
1718
- return bool(payload.get(key))
2665
def parse_local_proxy_json_bool(value: Any) -> tuple[bool, bool]:
    """Interpret a JSON-supplied flag as ``(flag, valid)``.

    ``None`` counts as a valid ``False``. Real booleans are returned as-is.
    Strings are trimmed and lower-cased, then matched against
    ``LOCAL_PROXY_TRUE_VALUES`` / ``LOCAL_PROXY_FALSE_VALUES``. The integers
    ``1`` and ``0`` map to ``True`` and ``False``. Everything else is reported
    as ``(False, False)``.
    """
    if value is None:
        return False, True
    if isinstance(value, bool):
        return value, True
    if isinstance(value, str):
        token = value.strip().lower()
        truthy = token in LOCAL_PROXY_TRUE_VALUES
        if truthy or token in LOCAL_PROXY_FALSE_VALUES:
            return truthy, True
        return False, False
    # Booleans were handled above, so any int reaching this point is a plain int.
    if isinstance(value, int) and value in (0, 1):
        return value == 1, True
    return False, False
1719
2683
 
1720
2684
 
1721
- def local_proxy_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
1722
- input_payload, input_meta = read_local_proxy_payload(args)
2685
def coalesce_local_proxy_bool(args: argparse.Namespace, payload: dict[str, Any], attr: str, key: str) -> tuple[bool, bool]:
    """Resolve a boolean flag from the CLI or the JSON payload as ``(flag, valid)``.

    A truthy CLI attribute wins outright and is always valid. Otherwise the
    payload entry is parsed with ``parse_local_proxy_json_bool``.
    """
    cli_flag_set = bool(getattr(args, attr, False))
    if not cli_flag_set:
        return parse_local_proxy_json_bool(payload.get(key))
    return True, True
2689
+
2690
+
2691
+ def local_proxy_plan_payload(
2692
+ args: argparse.Namespace,
2693
+ input_payload: dict[str, Any] | None = None,
2694
+ input_meta: dict[str, Any] | None = None,
2695
+ ) -> dict[str, Any]:
2696
+ if input_payload is None or input_meta is None:
2697
+ input_payload, input_meta = read_local_proxy_payload(args)
1723
2698
  bind_host_raw = coalesce_local_proxy_value(args, input_payload, "bind_host", "bind_host")
1724
2699
  bind_port_raw = coalesce_local_proxy_value(args, input_payload, "bind_port", "bind_port")
1725
2700
  target_host_raw = coalesce_local_proxy_value(args, input_payload, "target_host", "target_host")
@@ -1729,14 +2704,24 @@ def local_proxy_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
1729
2704
  proxy_label_raw = coalesce_local_proxy_value(args, input_payload, "proxy_label", "proxy_label")
1730
2705
  api_key_raw = coalesce_local_proxy_value(args, input_payload, "api_key", "api_key")
1731
2706
  authorization_raw = coalesce_local_proxy_value(args, input_payload, "authorization_header", "authorization_header")
1732
- persist_api_key = coalesce_local_proxy_bool(args, input_payload, "persist_api_key", "persist_api_key")
1733
- external_forwarding_intent = coalesce_local_proxy_bool(
2707
+ persist_api_key, persist_api_key_valid = coalesce_local_proxy_bool(
2708
+ args,
2709
+ input_payload,
2710
+ "persist_api_key",
2711
+ "persist_api_key",
2712
+ )
2713
+ external_forwarding_intent, external_forwarding_intent_valid = coalesce_local_proxy_bool(
1734
2714
  args,
1735
2715
  input_payload,
1736
2716
  "external_forwarding_intent",
1737
2717
  "external_forwarding_intent",
1738
2718
  )
1739
- runtime_gate_ack = coalesce_local_proxy_bool(args, input_payload, "runtime_gate_ack", "runtime_gate_ack")
2719
+ runtime_gate_ack, runtime_gate_ack_valid = coalesce_local_proxy_bool(
2720
+ args,
2721
+ input_payload,
2722
+ "runtime_gate_ack",
2723
+ "runtime_gate_ack",
2724
+ )
1740
2725
 
1741
2726
  upstream_url = sanitize_local_proxy_value(upstream_url_raw) if upstream_url_raw else None
1742
2727
  upstream_host = None
@@ -1805,6 +2790,12 @@ def local_proxy_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
1805
2790
  blockers: list[str] = []
1806
2791
  if input_meta["truncated"]:
1807
2792
  blockers.append("input_truncated")
2793
+ if not persist_api_key_valid:
2794
+ blockers.append("invalid_persist_api_key")
2795
+ if not external_forwarding_intent_valid:
2796
+ blockers.append("invalid_external_forwarding_intent")
2797
+ if not runtime_gate_ack_valid:
2798
+ blockers.append("invalid_runtime_gate_ack")
1808
2799
  if not bind_port_valid:
1809
2800
  blockers.append("invalid_bind_port")
1810
2801
  if not target_port_valid:
@@ -1920,6 +2911,682 @@ def command_plan_local_proxy(args: argparse.Namespace) -> int:
1920
2911
  return 0
1921
2912
 
1922
2913
 
2914
def local_proxy_gate_row(payload: dict[str, Any]) -> dict[str, Any]:
    """Build the JSONL ledger row for a local proxy runtime-gate record.

    The row copies the bind/target/network/forwarding sections from *payload*,
    stamps the current UTC time, and hard-codes the policy and claim-boundary
    flags of a record-only gate (no listener, no forwarding, no hosted savings
    claims). Keys are inserted in a fixed order.
    """
    row: dict[str, Any] = {"schema_version": LOCAL_PROXY_GATE_SCHEMA_VERSION}
    row["date"] = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    row["experiment_id"] = "local-proxy"
    row["proxy_label"] = payload["ledger_preview"]["proxy_label"]
    row["bind"] = payload["bind"]
    row["target"] = payload["target"]
    # Only the acknowledgement is taken from the plan; the rest is fixed.
    row["policy"] = {
        "localhost_only": True,
        "runtime_gate_acknowledged": payload["policy"]["runtime_gate_acknowledged"],
        "listener_started": False,
        "traffic_forwarded": False,
        "dns_lookup_attempted": False,
        "api_key_persisted": False,
        "hidden_external_forwarding": False,
    }
    row["network_actions"] = payload["network_actions"]
    row["api_key_persistence"] = payload["api_key_persistence"]
    row["forwarding"] = payload["forwarding"]
    row["claim_boundary"] = {
        "id": "local_proxy_runtime_gate_not_hosted_savings",
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
        "requires_provider_measured_matched_tasks_for_hosted_claims": True,
        "reason": "This row records a local proxy runtime gate only; it starts no listener and forwards no traffic.",
    }
    row["shifted_cost_accounting_required"] = True
    return row
2943
+
2944
+
2945
def local_proxy_record_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the ``record``-mode payload and, if unblocked, append the gate row.

    Starts from ``local_proxy_plan_payload(args)`` and overlays record-mode
    policy flags and ledger bookkeeping. A missing runtime-gate acknowledgement
    is added to the plan's readiness blockers. With any blocker present the
    payload is returned with status ``blocked_until_local_proxy_gate_ready``
    and nothing is written. Otherwise one row is appended to
    ``args.ledger_jsonl`` and the payload is marked ``recorded``.
    """
    payload = local_proxy_plan_payload(args)
    payload["mode"] = "record"
    payload["claim_boundary"] = (
        "Explicit local proxy runtime-gate record only; no listener, forwarding, DNS lookup, API-key persistence, "
        "external service call, or hosted API token/cost savings claim is performed."
    )
    # Overlay on a copy so the plan's own policy dict is not mutated in place.
    payload["policy"] = {
        **payload["policy"],
        "dry_run_only": False,
        "runtime_gate_record_only": True,
        "runtime_gate_recorded": False,
        "listener_started": False,
        "traffic_forwarded": False,
        "stable_runtime_behavior_changed": False,
    }
    payload["ledger_record"] = None
    payload["ledger_jsonl"] = {
        "path": sanitize_local_proxy_value(args.ledger_jsonl),
        "write_performed": False,
        "bytes_written": 0,
    }

    pending = list(payload["review_plan"]["readiness_blockers"])
    if not payload["policy"]["runtime_gate_acknowledged"]:
        pending.append("missing_runtime_gate_ack")
    # dict.fromkeys de-duplicates while keeping first-seen order.
    pending = list(dict.fromkeys(pending))
    payload["review_plan"]["readiness_blockers"] = pending

    preview = payload["ledger_preview"]
    preview["schema_version"] = LOCAL_PROXY_GATE_SCHEMA_VERSION
    preview["ledger_jsonl"] = sanitize_local_proxy_value(args.ledger_jsonl)
    preview["ledger_write_performed"] = False

    if pending:
        payload["status"] = "blocked_until_local_proxy_gate_ready"
        return payload

    gate_row = local_proxy_gate_row(payload)
    written = append_jsonl_no_follow(Path(args.ledger_jsonl), gate_row, label="local proxy runtime gate ledger")
    payload["status"] = "recorded"
    payload["ledger_preview"] = gate_row
    payload["ledger_record"] = gate_row
    payload["ledger_jsonl"]["write_performed"] = True
    payload["ledger_jsonl"]["bytes_written"] = written
    payload["policy"]["runtime_gate_recorded"] = True
    payload["review_plan"]["next_steps"] = [
        "Use this JSONL row only as a local proxy runtime-gate record.",
        "Keep any actual proxy listener or forwarding implementation behind a separate reviewed runtime.",
        "Do not persist API keys or claim hosted token/cost savings from this gate record.",
    ]
    return payload
2993
+
2994
+
2995
def command_record_local_proxy_runtime_gate(args: argparse.Namespace) -> int:
    """Run the runtime-gate record command and report its outcome.

    Emits the payload as JSON when ``args.json`` is set, otherwise prints a
    summary. Returns ``0`` when the payload status is ``recorded`` and ``1``
    in every other case.
    """
    payload = local_proxy_record_payload(args)
    if args.json:
        emit_json(payload)
        return 0 if payload["status"] == "recorded" else 1

    if payload["status"] != "recorded":
        print("ContextGuard local proxy runtime-gate record blocked")
    else:
        print("ContextGuard local proxy runtime-gate record written")
        print(f"Ledger: {payload['ledger_jsonl']['path']} bytes={payload['ledger_jsonl']['bytes_written']}")
    print(f"Status: {payload['status']}")
    if payload["review_plan"]["readiness_blockers"]:
        print(f"Readiness blockers: {', '.join(payload['review_plan']['readiness_blockers'])}")
    print(payload["claim_boundary"])
    return 0 if payload["status"] == "recorded" else 1
3010
+
3011
+
3012
def local_proxy_forward_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the ``serve``-mode payload and collect its readiness blockers.

    The plan payload is computed from the same parsed input that supplies the
    serve-only settings (forwarding acknowledgement, ``once``, byte limits,
    timeout, diagnostic ledger path). Serve-mode policy, forwarding,
    runtime-limit, diagnostic-ledger and client-auth sections are then
    attached. Status is ``ready_to_serve`` when no blocker remains, otherwise
    ``blocked_until_local_proxy_forwarding_ready``. This function only builds
    the payload; it starts no listener itself.
    """
    raw_input, raw_meta = read_local_proxy_payload(args)
    payload = local_proxy_plan_payload(args, input_payload=raw_input, input_meta=raw_meta)

    def pick(name: str) -> Any:
        # CLI attribute and JSON key share the same name for every serve setting.
        return coalesce_local_proxy_value(args, raw_input, name, name)

    gate_ack, gate_ack_ok = coalesce_local_proxy_bool(
        args, raw_input, "forwarding_gate_ack", "forwarding_gate_ack"
    )
    once, once_ok = coalesce_local_proxy_bool(args, raw_input, "once", "once")
    request_cap, request_cap_ok = normalize_local_proxy_int_limit(
        pick("max_request_bytes"),
        default=LOCAL_PROXY_DEFAULT_MAX_REQUEST_BYTES,
        maximum=LOCAL_PROXY_MAX_FORWARD_BYTES,
    )
    response_cap, response_cap_ok = normalize_local_proxy_int_limit(
        pick("max_response_bytes"),
        default=LOCAL_PROXY_DEFAULT_MAX_RESPONSE_BYTES,
        maximum=LOCAL_PROXY_MAX_FORWARD_BYTES,
    )
    timeout_seconds, timeout_ok = normalize_local_proxy_timeout(pick("timeout_seconds"))

    ledger_raw = pick("diagnostic_ledger_jsonl")
    if ledger_raw:
        ledger_display = sanitize_local_proxy_value(ledger_raw)
        ledger_write_path = str(ledger_raw)
    else:
        ledger_display = None
        ledger_write_path = None

    bind_is_literal = is_loopback_ip_literal(payload["bind"]["host"])
    target_is_literal = is_loopback_ip_literal(payload["target"]["host"])

    upstream_url = payload["target"].get("upstream_url")
    scheme = ""
    if upstream_url:
        try:
            scheme = urlparse(str(upstream_url)).scheme.lower()
        except ValueError:
            scheme = "invalid"

    payload["mode"] = "serve"
    payload["schema_version"] = LOCAL_PROXY_FORWARD_SCHEMA_VERSION
    payload["claim_boundary"] = (
        "Explicit local proxy forwarding MVP only; binds and forwards literal loopback IPs, blocks credential "
        "material, persists no API keys, performs no DNS lookup, calls no external services, and makes no hosted "
        "API token/cost savings claim."
    )
    # Overlay on copies so the plan's own section dicts are not mutated in place.
    payload["policy"] = {
        **payload["policy"],
        "dry_run_only": False,
        "forwarding_runtime": True,
        "forwarding_gate_acknowledged": gate_ack,
        "once_required": True,
        "once": once,
        "literal_loopback_ip_only": True,
        "listener_started": False,
        "traffic_forwarded": False,
        "stable_runtime_behavior_changed": False,
    }
    payload["forwarding"] = {
        **payload["forwarding"],
        "actual_local_forwarding_runtime": True,
        "forwarding_gate_acknowledged": gate_ack,
        "external_forwarding_allowed": False,
        "connect_tunneling_allowed": False,
        "https_mitm_allowed": False,
    }
    payload["runtime_limits"] = {
        "once": once,
        "max_request_bytes": request_cap,
        "max_response_bytes": response_cap,
        "timeout_seconds": timeout_seconds,
    }
    payload["diagnostic_ledger"] = {
        "schema_version": LOCAL_PROXY_DIAGNOSTIC_SCHEMA_VERSION,
        "path": ledger_display,
        "path_sha256": hashlib.sha256(str(ledger_raw).encode("utf-8", errors="replace")).hexdigest() if ledger_raw else None,
        "write_requested": bool(ledger_raw),
        "write_performed": False,
        "bytes_written": 0,
        "reason": None if ledger_raw else "not_requested",
    }
    payload["client_auth"] = {
        "required": True,
        "type": "nonce_header",
        "header": LOCAL_PROXY_NONCE_HEADER,
        "delivery": "ready_file",
        "ready_file_required": True,
        "nonce_in_public_output": False,
        "nonce_forwarded_upstream": False,
    }
    payload["_diagnostic_ledger_write_path"] = ledger_write_path
    payload["forward_result"] = None

    # Each entry is (condition that blocks serving, blocker id), in report order.
    checks = [
        (ledger_raw is not None and local_proxy_secret_like(ledger_raw), "secret_like_diagnostic_ledger_path"),
        (not payload["policy"]["runtime_gate_acknowledged"], "missing_runtime_gate_ack"),
        (not gate_ack_ok, "invalid_forwarding_gate_ack"),
        (not once_ok, "invalid_once"),
        (not gate_ack, "missing_forwarding_gate_ack"),
        (not once, "once_required_for_forwarding_mvp"),
        (payload["bind"]["port"] <= 0, "bind_port_required_for_listener"),
        (payload["target"]["port"] <= 0, "target_port_required_for_forwarding"),
        (not bind_is_literal, "bind_host_must_be_loopback_ip_literal"),
        (not target_is_literal, "target_host_must_be_loopback_ip_literal"),
        (scheme and scheme != "http", "unsupported_upstream_url_scheme"),
        (not request_cap_ok, "invalid_max_request_bytes"),
        (not response_cap_ok, "invalid_max_response_bytes"),
        (not timeout_ok, "invalid_timeout_seconds"),
    ]
    pending = list(payload["review_plan"]["readiness_blockers"])
    pending.extend(blocker_id for blocked, blocker_id in checks if blocked)
    # dict.fromkeys de-duplicates while keeping first-seen order.
    pending = list(dict.fromkeys(pending))

    payload["review_plan"]["readiness_blockers"] = pending
    payload["review_plan"]["next_steps"] = [
        "Use this MVP only for local loopback HTTP forwarding.",
        "Keep external forwarding, CONNECT tunneling, credential persistence, and hosted savings claims behind later gates.",
        "Use --once plus byte/time limits for bounded operation.",
    ]
    if pending:
        payload["status"] = "blocked_until_local_proxy_forwarding_ready"
    else:
        payload["status"] = "ready_to_serve"
    return payload
3147
+
3148
+
3149
def local_proxy_forward_diagnostic_row(payload: dict[str, Any]) -> dict[str, Any]:
    """Build the diagnostic JSONL row for one serve-mode forward attempt.

    Request and response figures come from ``payload["forward_result"]`` (an
    empty mapping when absent or falsy). Bind, target, runtime limits and
    network actions are copied from *payload*. The policy, shifted-cost and
    claim-boundary sections are fixed apart from ``policy.forwarded``.
    """
    outcome = payload.get("forward_result") or {}

    request_section = {
        "method": outcome.get("request_method"),
        "target_sha256": outcome.get("request_target_sha256"),
        "target_bytes": outcome.get("request_target_bytes", 0),
        "body_bytes": outcome.get("inbound_request_bytes", 0),
        "headers_persisted": False,
        "body_persisted": False,
        "credential_material_forwarded": False,
    }
    response_section = {
        "upstream_status": outcome.get("upstream_status"),
        "upstream_response_bytes": outcome.get("upstream_response_bytes", 0),
        "body_persisted": False,
    }
    policy_section = {
        "localhost_only": True,
        "literal_loopback_ip_only": True,
        "forwarded": bool(outcome.get("forwarded")),
        "api_key_persisted": False,
        "hidden_external_forwarding": False,
        "external_services_called": False,
        "dns_lookup_attempted": False,
        "connect_tunneling_allowed": False,
        "https_mitm_allowed": False,
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }
    shifted_cost_section = {
        "required": True,
        "local_proxy_request": True,
        "diagnostic_only": True,
        "provider_measured_matched_tasks_required_for_hosted_claims": True,
    }
    claim_section = {
        "id": "local_proxy_forward_diagnostic_not_hosted_savings",
        "reason": "This row records one explicit literal-loopback forwarded request as shifted-cost diagnostic evidence only.",
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }

    return {
        "schema_version": LOCAL_PROXY_DIAGNOSTIC_SCHEMA_VERSION,
        "date": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "experiment_id": "local-proxy",
        "mode": "serve",
        "proxy_label": payload["ledger_preview"]["proxy_label"],
        "bind": payload["bind"],
        # Only these three target fields are copied into the row.
        "target": {
            "host": payload["target"]["host"],
            "port": payload["target"]["port"],
            "localhost_only": payload["target"]["localhost_only"],
        },
        "request": request_section,
        "response": response_section,
        "runtime_limits": payload["runtime_limits"],
        "network_actions": payload["network_actions"],
        "policy": policy_section,
        "shifted_cost_accounting": shifted_cost_section,
        "claim_boundary": claim_section,
    }
3205
+
3206
+
3207
def maybe_write_local_proxy_forward_diagnostic(payload: dict[str, Any]) -> None:
    """Append a diagnostic ledger row when a forward actually happened.

    Does nothing unless ``payload["diagnostic_ledger"]`` is a dict with a
    truthy ``write_requested``. When the payload status is not ``served_once``
    or the forward result is not marked ``forwarded``, the ledger ``reason``
    becomes ``not_forwarded`` (an existing ``preflight_failed`` is kept) and
    nothing is written. Otherwise one row is appended to the stored write path
    and the ledger section is updated in place.
    """
    ledger = payload.get("diagnostic_ledger")
    wants_write = isinstance(ledger, dict) and bool(ledger.get("write_requested"))
    if not wants_write:
        return

    outcome = payload.get("forward_result") or {}
    served_and_forwarded = payload.get("status") == "served_once" and bool(outcome.get("forwarded"))
    if not served_and_forwarded:
        if ledger.get("reason") != "preflight_failed":
            ledger["reason"] = "not_forwarded"
        return

    row = local_proxy_forward_diagnostic_row(payload)
    destination = payload.get("_diagnostic_ledger_write_path")
    if not destination:
        ledger["reason"] = "not_requested"
        return

    written = append_jsonl_no_follow(Path(str(destination)), row, label="local proxy forwarding diagnostic ledger")
    ledger.update({
        "write_performed": True,
        "bytes_written": written,
        "reason": None,
        "row_preview": row,
    })
3225
+
3226
+
3227
def local_proxy_has_sensitive_headers(headers: Any) -> list[str]:
    """Return the sorted, de-duplicated labels of sensitive request headers.

    A header is flagged when its lower-cased name is in
    ``LOCAL_PROXY_SENSITIVE_HEADER_NAMES``, or when its name or value looks
    secret-like. A secret-like *name* is reported as
    ``redacted_sensitive_header`` rather than echoed back.
    """
    flagged: set[str] = set()
    for header_name, header_value in headers.items():
        key = str(header_name).lower()
        if key == LOCAL_PROXY_NONCE_HEADER.lower():
            # The per-run proxy nonce is the local client-auth secret. Skip it
            # so its random bytes cannot trip the generic secret-like detector
            # below.
            continue
        if key in LOCAL_PROXY_SENSITIVE_HEADER_NAMES:
            flagged.add(key)
            continue
        if local_proxy_secret_like(header_name):
            flagged.add("redacted_sensitive_header")
            continue
        if local_proxy_secret_like(header_value):
            flagged.add(key)
    return sorted(flagged)
3244
+
3245
+
3246
def local_proxy_safe_forward_headers(headers: Any, *, target_host: str, target_port: int) -> dict[str, str]:
    """Return the fixed header set sent upstream for a forwarded request.

    No inbound header is copied: *headers* is accepted for interface
    compatibility but intentionally ignored. Only ``Host`` and
    ``Connection: close`` are sent.

    Args:
        headers: Inbound request headers (unused).
        target_host: Upstream host, with or without IPv6 brackets.
        target_port: Upstream TCP port.

    Returns:
        A new dict with the ``Host`` and ``Connection`` headers.
    """
    host = str(target_host)
    # An IPv6 literal must be bracketed in Host ("[::1]:8080"); without
    # brackets the address colons are indistinguishable from the port
    # separator. Already-bracketed hosts are left untouched.
    if ":" in host and not host.startswith("["):
        host = f"[{host}]"
    return {
        "Host": f"{host}:{target_port}",
        "Connection": "close",
    }
3251
+
3252
+
3253
def local_proxy_response_headers(headers: Any) -> list[tuple[str, str]]:
    """Select the upstream response headers that may be relayed downstream.

    Only ``content-type`` survives, and only when it is not listed as a
    sensitive or hop-by-hop header and neither its name nor its value looks
    secret-like. Names and values are returned as strings in iteration order.
    """
    relayed: list[tuple[str, str]] = []
    for header_name, header_value in headers.items():
        key = str(header_name).lower()
        excluded = key in LOCAL_PROXY_SENSITIVE_HEADER_NAMES or key in LOCAL_PROXY_HOP_BY_HOP_HEADERS
        if excluded or key != "content-type":
            continue
        if local_proxy_secret_like(header_name) or local_proxy_secret_like(header_value):
            continue
        relayed.append((str(header_name), str(header_value)))
    return relayed
3265
+
3266
+
3267
def write_local_proxy_ready_file(path: str | None, *, bind_host: str, bind_port: int, auth_nonce: str) -> None:
    """Write the listener-ready JSON document, including the client nonce.

    Does nothing when *path* is empty or ``None``. Otherwise the document is
    serialized with sorted keys plus a trailing newline and handed to
    ``write_regular_file_no_follow_exclusive`` with mode ``0o600``.
    """
    if path:
        client_auth = {
            "required": True,
            "type": "nonce_header",
            "header": LOCAL_PROXY_NONCE_HEADER,
            "nonce": auth_nonce,
            "forwarded_upstream": False,
            "public_output": False,
        }
        document = {
            "schema_version": LOCAL_PROXY_READY_SCHEMA_VERSION,
            "experiment_id": "local-proxy",
            "mode": "serve",
            "status": "listener_ready",
            "diagnostic_only": True,
            "pid": os.getpid(),
            "bind": {"host": bind_host, "port": bind_port},
            "client_auth": client_auth,
        }
        encoded = json.dumps(document, sort_keys=True).encode("utf-8") + b"\n"
        write_regular_file_no_follow_exclusive(Path(path), encoded, label="local proxy ready file", mode=0o600)
3292
+
3293
+
3294
def serve_local_proxy_once(payload: dict[str, Any], *, ready_file: str | None = None) -> dict[str, Any]:
    """Run a bounded, one-shot HTTP listener and forward one request upstream.

    The listener binds to ``payload["bind"]`` and forwards GET/HEAD requests
    to ``payload["target"]`` over plain HTTP. Every request must carry the
    per-run nonce in the ``LOCAL_PROXY_NONCE_HEADER`` header; the nonce is
    generated here and handed to ``write_local_proxy_ready_file``.

    The loop ends once a request has "consumed" the single slot (it was
    forwarded, or rejected after passing the nonce check) or once
    ``timeout_seconds`` have elapsed since the ready file was written.
    Requests that fail the nonce check are answered with 403 but do not
    consume the slot.

    Args:
        payload: Serve payload; ``bind``, ``target`` and ``runtime_limits``
            are read from it.
        ready_file: Path passed to ``write_local_proxy_ready_file``.

    Returns:
        The ``server_result`` dict describing what happened (flags, request
        metadata, upstream/downstream status, blocked reason, auth failures).

    Raises:
        RegistryError: If the listener socket cannot be created.
    """
    bind_host = payload["bind"]["host"]
    bind_port = int(payload["bind"]["port"])
    target_host = payload["target"]["host"]
    target_port = int(payload["target"]["port"])
    limits = payload["runtime_limits"]
    max_request_bytes = int(limits["max_request_bytes"])
    max_response_bytes = int(limits["max_response_bytes"])
    timeout_seconds = float(limits["timeout_seconds"])
    # Fresh random nonce for this run only; compared in authorize_request().
    auth_nonce = secrets.token_urlsafe(32)
    # Shared mutable record; the nested handler and helper update it in place.
    server_result: dict[str, Any] = {
        "served_once": False,
        "forwarded": False,
        "blocked_reason": None,
        "forward_attempted": False,
        "request_method": None,
        "request_target_sha256": None,
        "request_target_bytes": 0,
        "inbound_request_bytes": 0,
        "upstream_status": None,
        "upstream_response_bytes": 0,
        "downstream_status": None,
        "sensitive_headers_blocked": [],
        "listener_started": False,
        "ready_file_written": False,
        "client_auth_required": True,
        "client_auth_header": LOCAL_PROXY_NONCE_HEADER,
        "client_auth_delivered": False,
        "client_auth_nonce_forwarded": False,
        "auth_failures": 0,
    }

    def finish_blocked(
        handler: BaseHTTPRequestHandler,
        status_code: int,
        reason: str,
        *,
        sensitive: list[str] | None = None,
        consume_once: bool = True,
    ) -> None:
        """Record a blocked outcome and send a small JSON error response.

        With ``consume_once`` true the one-shot slot is marked used, which
        ends the serve loop; auth failures pass ``consume_once=False``.
        """
        updates = {
            "forwarded": False,
            "blocked_reason": reason,
            "downstream_status": status_code,
            "sensitive_headers_blocked": sorted(set(sensitive or [])),
        }
        if consume_once:
            updates["served_once"] = True
        server_result.update(updates)
        body = json.dumps({"status": "blocked", "reason": reason}, sort_keys=True).encode("utf-8")
        handler.send_response(status_code)
        handler.send_header("Content-Type", "application/json")
        handler.send_header("Content-Length", str(len(body)))
        handler.send_header("Connection", "close")
        handler.end_headers()
        # A HEAD response carries headers only.
        if handler.command != "HEAD":
            handler.wfile.write(body)

    class LocalProxyHandler(BaseHTTPRequestHandler):
        """Request handler: GET/HEAD are forwarded, every other method is refused."""

        server_version = "ContextGuardLocalProxy/0"
        protocol_version = "HTTP/1.1"

        def log_message(self, format: str, *args: Any) -> None:  # noqa: A002 - BaseHTTPRequestHandler API.
            # Intentionally silent: nothing about the request is logged here.
            return

        def authorize_request(self) -> bool:
            """Require exactly one nonce header whose value matches ``auth_nonce``.

            On failure a 403 is sent, ``auth_failures`` is incremented and the
            one-shot slot is left unconsumed.
            """
            values = self.headers.get_all(LOCAL_PROXY_NONCE_HEADER, [])
            if len(values) == 0:
                server_result["auth_failures"] = int(server_result.get("auth_failures", 0)) + 1
                finish_blocked(self, 403, "missing_proxy_nonce", consume_once=False)
                return False
            if len(values) != 1:
                server_result["auth_failures"] = int(server_result.get("auth_failures", 0)) + 1
                finish_blocked(self, 403, "duplicate_proxy_nonce", consume_once=False)
                return False
            candidate = str(values[0])
            # compare_digest avoids leaking the match position through timing.
            if not secrets.compare_digest(candidate, auth_nonce):
                server_result["auth_failures"] = int(server_result.get("auth_failures", 0)) + 1
                finish_blocked(self, 403, "invalid_proxy_nonce", consume_once=False)
                return False
            return True

        def do_CONNECT(self) -> None:
            # CONNECT is never tunneled; an authorized attempt gets a 405.
            server_result["request_method"] = "CONNECT"
            server_result.update(local_proxy_request_target_meta(self.path))
            if not self.authorize_request():
                return
            finish_blocked(self, 405, "connect_tunneling_not_allowed")

        def do_HEAD(self) -> None:
            self.forward_request()

        def do_GET(self) -> None:
            self.forward_request()

        def do_POST(self) -> None:
            self.block_method()

        def do_PUT(self) -> None:
            self.block_method()

        def do_PATCH(self) -> None:
            self.block_method()

        def block_method(self) -> None:
            """Refuse a non-GET/HEAD method with 405 once the nonce check passes."""
            server_result["request_method"] = self.command
            server_result.update(local_proxy_request_target_meta(self.path))
            if not self.authorize_request():
                return
            finish_blocked(self, 405, "method_not_allowed")

        def do_DELETE(self) -> None:
            self.block_method()

        def do_OPTIONS(self) -> None:
            self.block_method()

        def do_TRACE(self) -> None:
            self.block_method()

        def forward_request(self) -> None:
            """Validate an authorized GET/HEAD request and relay it upstream.

            Checks run in a fixed order: nonce, secret-like target, absolute
            URL, Transfer-Encoding, sensitive headers, Content-Length. Any
            failure after the nonce check consumes the one-shot slot.
            """
            server_result["request_method"] = self.command
            server_result.update(local_proxy_request_target_meta(self.path))
            if not self.authorize_request():
                return
            if local_proxy_secret_like(self.path):
                finish_blocked(self, 400, "secret_like_request_target")
                return
            parsed_target = urlparse(self.path)
            # Only origin-form targets ("/path") are accepted, never full URLs.
            if parsed_target.scheme or parsed_target.netloc:
                finish_blocked(self, 400, "absolute_proxy_url_not_allowed")
                return
            if str(self.headers.get("Transfer-Encoding", "")).strip():
                finish_blocked(self, 400, "transfer_encoding_not_allowed")
                return
            sensitive_headers = local_proxy_has_sensitive_headers(self.headers)
            if sensitive_headers:
                finish_blocked(self, 403, "sensitive_request_headers_blocked", sensitive=sensitive_headers)
                return
            raw_length = self.headers.get("Content-Length")
            try:
                content_length = int(raw_length) if raw_length else 0
            except ValueError:
                finish_blocked(self, 400, "invalid_content_length")
                return
            if content_length < 0 or content_length > max_request_bytes:
                finish_blocked(self, 413, "request_body_exceeds_limit")
                return
            if content_length:
                finish_blocked(self, 400, "request_body_not_allowed_for_forwarding_mvp")
                return
            # content_length is always 0 here (non-zero was rejected above), so
            # body is always empty.
            body = self.rfile.read(content_length) if content_length else b""
            server_result["inbound_request_bytes"] = len(body)
            path = self.path if self.path.startswith("/") else f"/{self.path}"
            conn = http.client.HTTPConnection(target_host, target_port, timeout=timeout_seconds)
            try:
                server_result["forward_attempted"] = True
                conn.request(
                    self.command,
                    path,
                    body=body,
                    headers=local_proxy_safe_forward_headers(self.headers, target_host=target_host, target_port=target_port),
                )
                response = conn.getresponse()
                # Read one byte past the limit so an oversized body is detectable.
                response_body = response.read(max_response_bytes + 1)
                if len(response_body) > max_response_bytes:
                    finish_blocked(self, 502, "upstream_response_exceeds_limit")
                    return
                if local_proxy_bytes_secret_like(response_body):
                    finish_blocked(self, 502, "upstream_response_sensitive_content_blocked")
                    return
                self.send_response(response.status, response.reason)
                for header_name, header_value in local_proxy_response_headers(response.headers):
                    self.send_header(header_name, header_value)
                self.send_header("Content-Length", str(len(response_body)))
                self.send_header("Connection", "close")
                self.end_headers()
                if self.command != "HEAD":
                    self.wfile.write(response_body)
                server_result.update({
                    "served_once": True,
                    "forwarded": True,
                    "blocked_reason": None,
                    "upstream_status": response.status,
                    "upstream_response_bytes": len(response_body),
                    "downstream_status": response.status,
                })
            except (OSError, http.client.HTTPException, TimeoutError) as exc:
                # NOTE(review): this also catches an OSError raised while
                # writing the downstream response above, in which case
                # finish_blocked() would try to send a second response on the
                # same connection - confirm whether that path needs separate
                # handling.
                finish_blocked(self, 502, "upstream_forward_error")
                server_result["error"] = sanitize_local_proxy_value(str(exc))
            finally:
                conn.close()

    # A colon in the bind host is taken to mean an IPv6 literal.
    address_family = socket.AF_INET6 if ":" in bind_host else socket.AF_INET
    class LocalProxyHTTPServer(HTTPServer):
        """HTTPServer variant with a custom bind step and per-connection timeout."""

        def server_bind(self) -> None:
            # Calls TCPServer.server_bind directly instead of going through
            # HTTPServer.server_bind; presumably to avoid the host-name lookup
            # the stdlib HTTPServer performs there - TODO confirm.
            TCPServer.server_bind(self)
            host, port = self.server_address[:2]
            self.server_name = str(host)
            self.server_port = int(port)

        def get_request(self) -> tuple[Any, Any]:
            request, client_address = super().get_request()
            # Bound reads and writes on the accepted client socket.
            request.settimeout(timeout_seconds)
            return request, client_address

    LocalProxyHTTPServer.address_family = address_family
    try:
        httpd = LocalProxyHTTPServer((bind_host, bind_port), LocalProxyHandler)
    except OSError as exc:
        raise RegistryError(f"could not start local proxy listener: {os_error_detail(exc)}") from exc
    httpd.timeout = timeout_seconds
    try:
        try:
            # The ready file is written only after the listener socket exists.
            write_local_proxy_ready_file(ready_file, bind_host=bind_host, bind_port=bind_port, auth_nonce=auth_nonce)
            server_result["ready_file_written"] = bool(ready_file)
            server_result["client_auth_delivered"] = bool(ready_file)
            server_result["listener_started"] = True
        except RegistryError as exc:
            server_result.update({
                "served_once": False,
                "forwarded": False,
                "blocked_reason": "ready_file_write_failed",
                "downstream_status": None,
                "error": sanitize_local_proxy_value(str(exc)),
            })
            return server_result
        # One overall deadline covers every request, including rejected ones.
        deadline = time.monotonic() + timeout_seconds
        while not server_result["served_once"]:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            # Shrink the per-accept wait so the overall deadline is respected.
            httpd.timeout = max(0.001, min(timeout_seconds, remaining))
            httpd.handle_request()
        # Report a timeout only when no earlier reason (for example an auth
        # failure) has already been recorded.
        if not server_result["served_once"] and not server_result.get("blocked_reason"):
            server_result.update({
                "blocked_reason": "timeout_waiting_for_request",
                "downstream_status": None,
            })
    finally:
        httpd.server_close()
    return server_result
3536
+
3537
+
3538
def command_serve_local_proxy(args: argparse.Namespace) -> int:
    """Run the ``serve local-proxy`` command and report the outcome.

    Builds the forwarding payload, applies two pre-serve gates (a ready file
    must be supplied; a requested diagnostic ledger must pass its append
    preflight), serves a single request when the payload is still
    ``ready_to_serve``, and finally emits the payload as JSON or as text.

    Returns 0 only when the final status is ``served_once``; otherwise 1.
    """
    ready_status = "ready_to_serve"
    not_ready_status = "blocked_until_local_proxy_forwarding_ready"

    payload = local_proxy_forward_payload(args)

    def add_readiness_blocker(name: str) -> None:
        payload["review_plan"]["readiness_blockers"].append(name)

    # Fall back to a scratch dict when the payload carries no ledger mapping,
    # so the gate bookkeeping below never has to special-case it.
    ledger_entry = payload.get("diagnostic_ledger")
    diagnostic_ledger = ledger_entry if isinstance(ledger_entry, dict) else {}

    # Gate 1: serving requires a ready file.
    if payload["status"] == ready_status and not args.ready_file:
        payload["status"] = not_ready_status
        add_readiness_blocker("missing_ready_file_for_proxy_nonce")
        if diagnostic_ledger.get("write_requested"):
            diagnostic_ledger["reason"] = "not_forwarded"
        else:
            diagnostic_ledger["reason"] = diagnostic_ledger.get("reason")

    # Gate 2: a requested diagnostic ledger must pass its preflight check.
    if payload["status"] == ready_status and diagnostic_ledger.get("write_requested"):
        ledger_path = Path(str(payload.get("_diagnostic_ledger_write_path")))
        try:
            preflight_append_jsonl_no_follow(
                ledger_path,
                label="local proxy forwarding diagnostic ledger",
            )
        except RegistryError as exc:
            payload["status"] = not_ready_status
            add_readiness_blocker("diagnostic_ledger_preflight_failed")
            diagnostic_ledger["reason"] = "preflight_failed"
            diagnostic_ledger["error"] = sanitize_local_proxy_value(str(exc))

    if payload["status"] == ready_status:
        result = serve_local_proxy_once(payload, ready_file=args.ready_file)
        payload["forward_result"] = result

        listener_started = bool(result.get("listener_started"))
        network_actions = payload["network_actions"]
        network_actions["listener_started"] = listener_started
        network_actions["outbound_forwarding_attempted"] = bool(result["forward_attempted"])
        network_actions["dns_lookup_attempted"] = False
        network_actions["external_services_called"] = False

        policy = payload["policy"]
        policy["listener_started"] = listener_started
        policy["traffic_forwarded"] = bool(result["forwarded"])

        if result["forwarded"]:
            payload["status"] = "served_once"
        elif result.get("blocked_reason") == "ready_file_write_failed":
            payload["status"] = not_ready_status
            add_readiness_blocker("ready_file_write_failed")
        else:
            payload["status"] = "blocked_request"
        maybe_write_local_proxy_forward_diagnostic(payload)

    # The ledger write path is internal bookkeeping; drop it before reporting.
    payload.pop("_diagnostic_ledger_write_path", None)

    if args.json:
        emit_json(payload)
    else:
        headline = (
            "ContextGuard local proxy served one loopback request"
            if payload["status"] == "served_once"
            else "ContextGuard local proxy serve blocked"
        )
        print(headline)
        print(f"Status: {payload['status']}")
        readiness_blockers = payload["review_plan"]["readiness_blockers"]
        if readiness_blockers:
            print(f"Readiness blockers: {', '.join(readiness_blockers)}")
        forward_result = payload.get("forward_result")
        if forward_result and forward_result.get("blocked_reason"):
            print(f"Request blocker: {forward_result['blocked_reason']}")
        print(payload["claim_boundary"])

    if payload["status"] == "served_once":
        return 0
    return 1
3588
+
3589
+
1923
3590
  LEARNED_CODE_FENCE_RE = re.compile(r"(?m)^\s*(?:```|~~~)")
1924
3591
  LEARNED_DIFF_RE = re.compile(r"(?m)^\s*(diff --git |@@\s+-|--- |\+\+\+ |[+-].*)")
1925
3592
  LEARNED_IDENTIFIER_RE = re.compile(
@@ -2072,6 +3739,104 @@ def valid_learned_reexpand_command(receipt_id: str | None, command: str | None)
2072
3739
  return False, "invalid_reexpand_command"
2073
3740
 
2074
3741
 
3742
def verify_learned_fallback_artifact(
    receipt_id: str | None,
    *,
    expected_sha256: str,
    expected_bytes: int,
) -> tuple[bool, str | None, dict[str, Any]]:
    """Check that a stored fallback artifact matches the expected content.

    Walks the artifact read directories in order and inspects the first one
    that holds a metadata file and/or a content file for ``receipt_id``.

    Returns ``(verified, blocker, details)``:
      * ``(False, "invalid_reexpand_command", ...)`` when the receipt id is
        empty or does not match ``LEARNED_ARTIFACT_ID_RE``;
      * ``(False, "fallback_receipt_invalid", ...)`` when only one of the two
        files exists, either read was truncated, the metadata is not a JSON
        object for this receipt, or the recorded hash/size disagree with the
        stored content;
      * ``(False, "fallback_content_mismatch", ...)`` when the stored content
        differs from ``expected_sha256`` / ``expected_bytes``;
      * ``(False, "fallback_receipt_not_found", ...)`` when no directory has
        either file;
      * ``(True, None, details)`` on a full match.
    """
    if not (receipt_id and LEARNED_ARTIFACT_ID_RE.fullmatch(receipt_id)):
        return False, "invalid_reexpand_command", {"checked": False, "read_directories": []}

    search_dirs = context_diff_artifact_read_dirs()
    details: dict[str, Any] = {
        "checked": True,
        "read_directories": [str(candidate_dir) for candidate_dir in search_dirs],
        "matched_directory": None,
        "content_sha256": None,
        "content_bytes": None,
    }

    def rejected(reason: str) -> tuple[bool, str | None, dict[str, Any]]:
        return False, reason, details

    for directory in search_dirs:
        content_path, meta_path = context_diff_artifact_paths(directory, receipt_id)
        meta_loaded = read_bounded_regular_file(
            meta_path,
            max_bytes=MAX_LEARNED_COMPRESSION_ARTIFACT_METADATA_BYTES,
            label="learned-compression fallback metadata",
            missing_ok=True,
        )
        content_loaded = read_bounded_regular_file(
            content_path,
            max_bytes=max(MAX_LEARNED_COMPRESSION_INPUT_BYTES, expected_bytes),
            label="learned-compression fallback content",
            missing_ok=True,
        )

        absent = (meta_loaded is None, content_loaded is None)
        if all(absent):
            # Nothing stored here for this receipt; try the next directory.
            continue
        if any(absent):
            # A half-written artifact (only one of the two files) is invalid.
            return rejected("fallback_receipt_invalid")

        meta_raw, meta_truncated = meta_loaded
        content_raw, content_truncated = content_loaded
        if meta_truncated or content_truncated:
            return rejected("fallback_receipt_invalid")

        try:
            metadata = json.loads(meta_raw.decode("utf-8"))
        except (UnicodeDecodeError, json.JSONDecodeError):
            return rejected("fallback_receipt_invalid")
        if not isinstance(metadata, dict) or metadata.get("artifact_id") != receipt_id:
            return rejected("fallback_receipt_invalid")

        stored = metadata.get("stored_output")
        if isinstance(stored, dict):
            stored_sha = stored.get("sha256")
            stored_bytes = stored.get("bytes")
        else:
            stored_sha = None
            stored_bytes = None

        actual_sha = hashlib.sha256(content_raw).hexdigest()
        actual_bytes = len(content_raw)
        details.update({
            "matched_directory": str(directory),
            "content_sha256": actual_sha,
            "content_bytes": actual_bytes,
        })

        # First the artifact must agree with its own metadata, then with the
        # caller's expectation.
        if not (stored_sha == actual_sha and stored_bytes == actual_bytes):
            return rejected("fallback_receipt_invalid")
        if not (actual_sha == expected_sha256 and actual_bytes == expected_bytes):
            return rejected("fallback_content_mismatch")
        return True, None, details

    return rejected("fallback_receipt_not_found")
3802
+
3803
+
3804
def read_learned_candidate_replacement(args: argparse.Namespace) -> tuple[str | None, dict[str, Any]]:
    """Load the caller-supplied replacement candidate and describe it.

    The candidate comes from ``args.replacement_text`` (inline) or
    ``args.replacement_file`` (read through ``read_bounded_regular_file``).
    Both sources are capped at ``MAX_LEARNED_COMPRESSION_REPLACEMENT_BYTES``
    and decoded as UTF-8 with undecodable bytes replaced.

    Returns ``(text, meta)``. ``text`` is ``None`` when neither source was
    supplied. ``meta`` holds ``source_label``, ``bytes``, ``lines``,
    ``sha256`` (``None`` without a candidate), ``truncated`` and ``max_bytes``;
    the byte count and hash describe the (possibly truncated) raw bytes.

    Raises:
        RegistryError: if both sources are supplied, or if the bounded file
            read unexpectedly yields no data.
    """
    if args.replacement_text is not None and args.replacement_file:
        raise RegistryError("learned-compression emit accepts only one of --replacement-text or --replacement-file")

    text: str | None = None
    raw = b""
    truncated = False
    source_label: str | None = None

    if args.replacement_text is not None:
        # Command-line text may contain lone surrogates (undecodable argv
        # bytes); a strict encode would raise UnicodeEncodeError, so replace
        # them instead of crashing.
        raw = str(args.replacement_text).encode("utf-8", errors="replace")
        truncated = len(raw) > MAX_LEARNED_COMPRESSION_REPLACEMENT_BYTES
        raw = raw[:MAX_LEARNED_COMPRESSION_REPLACEMENT_BYTES]
        # Truncation can split a multi-byte character, hence errors="replace".
        text = raw.decode("utf-8", errors="replace")
        source_label = "inline"
    elif args.replacement_file:
        path = Path(args.replacement_file)
        loaded = read_bounded_regular_file(
            path,
            max_bytes=MAX_LEARNED_COMPRESSION_REPLACEMENT_BYTES,
            label="learned-compression candidate replacement",
        )
        # Explicit check rather than `assert`, which is stripped under -O.
        if loaded is None:
            raise RegistryError("learned-compression candidate replacement could not be read")
        raw, truncated = loaded
        text = raw.decode("utf-8", errors="replace")
        source_label = path.name

    has_candidate = text is not None
    return text, {
        "source_label": source_label,
        "bytes": len(raw),
        "lines": len(text.splitlines()) if has_candidate else 0,
        "sha256": hashlib.sha256(raw).hexdigest() if has_candidate else None,
        "truncated": truncated,
        "max_bytes": MAX_LEARNED_COMPRESSION_REPLACEMENT_BYTES,
    }
3838
+
3839
+
2075
3840
  def learned_compression_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
2076
3841
  text, input_meta = read_learned_input(args)
2077
3842
  receipt_id = args.exact_fallback_receipt.strip() if args.exact_fallback_receipt else None
@@ -2165,6 +3930,133 @@ def command_plan_learned_compression(args: argparse.Namespace) -> int:
2165
3930
  return 0
2166
3931
 
2167
3932
 
3933
def learned_compression_emit_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Assemble the emit-mode payload for a caller-supplied candidate.

    Starts from ``learned_compression_plan_payload(args)``, verifies the exact
    fallback artifact when the re-expand command has a valid shape, scans the
    candidate replacement, and collects readiness blockers. The candidate text
    is attached under ``candidate_replacement`` only when no blocker remains;
    otherwise that key is removed from the payload.
    """
    payload = learned_compression_plan_payload(args)

    receipt_id = None
    if args.exact_fallback_receipt:
        receipt_id = args.exact_fallback_receipt.strip()
    reexpand_command = None
    if args.reexpand_command:
        reexpand_command = args.reexpand_command.strip()
    reexpand_valid, _shape_blocker = valid_learned_reexpand_command(receipt_id, reexpand_command)

    # The artifact is only inspected once the command shape itself is valid.
    fallback_verified = False
    fallback_blocker = None
    fallback_verification: dict[str, Any] = {"checked": False, "read_directories": []}
    if reexpand_valid:
        fallback_verified, fallback_blocker, fallback_verification = verify_learned_fallback_artifact(
            receipt_id,
            expected_sha256=payload["input"]["sha256"],
            expected_bytes=payload["input"]["bytes"],
        )

    candidate_text, candidate_meta = read_learned_candidate_replacement(args)
    scan_text = candidate_text or ""
    candidate_counts = learned_signal_counts(scan_text)
    candidate_content_type = learned_content_type(scan_text, candidate_counts)
    candidate_signals = [name for name, count in candidate_counts.items() if count]
    has_candidate_text = candidate_text is not None and bool(candidate_text.strip())

    review_plan = payload["review_plan"]
    input_bytes = payload["input"]["bytes"]

    blockers = list(review_plan["readiness_blockers"])
    if fallback_blocker:
        blockers.append(fallback_blocker)
    if not has_candidate_text:
        blockers.append("missing_candidate_replacement")
    if candidate_meta["truncated"]:
        blockers.append("candidate_replacement_truncated")
    elif candidate_text is not None and candidate_meta["bytes"] >= input_bytes:
        blockers.append("candidate_not_smaller_than_input")
    if has_candidate_text and candidate_content_type != "prose":
        blockers.append("candidate_non_prose_input")
    blockers.extend(f"candidate_{name}" for name in candidate_signals)
    # Drop duplicates while keeping first-seen order.
    blockers = list(dict.fromkeys(blockers))
    ready = len(blockers) == 0

    payload["mode"] = "emit"
    payload["status"] = "candidate_emitted" if ready else "blocked_until_candidate_ready"
    payload["policy"] = {
        **payload["policy"],
        "runtime_compression_allowed": False,
        "caller_supplied_candidate_required": True,
        "caller_supplied_candidate_allowed": ready,
        "lossy_replacement_allowed": ready,
        "learned_compressor_called": False,
        "embedding_or_reranker_called": False,
        "model_call_allowed": False,
        "subprocess_allowed": False,
    }
    payload["exact_fallback"] = {
        "required": True,
        "available": bool(receipt_id and reexpand_command and reexpand_valid and fallback_verified),
        "receipt_id": receipt_id,
        "cli": reexpand_command,
        "verified": fallback_verified,
        "valid_command_shape": reexpand_valid,
        "verification": fallback_verification,
        "note": "Emit mode validates exact local fallback command shape and verifies local artifact content matches the input prose.",
    }
    payload["candidate_scan"] = {
        "content_type": candidate_content_type,
        "counts": candidate_counts,
        "protected_signals": list(candidate_signals),
    }
    payload["replacement"] = candidate_meta

    review_plan["readiness_blockers"] = blockers
    review_plan["protected_signals"] = [
        name for name, count in payload["protected_signal_scan"]["counts"].items() if count
    ]
    review_plan["candidate_protected_signals"] = list(candidate_signals)
    review_plan["next_steps"] = [
        "Human-review the caller-supplied candidate against the exact fallback before using it.",
        "Reject candidates that omit protected facts, prompt-like text, paths, code, diffs, identifiers, or numeric constants.",
        "Treat byte reduction as local proxy evidence only; do not claim hosted token/cost savings.",
    ]

    payload["claim_boundary"] = (
        "Explicit local learned-compression candidate emission only; ContextGuard does not run a learned compressor, "
        "model, embedding, reranker, subprocess, or external service, and byte reduction is not hosted API token or cost evidence."
    )

    bytes_after = 0 if candidate_text is None else candidate_meta["bytes"]
    payload["compression_evidence"] = {
        "bytes_before": input_bytes,
        "bytes_after": bytes_after,
        "byte_reduction": max(0, input_bytes - bytes_after),
        "byte_reduction_proxy_only": True,
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }

    if ready and candidate_text is not None:
        payload["candidate_replacement"] = {
            "text": candidate_text,
            **{key: candidate_meta[key] for key in ("bytes", "lines", "sha256", "source_label")},
            "caller_supplied": True,
        }
    else:
        # Never leak candidate text while any blocker is outstanding.
        payload.pop("candidate_replacement", None)
    return payload
4037
+
4038
+
4039
def command_emit_learned_compression(args: argparse.Namespace) -> int:
    """Run ``emit learned-compression`` and print the resulting report.

    With ``args.json`` the whole payload is emitted as JSON; otherwise a short
    text summary is printed. Returns 0 when the payload status is
    ``candidate_emitted`` and 1 for any other status.
    """
    payload = learned_compression_emit_payload(args)

    if args.json:
        emit_json(payload)
    else:
        if payload["status"] == "candidate_emitted":
            replacement = payload["replacement"]
            print("ContextGuard learned-compression candidate emitted")
            print(
                f"Candidate: bytes={replacement['bytes']} "
                f"sha256={replacement['sha256']}"
            )
            print(f"Exact fallback: {payload['exact_fallback']['cli']}")
        else:
            print("ContextGuard learned-compression candidate blocked")
        print(f"Status: {payload['status']}")
        readiness_blockers = payload["review_plan"]["readiness_blockers"]
        if readiness_blockers:
            print(f"Readiness blockers: {', '.join(readiness_blockers)}")
        print(payload["claim_boundary"])

    if payload["status"] == "candidate_emitted":
        return 0
    return 1
4058
+
4059
+
2168
4060
  def add_common_args(parser: argparse.ArgumentParser) -> None:
2169
4061
  parser.add_argument("--root", help="Project root for default project-local experiment config (default: cwd).")
2170
4062
  parser.add_argument("--config", help="Project-local config path. Relative paths resolve under --root; absolute paths must stay inside --root.")
@@ -2283,6 +4175,34 @@ def build_parser() -> argparse.ArgumentParser:
2283
4175
  local_proxy.add_argument("--json", action="store_true", help="Emit JSON output.")
2284
4176
  local_proxy.set_defaults(func=command_plan_local_proxy)
2285
4177
 
4178
+ external_proxy = plan_sub.add_parser(
4179
+ "local-proxy-external-forwarding",
4180
+ help="Dry-run an external-forwarding opt-in design gate without forwarding traffic.",
4181
+ )
4182
+ external_proxy.add_argument(
4183
+ "--external-forwarding-intent",
4184
+ action="store_true",
4185
+ help="Acknowledge intent to design a future external-forwarding proxy surface.",
4186
+ )
4187
+ external_proxy.add_argument(
4188
+ "--external-forwarding-design-ack",
4189
+ action="store_true",
4190
+ help="Acknowledge this command is design-only and does not enable external forwarding.",
4191
+ )
4192
+ external_proxy.add_argument("--allow-host", action="append", help="Explicit non-wildcard public host allowed by the future design. Repeatable.")
4193
+ external_proxy.add_argument("--allow-scheme", action="append", help="Allowed scheme for the future design; HTTPS is required. Repeatable.")
4194
+ external_proxy.add_argument("--threat-model-note", action="append", help="Threat-model note for the future external-forwarding design. Repeatable.")
4195
+ external_proxy.add_argument(
4196
+ "--credential-redaction-policy",
4197
+ help=f"Required policy: {LOCAL_PROXY_EXTERNAL_CREDENTIAL_REDACTION_POLICY}.",
4198
+ )
4199
+ external_proxy.add_argument(
4200
+ "--provider-evidence-boundary",
4201
+ help=f"Required policy: {LOCAL_PROXY_EXTERNAL_PROVIDER_EVIDENCE_BOUNDARY}.",
4202
+ )
4203
+ external_proxy.add_argument("--json", action="store_true", help="Emit JSON output.")
4204
+ external_proxy.set_defaults(func=command_plan_local_proxy_external_forwarding)
4205
+
2286
4206
  learned = plan_sub.add_parser(
2287
4207
  "learned-compression",
2288
4208
  help="Dry-run a deny-by-default learned/synthetic compression safety gate.",
@@ -2296,6 +4216,176 @@ def build_parser() -> argparse.ArgumentParser:
2296
4216
  learned.add_argument("--json", action="store_true", help="Emit JSON output.")
2297
4217
  learned.set_defaults(func=command_plan_learned_compression)
2298
4218
 
4219
+ emit_parser = sub.add_parser("emit", help="Emit explicit local runtime outputs for experimental lanes.")
4220
+ emit_sub = emit_parser.add_subparsers(dest="emit_command", required=True)
4221
+ emit_context_diff = emit_sub.add_parser(
4222
+ "context-diff-compaction",
4223
+ help="Emit a caller-supplied compact diff replacement only with exact retrieval metadata.",
4224
+ )
4225
+ emit_context_diff.add_argument("--input", help="Read original diff text from a file instead of stdin.")
4226
+ emit_context_diff.add_argument("--source-label", help="Safe label to use for the diff input source in reports.")
4227
+ emit_context_diff.add_argument("--receipt-id", required=True, help="Exact local artifact receipt id for the original diff.")
4228
+ emit_context_diff.add_argument("--reexpand-command", required=True, help="Exact command that restores the original diff.")
4229
+ replacement_group = emit_context_diff.add_mutually_exclusive_group(required=True)
4230
+ replacement_group.add_argument("--replacement-text", help="Caller-supplied compact replacement text to emit.")
4231
+ replacement_group.add_argument("--replacement-file", help="Read caller-supplied compact replacement text from a file.")
4232
+ emit_context_diff.add_argument("--json", action="store_true", help="Emit JSON output.")
4233
+ emit_context_diff.set_defaults(func=command_emit_context_diff_compaction)
4234
+
4235
+ emit_visual_ocr = emit_sub.add_parser(
4236
+ "visual-crop-ocr",
4237
+ help="Emit a caller-supplied visual crop/OCR evidence pack without image/OCR services.",
4238
+ )
4239
+ emit_visual_ocr.add_argument("--full-evidence-receipt", help="User-supplied receipt/id for the original full visual evidence.")
4240
+ emit_visual_ocr.add_argument("--full-evidence-label", help="Safe label for the full visual evidence.")
4241
+ emit_visual_ocr.add_argument("--crop-label", help="Safe label for the cropped region or crop fixture.")
4242
+ emit_visual_ocr.add_argument("--crop-bounds", help="Crop bounds as x,y,width,height integers.")
4243
+ emit_visual_ocr.add_argument("--image-size", help="Original image size as width,height integers.")
4244
+ emit_visual_ocr.add_argument("--ocr-text", help="Bounded OCR fixture text supplied inline.")
4245
+ emit_visual_ocr.add_argument("--ocr-text-file", help="Read bounded OCR fixture text from a UTF-8 text file.")
4246
+ emit_visual_ocr.add_argument("--ocr-source-label", help="Safe label for OCR text source; defaults to inline or file basename.")
4247
+ emit_visual_ocr.add_argument("--ocr-confidence", help="OCR confidence as a finite decimal from 0.0 to 1.0.")
4248
+ emit_visual_ocr.add_argument("--ocr-error-note", action="append", help="Known OCR error/uncertainty note. Repeatable.")
4249
+ emit_visual_ocr.add_argument("--missed-context-note", action="append", help="Potential context outside crop/OCR text. Repeatable.")
4250
+ emit_visual_ocr.add_argument("--json", action="store_true", help="Emit JSON output.")
4251
+ emit_visual_ocr.set_defaults(func=command_emit_visual_crop_ocr)
4252
+
4253
+ emit_learned = emit_sub.add_parser(
4254
+ "learned-compression",
4255
+ help="Emit a caller-supplied compact prose candidate only with verified exact fallback.",
4256
+ )
4257
+ emit_learned.add_argument("--input", help="Read original prose text from a file instead of stdin.")
4258
+ emit_learned.add_argument("--source-label", help="Safe label to use for the input source in reports.")
4259
+ emit_learned.add_argument("--sanitized", action="store_true", help="Assert input is already sanitized.")
4260
+ emit_learned.add_argument("--trusted-source", action="store_true", help="Assert input came from a trusted source.")
4261
+ emit_learned.add_argument("--exact-fallback-receipt", required=True, help="Local exact fallback receipt id for the original text.")
4262
+ emit_learned.add_argument("--reexpand-command", required=True, help="Local exact re-expand command bound to the receipt id.")
4263
+ learned_replacement_group = emit_learned.add_mutually_exclusive_group(required=True)
4264
+ learned_replacement_group.add_argument("--replacement-text", help="Caller-supplied compact prose candidate to emit.")
4265
+ learned_replacement_group.add_argument("--replacement-file", help="Read caller-supplied compact prose candidate from a file.")
4266
+ emit_learned.add_argument("--json", action="store_true", help="Emit JSON output.")
4267
+ emit_learned.set_defaults(func=command_emit_learned_compression)
4268
+
4269
+ record_parser = sub.add_parser("record", help="Run explicit local runtime recorders for experimental lanes.")
4270
+ record_sub = record_parser.add_subparsers(dest="record_command", required=True)
4271
+ record_self_hosted = record_sub.add_parser(
4272
+ "self-hosted-metrics-ledger",
4273
+ help="Append one self-hosted/local metrics sidecar row to a JSONL ledger.",
4274
+ )
4275
+ record_self_hosted.add_argument("--ledger-jsonl", required=True, help="Local JSONL ledger path to append.")
4276
+ record_self_hosted.add_argument("--input", help="Read an explicit self_hosted_metrics JSON envelope from a file instead of stdin.")
4277
+ record_self_hosted.add_argument("--source-label", help="Safe label to use for the input source in reports.")
4278
+ record_self_hosted.add_argument("--latency-ms", type=float, default=None, help="Local/model-server latency in milliseconds.")
4279
+ record_self_hosted.add_argument("--peak-memory-mb", type=float, default=None, help="Peak local/model-server memory in MiB/MB.")
4280
+ record_self_hosted.add_argument("--quality-score", type=float, default=None, help="Quality score from 0.0 to 1.0.")
4281
+ record_self_hosted.add_argument("--energy-wh", type=float, default=None, help="Diagnostic local energy use in watt-hours.")
4282
+ record_self_hosted.add_argument("--local-cost-usd", type=float, default=None, help="Diagnostic local/self-hosted cost in USD.")
4283
+ record_self_hosted.add_argument("--tokens-per-second", type=float, default=None, help="Diagnostic local throughput.")
4284
+ record_self_hosted.add_argument("--model-server", help="Sanitized label for local model server/runtime.")
4285
+ record_self_hosted.add_argument("--optimization", help="Sanitized label for the local optimization under test.")
4286
+ record_self_hosted.add_argument("--quality-metric", help="Sanitized label for quality metric.")
4287
+ record_self_hosted.add_argument("--hardware", help="Sanitized local hardware label.")
4288
+ record_self_hosted.add_argument("--runtime", help="Sanitized local runtime label.")
4289
+ record_self_hosted.add_argument("--dataset", help="Sanitized dataset label.")
4290
+ record_self_hosted.add_argument("--task-id", default="self-hosted-metrics-manual", help="Sanitized task id for the ledger row.")
4291
+ record_self_hosted.add_argument("--variant", default="self-hosted-metrics-ledger", help="Sanitized variant label for the ledger row.")
4292
+ record_self_hosted.add_argument(
4293
+ "--success",
4294
+ choices=("true", "false", "unknown"),
4295
+ default="unknown",
4296
+ help="Optional success value for the local run; unknown writes JSON null.",
4297
+ )
4298
+ record_self_hosted.add_argument(
4299
+ "--notes",
4300
+ default="explicit self-hosted metrics record; no hosted API savings claim",
4301
+ help="Sanitized note for the ledger row.",
4302
+ )
4303
+ record_self_hosted.add_argument("--json", action="store_true", help="Emit JSON output.")
4304
+ record_self_hosted.set_defaults(func=command_record_self_hosted_metrics_ledger)
4305
+
4306
+ record_local_proxy = record_sub.add_parser(
4307
+ "local-proxy-runtime-gate",
4308
+ help="Append one localhost-only local proxy runtime-gate row without starting a proxy.",
4309
+ )
4310
+ record_local_proxy.add_argument("--input", help="Read a local_proxy JSON envelope from a file instead of CLI flags.")
4311
+ record_local_proxy.add_argument("--bind-host", help="Advisory bind host; must be localhost/loopback.")
4312
+ record_local_proxy.add_argument("--bind-port", default=None, help="Advisory bind port; 0 means unspecified/ephemeral.")
4313
+ record_local_proxy.add_argument("--target-host", help="Advisory target host; must be localhost/loopback.")
4314
+ record_local_proxy.add_argument("--target-port", default=None, help="Advisory target port; 0 means unspecified.")
4315
+ record_local_proxy.add_argument("--upstream-url", help="Advisory upstream URL; host must be localhost/loopback.")
4316
+ record_local_proxy.add_argument("--ledger-jsonl", required=True, help="Local JSONL ledger path to append the gate row.")
4317
+ record_local_proxy.add_argument("--proxy-label", help="Safe label for this local proxy gate record.")
4318
+ record_local_proxy.add_argument("--api-key", help="Blocked/redacted API key material; never persisted or emitted raw.")
4319
+ record_local_proxy.add_argument("--authorization-header", help="Blocked/redacted Authorization header; never persisted or emitted raw.")
4320
+ record_local_proxy.add_argument("--persist-api-key", action="store_true", help="Declare API-key persistence intent; blocked.")
4321
+ record_local_proxy.add_argument(
4322
+ "--external-forwarding-intent",
4323
+ action="store_true",
4324
+ help="Declare future external forwarding intent; blocked in this gate recorder.",
4325
+ )
4326
+ record_local_proxy.add_argument(
4327
+ "--runtime-gate-ack",
4328
+ action="store_true",
4329
+ help="Acknowledge this is only a local gate record and any forwarding needs a separate runtime gate.",
4330
+ )
4331
+ record_local_proxy.add_argument("--json", action="store_true", help="Emit JSON output.")
4332
+ record_local_proxy.set_defaults(func=command_record_local_proxy_runtime_gate)
4333
+
4334
+ serve_parser = sub.add_parser("serve", help="Run explicit bounded local servers for experimental lanes.")
4335
+ serve_sub = serve_parser.add_subparsers(dest="serve_command", required=True)
4336
+ serve_local_proxy = serve_sub.add_parser(
4337
+ "local-proxy",
4338
+ help="Serve one bounded localhost-only HTTP forwarding request.",
4339
+ )
4340
+ serve_local_proxy.add_argument("--input", help="Read a local_proxy JSON envelope from a file instead of CLI flags.")
4341
+ serve_local_proxy.add_argument("--bind-host", help="Bind host; actual serving requires a literal loopback IP.")
4342
+ serve_local_proxy.add_argument("--bind-port", default=None, help="Bind port; must be a nonzero explicit port for serving.")
4343
+ serve_local_proxy.add_argument("--target-host", help="Target host; actual forwarding requires a literal loopback IP.")
4344
+ serve_local_proxy.add_argument("--target-port", default=None, help="Target port; must be a nonzero explicit port for forwarding.")
4345
+ serve_local_proxy.add_argument("--upstream-url", help="Optional upstream URL; host must be a literal loopback IP for serving.")
4346
+ serve_local_proxy.add_argument("--proxy-label", help="Safe label for this local proxy serve run.")
4347
+ serve_local_proxy.add_argument(
4348
+ "--diagnostic-ledger-jsonl",
4349
+ help="Append one shifted-cost diagnostic JSONL row only after a successful loopback forwarded request.",
4350
+ )
4351
+ serve_local_proxy.add_argument("--api-key", help="Blocked/redacted API key material; never persisted or emitted raw.")
4352
+ serve_local_proxy.add_argument("--authorization-header", help="Blocked/redacted Authorization header; never persisted or emitted raw.")
4353
+ serve_local_proxy.add_argument("--persist-api-key", action="store_true", help="Declare API-key persistence intent; blocked.")
4354
+ serve_local_proxy.add_argument(
4355
+ "--external-forwarding-intent",
4356
+ action="store_true",
4357
+ help="Declare external forwarding intent; blocked in this local-only runtime.",
4358
+ )
4359
+ serve_local_proxy.add_argument(
4360
+ "--runtime-gate-ack",
4361
+ action="store_true",
4362
+ help="Acknowledge this is an explicit experimental runtime.",
4363
+ )
4364
+ serve_local_proxy.add_argument(
4365
+ "--forwarding-gate-ack",
4366
+ action="store_true",
4367
+ help="Acknowledge this starts a loopback-only forwarding listener for one bounded request.",
4368
+ )
4369
+ serve_local_proxy.add_argument("--once", action="store_true", help="Serve exactly one accepted or blocked request; required for this MVP.")
4370
+ serve_local_proxy.add_argument(
4371
+ "--max-request-bytes",
4372
+ default=None,
4373
+ help=f"Maximum request body bytes, 1..{LOCAL_PROXY_MAX_FORWARD_BYTES}.",
4374
+ )
4375
+ serve_local_proxy.add_argument(
4376
+ "--max-response-bytes",
4377
+ default=None,
4378
+ help=f"Maximum upstream response bytes, 1..{LOCAL_PROXY_MAX_FORWARD_BYTES}.",
4379
+ )
4380
+ serve_local_proxy.add_argument(
4381
+ "--timeout-seconds",
4382
+ default=None,
4383
+ help=f"Listener/upstream timeout seconds, 0.1..{LOCAL_PROXY_MAX_TIMEOUT_SECONDS}.",
4384
+ )
4385
+ serve_local_proxy.add_argument("--ready-file", help=argparse.SUPPRESS)
4386
+ serve_local_proxy.add_argument("--json", action="store_true", help="Emit JSON output after the single request completes.")
4387
+ serve_local_proxy.set_defaults(func=command_serve_local_proxy)
4388
+
2299
4389
  return parser
2300
4390
 
2301
4391