@ictechgy/context-guard 0.4.7 → 0.4.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/README.ko.md +60 -22
- package/README.md +55 -21
- package/context-guard-kit/README.md +2 -2
- package/context-guard-kit/context_filter.py +212 -21
- package/context-guard-kit/context_guard_cli.py +174 -2
- package/context-guard-kit/context_pack.py +66 -21
- package/context-guard-kit/cost_guard.py +126 -59
- package/context-guard-kit/experimental_registry.py +2476 -166
- package/package.json +1 -1
- package/packaging/homebrew/context-guard.rb.template +1 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +1 -1
- package/plugins/context-guard/README.md +9 -2
- package/plugins/context-guard/bin/context-guard +174 -2
- package/plugins/context-guard/bin/context-guard-cost +126 -59
- package/plugins/context-guard/bin/context-guard-experiments +2476 -166
- package/plugins/context-guard/bin/context-guard-filter +212 -21
- package/plugins/context-guard/bin/context-guard-pack +66 -21
|
@@ -11,13 +11,20 @@ from __future__ import annotations
|
|
|
11
11
|
import argparse
|
|
12
12
|
from dataclasses import asdict, dataclass
|
|
13
13
|
from datetime import datetime, timezone
|
|
14
|
+
import http.client
|
|
15
|
+
from http.server import BaseHTTPRequestHandler, HTTPServer
|
|
14
16
|
import hashlib
|
|
15
17
|
import ipaddress
|
|
16
18
|
import json
|
|
17
19
|
import math
|
|
20
|
+
import os
|
|
18
21
|
import re
|
|
22
|
+
import secrets
|
|
19
23
|
import shlex
|
|
24
|
+
import socket
|
|
25
|
+
from socketserver import TCPServer
|
|
20
26
|
from pathlib import Path
|
|
27
|
+
import stat
|
|
21
28
|
import sys
|
|
22
29
|
from typing import Any, NoReturn
|
|
23
30
|
import unicodedata
|
|
@@ -26,9 +33,16 @@ from urllib.parse import urlparse
|
|
|
26
33
|
# Tool identity, config schema tag, and the default project-relative config path.
TOOL_NAME = "context-guard-experiments"
CONFIG_SCHEMA_VERSION = "contextguard.experiments.v1"
DEFAULT_CONFIG = Path(".context-guard") / "experiments.json"
# NOTE(review): the MAX_*_BYTES values below look like byte ceilings for the
# inputs named in each constant; the code enforcing them is outside this section.
MAX_CONFIG_BYTES = 64_000
MAX_CONTEXT_DIFF_INPUT_BYTES = 256_000
MAX_CONTEXT_DIFF_REPLACEMENT_BYTES = 128_000
MAX_CONTEXT_DIFF_ARTIFACT_METADATA_BYTES = 64_000
# Project-relative artifact directories (current name and the older
# ".claude-token-optimizer" name).
DEFAULT_CONTEXT_DIFF_ARTIFACT_DIR = Path(".context-guard") / "artifacts"
LEGACY_CONTEXT_DIFF_ARTIFACT_DIR = Path(".claude-token-optimizer") / "artifacts"
MAX_VISUAL_OCR_TEXT_BYTES = 64_000
MAX_LEARNED_COMPRESSION_INPUT_BYTES = 128_000
MAX_LEARNED_COMPRESSION_REPLACEMENT_BYTES = 64_000
MAX_LEARNED_COMPRESSION_ARTIFACT_METADATA_BYTES = 64_000
MAX_SELF_HOSTED_METRICS_INPUT_BYTES = 64_000
# Schema tag and key name for self-hosted metrics records.
SELF_HOSTED_METRICS_SCHEMA_VERSION = "contextguard.bench.self-hosted-metrics.v1"
SELF_HOSTED_METRICS_KEY = "self_hosted_metrics"
|
|
@@ -44,11 +58,58 @@ TOKEN_PROXY_BYTES_PER_TOKEN = 4
|
|
|
44
58
|
# NOTE(review): presumably structural limits for parsed self-hosted metrics JSON;
# the validator that reads them is not part of this section.
MAX_SELF_HOSTED_JSON_DEPTH = 100
MAX_SELF_HOSTED_JSON_NODES = 10_000
# Schema identifiers for the different local-proxy record kinds.
LOCAL_PROXY_SCHEMA_VERSION = "contextguard.experiments.local-proxy-plan.v1"
LOCAL_PROXY_GATE_SCHEMA_VERSION = "contextguard.experiments.local-proxy-gate.v1"
LOCAL_PROXY_FORWARD_SCHEMA_VERSION = "contextguard.experiments.local-proxy-forward.v1"
LOCAL_PROXY_DIAGNOSTIC_SCHEMA_VERSION = "contextguard.experiments.local-proxy-forward-diagnostic.v1"
LOCAL_PROXY_READY_SCHEMA_VERSION = "contextguard.experiments.local-proxy-ready.v1"
LOCAL_PROXY_EXTERNAL_DESIGN_SCHEMA_VERSION = "contextguard.experiments.local-proxy-external-forwarding-design.v1"
# Default bind/target endpoints: the IPv4 loopback address with port 0.
LOCAL_PROXY_DEFAULT_BIND_HOST = "127.0.0.1"
LOCAL_PROXY_DEFAULT_BIND_PORT = 0
LOCAL_PROXY_DEFAULT_TARGET_HOST = "127.0.0.1"
LOCAL_PROXY_DEFAULT_TARGET_PORT = 0
LOCAL_PROXY_LOCALHOST_NAMES = {"localhost"}
# Lower-case spellings recognised as boolean true / false; the empty string is
# listed among the false values.
LOCAL_PROXY_TRUE_VALUES = {"1", "on", "true", "yes", "y"}
LOCAL_PROXY_FALSE_VALUES = {"", "0", "false", "n", "no", "off"}
# Size values: 64 KiB, 256 KiB and 2 MiB respectively.
LOCAL_PROXY_DEFAULT_MAX_REQUEST_BYTES = 64 * 1024
LOCAL_PROXY_DEFAULT_MAX_RESPONSE_BYTES = 256 * 1024
LOCAL_PROXY_MAX_FORWARD_BYTES = 2 * 1024 * 1024
# Timeout values in seconds (per the constant names).
LOCAL_PROXY_DEFAULT_TIMEOUT_SECONDS = 5.0
LOCAL_PROXY_MAX_TIMEOUT_SECONDS = 30.0
# Values tied to the external-forwarding design plan.
LOCAL_PROXY_EXTERNAL_ALLOWED_SCHEMES = {"https"}
LOCAL_PROXY_EXTERNAL_CREDENTIAL_REDACTION_POLICY = "strip-sensitive-headers"
LOCAL_PROXY_EXTERNAL_PROVIDER_EVIDENCE_BOUNDARY = "diagnostic-only-provider-measured-required"
# Credential-bearing header names, all written in lower case.
# NOTE(review): callers presumably lower-case header names before lookup - confirm.
LOCAL_PROXY_SENSITIVE_HEADER_NAMES = {
    "authorization",
    "proxy-authorization",
    "x-api-key",
    "api-key",
    "x-anthropic-api-key",
    "x-openai-api-key",
    "openai-api-key",
    "cookie",
    "set-cookie",
}
# Connection-scoped header names, all written in lower case.
LOCAL_PROXY_HOP_BY_HOP_HEADERS = {
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailer",
    "transfer-encoding",
    "upgrade",
}
|
|
102
|
+
# First path components that are allowed to be symlinks, mapped to the only
# target accepted for each. normalize_allowed_first_absolute_symlink() rewrites
# a matching absolute path onto that target.
ALLOWED_FIRST_COMPONENT_SYMLINKS = {
    "tmp": Path("/private/tmp"),
    "var": Path("/private/var"),
}
# Platform capability probes, evaluated once at import time. getattr() supplies
# an empty set when the os module lacks the supports_* attribute, which makes
# the corresponding probe False.
DIR_FD_OPEN_SUPPORTED = os.open in getattr(os, "supports_dir_fd", set())
DIR_FD_MKDIR_SUPPORTED = os.mkdir in getattr(os, "supports_dir_fd", set())
# os.stat must accept both dir_fd= and follow_symlinks=False.
DIR_FD_STAT_NOFOLLOW_SUPPORTED = (
    os.stat in getattr(os, "supports_dir_fd", set())
    and os.stat in getattr(os, "supports_follow_symlinks", set())
)
# True when the os module exposes the O_NOFOLLOW open flag.
NO_FOLLOW_SUPPORTED = hasattr(os, "O_NOFOLLOW")
|
|
52
113
|
|
|
53
114
|
|
|
54
115
|
@dataclass(frozen=True)
|
|
@@ -130,37 +191,51 @@ EXPERIMENTS: tuple[Experiment, ...] = (
|
|
|
130
191
|
Experiment(
|
|
131
192
|
id="context-diff-compaction",
|
|
132
193
|
name="Reviewable context-diff compaction",
|
|
133
|
-
summary="
|
|
194
|
+
summary="Explicit receipt-backed runtime for caller-supplied compact diff replacements with stable exact handles.",
|
|
134
195
|
stability="experimental",
|
|
135
196
|
default_enabled=False,
|
|
136
197
|
risk_level="medium",
|
|
137
198
|
claim_boundary="Smaller local diffs are proxy evidence only; hosted savings require provider-measured matched tasks.",
|
|
138
199
|
gate_requirements=("explicit opt-in", "human-reviewable diff", "local receipt", "exact re-expand handle"),
|
|
139
|
-
runtime_status="available-
|
|
140
|
-
commands=(
|
|
141
|
-
|
|
200
|
+
runtime_status="available-explicit-runtime",
|
|
201
|
+
commands=(
|
|
202
|
+
"context-guard experiments plan context-diff-compaction",
|
|
203
|
+
"context-guard experiments emit context-diff-compaction --receipt-id <id> --reexpand-command <cmd>",
|
|
204
|
+
),
|
|
205
|
+
opt_in_flags=(
|
|
206
|
+
"plan context-diff-compaction",
|
|
207
|
+
"emit context-diff-compaction",
|
|
208
|
+
"--receipt-id",
|
|
209
|
+
"--reexpand-command",
|
|
210
|
+
"--replacement-text|--replacement-file",
|
|
211
|
+
),
|
|
142
212
|
config_effect=(
|
|
143
|
-
"Registry enablement records project-local intent only; context-diff
|
|
144
|
-
"
|
|
213
|
+
"Registry enablement records project-local intent only; context-diff replacement emits only through the "
|
|
214
|
+
"explicit emit command with exact retrieval metadata and caller-supplied compact text."
|
|
145
215
|
),
|
|
146
216
|
evidence_contract=(
|
|
147
|
-
"
|
|
148
|
-
"
|
|
217
|
+
"Emitted replacements require human-reviewable hunks, caller-supplied compact text, and exact local "
|
|
218
|
+
"artifact content that matches the input diff plus re-expand metadata; smaller local diffs remain proxy "
|
|
219
|
+
"evidence only."
|
|
149
220
|
),
|
|
150
221
|
),
|
|
151
222
|
Experiment(
|
|
152
223
|
id="visual-crop-ocr",
|
|
153
|
-
name="Visual crop/OCR evidence
|
|
154
|
-
summary="
|
|
224
|
+
name="Visual crop/OCR evidence pack",
|
|
225
|
+
summary="Explicit local runtime for caller-supplied visual crop/OCR evidence packs.",
|
|
155
226
|
stability="experimental",
|
|
156
227
|
default_enabled=False,
|
|
157
228
|
risk_level="medium",
|
|
158
229
|
claim_boundary="Image/OCR byte reductions are proxy evidence until provider image/text token fields are measured.",
|
|
159
230
|
gate_requirements=("explicit opt-in", "original evidence preserved", "confidence/error notes", "missed-context guardrail"),
|
|
160
|
-
runtime_status="available-
|
|
161
|
-
commands=(
|
|
231
|
+
runtime_status="available-explicit-runtime",
|
|
232
|
+
commands=(
|
|
233
|
+
"context-guard experiments plan visual-crop-ocr",
|
|
234
|
+
"context-guard experiments emit visual-crop-ocr",
|
|
235
|
+
),
|
|
162
236
|
opt_in_flags=(
|
|
163
237
|
"plan visual-crop-ocr",
|
|
238
|
+
"emit visual-crop-ocr",
|
|
164
239
|
"--full-evidence-receipt",
|
|
165
240
|
"--crop-bounds",
|
|
166
241
|
"--image-size",
|
|
@@ -170,48 +245,64 @@ EXPERIMENTS: tuple[Experiment, ...] = (
|
|
|
170
245
|
"--missed-context-note",
|
|
171
246
|
),
|
|
172
247
|
config_effect=(
|
|
173
|
-
"Registry enablement records project-local intent only; visual crop/OCR
|
|
174
|
-
"
|
|
248
|
+
"Registry enablement records project-local intent only; visual crop/OCR evidence packs emit only through "
|
|
249
|
+
"the explicit emit command and do not run OCR, crop images, call providers, write files, or change stable behavior."
|
|
175
250
|
),
|
|
176
251
|
evidence_contract=(
|
|
177
|
-
"
|
|
178
|
-
"missed-context guardrails before human review."
|
|
252
|
+
"Emitted evidence packs require the full visual evidence receipt plus caller-supplied crop/OCR evidence, "
|
|
253
|
+
"OCR confidence/error notes when OCR is present, and missed-context guardrails before human review."
|
|
179
254
|
),
|
|
180
255
|
),
|
|
181
256
|
Experiment(
|
|
182
257
|
id="learned-compression",
|
|
183
|
-
name="Learned/synthetic compression
|
|
184
|
-
summary="
|
|
258
|
+
name="Learned/synthetic compression candidate gate",
|
|
259
|
+
summary="Explicit local runtime for caller-supplied compact prose candidates with verified exact fallback.",
|
|
185
260
|
stability="experimental",
|
|
186
261
|
default_enabled=False,
|
|
187
262
|
risk_level="high",
|
|
188
263
|
claim_boundary="Semantic compression cannot claim savings or correctness without matched-task quality and provider token evidence.",
|
|
189
264
|
gate_requirements=("explicit opt-in", "sanitized unprotected prose only", "protected-zone denial", "exact fallback or receipt"),
|
|
190
|
-
runtime_status="available-
|
|
191
|
-
commands=(
|
|
192
|
-
|
|
265
|
+
runtime_status="available-explicit-runtime",
|
|
266
|
+
commands=(
|
|
267
|
+
"context-guard experiments plan learned-compression",
|
|
268
|
+
"context-guard experiments emit learned-compression --exact-fallback-receipt <id> --reexpand-command <cmd>",
|
|
269
|
+
),
|
|
270
|
+
opt_in_flags=(
|
|
271
|
+
"plan learned-compression",
|
|
272
|
+
"emit learned-compression",
|
|
273
|
+
"--sanitized",
|
|
274
|
+
"--trusted-source",
|
|
275
|
+
"--exact-fallback-receipt",
|
|
276
|
+
"--reexpand-command",
|
|
277
|
+
"--replacement-text|--replacement-file",
|
|
278
|
+
),
|
|
193
279
|
config_effect=(
|
|
194
|
-
"Registry enablement records project-local intent only; learned
|
|
195
|
-
"and
|
|
280
|
+
"Registry enablement records project-local intent only; learned-compression candidates emit only through "
|
|
281
|
+
"the explicit emit command and do not run learned compressors, embeddings, rerankers, model calls, subprocesses, or external services."
|
|
196
282
|
),
|
|
197
283
|
evidence_contract=(
|
|
198
|
-
"
|
|
199
|
-
"denial of protected or prompt-like signals."
|
|
284
|
+
"Emitted candidates require caller-asserted sanitized trusted prose, verified exact local fallback content, "
|
|
285
|
+
"a smaller caller-supplied prose candidate, and denial of protected or prompt-like signals."
|
|
200
286
|
),
|
|
201
287
|
),
|
|
202
288
|
Experiment(
|
|
203
289
|
id="self-hosted-metrics-ledger",
|
|
204
290
|
name="Self-hosted metrics ledger",
|
|
205
|
-
summary="
|
|
291
|
+
summary="Explicit local ledger runtime for self-hosted/local metrics sidecars kept separate from hosted API claims.",
|
|
206
292
|
stability="experimental",
|
|
207
293
|
default_enabled=False,
|
|
208
294
|
risk_level="low",
|
|
209
295
|
claim_boundary="Self-hosted memory/latency metrics must stay separate from hosted API token/cost claims.",
|
|
210
296
|
gate_requirements=("explicit opt-in", "separate ledger fields", "shifted-cost accounting"),
|
|
211
|
-
runtime_status="available-
|
|
212
|
-
commands=(
|
|
297
|
+
runtime_status="available-explicit-runtime",
|
|
298
|
+
commands=(
|
|
299
|
+
"context-guard experiments plan self-hosted-metrics-ledger",
|
|
300
|
+
"context-guard experiments record self-hosted-metrics-ledger --ledger-jsonl <path>",
|
|
301
|
+
),
|
|
213
302
|
opt_in_flags=(
|
|
214
303
|
"plan self-hosted-metrics-ledger",
|
|
304
|
+
"record self-hosted-metrics-ledger",
|
|
305
|
+
"--ledger-jsonl",
|
|
215
306
|
"--input",
|
|
216
307
|
"--latency-ms",
|
|
217
308
|
"--peak-memory-mb",
|
|
@@ -223,43 +314,68 @@ EXPERIMENTS: tuple[Experiment, ...] = (
|
|
|
223
314
|
"--optimization",
|
|
224
315
|
),
|
|
225
316
|
config_effect=(
|
|
226
|
-
"Registry enablement records project-local intent only; self-hosted metrics
|
|
227
|
-
"
|
|
317
|
+
"Registry enablement records project-local intent only; self-hosted metrics still write a ledger only "
|
|
318
|
+
"when the explicit record command is invoked with --ledger-jsonl."
|
|
228
319
|
),
|
|
229
320
|
evidence_contract=(
|
|
230
|
-
"
|
|
231
|
-
"from hosted API token/cost savings."
|
|
321
|
+
"The explicit record command writes context-guard-bench JSONL ledger sidecars; self-hosted metrics "
|
|
322
|
+
"remain separate from hosted API token/cost savings."
|
|
232
323
|
),
|
|
233
324
|
),
|
|
234
325
|
Experiment(
|
|
235
326
|
id="local-proxy",
|
|
236
|
-
name="Local proxy
|
|
237
|
-
summary="
|
|
327
|
+
name="Local proxy runtime gate",
|
|
328
|
+
summary="Explicit local gate-record runtime for localhost-only proxy experiments with no hidden forwarding or API-key persistence.",
|
|
238
329
|
stability="experimental",
|
|
239
330
|
default_enabled=False,
|
|
240
331
|
risk_level="high",
|
|
241
332
|
claim_boundary="Proxy metrics are diagnostic only; no hosted savings claim without provider-measured evidence.",
|
|
242
333
|
gate_requirements=("explicit opt-in", "localhost-only default", "no API-key persistence", "no hidden external forwarding"),
|
|
243
|
-
runtime_status="available-
|
|
244
|
-
commands=(
|
|
334
|
+
runtime_status="available-explicit-runtime",
|
|
335
|
+
commands=(
|
|
336
|
+
"context-guard experiments plan local-proxy",
|
|
337
|
+
"context-guard experiments plan local-proxy-external-forwarding",
|
|
338
|
+
"context-guard experiments record local-proxy-runtime-gate --ledger-jsonl <path>",
|
|
339
|
+
"context-guard experiments serve local-proxy --bind-host 127.0.0.1 --bind-port <port> --target-host 127.0.0.1 --target-port <port> --runtime-gate-ack --forwarding-gate-ack --once",
|
|
340
|
+
"context-guard experiments serve local-proxy --diagnostic-ledger-jsonl <path> ...",
|
|
341
|
+
),
|
|
245
342
|
opt_in_flags=(
|
|
246
343
|
"plan local-proxy",
|
|
344
|
+
"plan local-proxy-external-forwarding",
|
|
345
|
+
"record local-proxy-runtime-gate",
|
|
346
|
+
"serve local-proxy",
|
|
247
347
|
"--bind-host",
|
|
248
348
|
"--bind-port",
|
|
249
349
|
"--target-host",
|
|
250
350
|
"--target-port",
|
|
251
351
|
"--upstream-url",
|
|
352
|
+
"--ledger-jsonl",
|
|
252
353
|
"--runtime-gate-ack",
|
|
354
|
+
"--forwarding-gate-ack",
|
|
355
|
+
"--once",
|
|
356
|
+
"--max-request-bytes",
|
|
357
|
+
"--max-response-bytes",
|
|
358
|
+
"--diagnostic-ledger-jsonl",
|
|
253
359
|
"--external-forwarding-intent",
|
|
360
|
+
"--external-forwarding-design-ack",
|
|
361
|
+
"--allow-host",
|
|
362
|
+
"--allow-scheme",
|
|
363
|
+
"--threat-model-note",
|
|
364
|
+
"--credential-redaction-policy",
|
|
365
|
+
"--provider-evidence-boundary",
|
|
254
366
|
"--persist-api-key",
|
|
255
367
|
),
|
|
256
368
|
config_effect=(
|
|
257
|
-
"Registry enablement records project-local intent only; local proxy
|
|
258
|
-
"
|
|
369
|
+
"Registry enablement records project-local intent only; local proxy record/serve runtimes run only through "
|
|
370
|
+
"explicit commands. Serve binds and forwards only literal loopback addresses, blocks credential material, "
|
|
371
|
+
"and never persists API keys or calls non-local services; external-forwarding planning is design-only."
|
|
259
372
|
),
|
|
260
373
|
evidence_contract=(
|
|
261
|
-
"
|
|
262
|
-
"
|
|
374
|
+
"Gate rows require localhost-only bind/target metadata and explicit runtime gate acknowledgement. Serve "
|
|
375
|
+
"evidence requires loopback-only bind/target IPs, explicit forwarding acknowledgement, no credential "
|
|
376
|
+
"forwarding or persistence, bounded bytes/timeouts, and optional diagnostic ledger rows that remain "
|
|
377
|
+
"shifted-cost evidence only. External-forwarding design plans require threat model notes, explicit "
|
|
378
|
+
"allowlists, credential redaction policy, and provider-evidence boundaries before any future runtime."
|
|
263
379
|
),
|
|
264
380
|
),
|
|
265
381
|
)
|
|
@@ -276,6 +392,461 @@ def fail(message: str, code: int = 2) -> NoReturn:
|
|
|
276
392
|
raise SystemExit(code)
|
|
277
393
|
|
|
278
394
|
|
|
395
|
+
def os_error_detail(exc: OSError) -> str:
    """Return a short description of *exc* suitable for an error message.

    The text is ``exc.strerror`` when that is set, otherwise the exception's
    class name. When the exception carries an errno it is appended as
    ``" (errno N)"``.
    """
    description = exc.strerror or exc.__class__.__name__
    return description if exc.errno is None else f"{description} (errno {exc.errno})"
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def _no_follow_flag(*, label: str) -> int:
    """Return ``os.O_NOFOLLOW``.

    Raises:
        RegistryError: when the platform does not expose ``O_NOFOLLOW``;
            *label* names the operation in the message.
    """
    if NO_FOLLOW_SUPPORTED:
        return os.O_NOFOLLOW
    raise RegistryError(f"{label} requires O_NOFOLLOW support")
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def _directory_open_flags(*, follow_final: bool = False, label: str) -> int:
    """Build the ``os.open`` flags used to obtain a directory descriptor.

    The result is read-only, plus ``O_CLOEXEC`` and ``O_DIRECTORY`` where the
    platform defines them. Unless *follow_final* is true the no-follow flag is
    added as well, which raises RegistryError when it is unsupported.
    """
    flags = os.O_RDONLY | getattr(os, "O_CLOEXEC", 0) | getattr(os, "O_DIRECTORY", 0)
    if follow_final:
        return flags
    return flags | _no_follow_flag(label=label)
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def _file_open_flags(*, label: str, write: bool = False) -> int:
    """Build ``os.open`` flags for reading, or truncating-writing, a file.

    With ``write=True`` the base is ``O_WRONLY | O_CREAT | O_TRUNC``; otherwise
    ``O_RDONLY``. The no-follow flag is always added (RegistryError when it is
    unsupported), followed by ``O_CLOEXEC``, ``O_NONBLOCK`` and ``O_NOCTTY``
    wherever the platform defines them.
    """
    if write:
        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
    else:
        flags = os.O_RDONLY
    flags |= _no_follow_flag(label=label)
    for optional_flag in ("O_CLOEXEC", "O_NONBLOCK", "O_NOCTTY"):
        flags |= getattr(os, optional_flag, 0)
    return flags
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def _temp_file_open_flags(*, label: str) -> int:
    """Build ``os.open`` flags for exclusively creating a new file.

    Combines ``O_WRONLY | O_CREAT | O_EXCL`` with the no-follow flag
    (RegistryError when unsupported) and, where defined, ``O_CLOEXEC`` and
    ``O_NOCTTY``.
    """
    required = os.O_WRONLY | os.O_CREAT | os.O_EXCL | _no_follow_flag(label=label)
    return required | getattr(os, "O_CLOEXEC", 0) | getattr(os, "O_NOCTTY", 0)
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def _append_file_open_flags(*, label: str) -> int:
    """Build ``os.open`` flags for appending to a file, creating it if needed.

    Combines ``O_WRONLY | O_CREAT | O_APPEND`` with the no-follow flag
    (RegistryError when unsupported) and, where defined, ``O_CLOEXEC``,
    ``O_NONBLOCK`` and ``O_NOCTTY``.
    """
    flags = os.O_WRONLY | os.O_CREAT | os.O_APPEND
    flags |= _no_follow_flag(label=label)
    for optional_flag in ("O_CLOEXEC", "O_NONBLOCK", "O_NOCTTY"):
        flags |= getattr(os, optional_flag, 0)
    return flags
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
def _leaf_name(path: Path, *, label: str) -> str:
    """Return the final component of *path*.

    Raises:
        RegistryError: when the final component is empty, ``"."`` or ``".."``.
    """
    leaf = path.name
    if leaf not in {"", ".", ".."}:
        return leaf
    raise RegistryError(f"{label} must name a regular file")
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def _normalized_link_target(anchor: Path, raw_target: str) -> Path:
    """Turn a raw symlink target string into a lexically normalized path.

    A relative *raw_target* is joined onto *anchor* first; the result is then
    passed through ``os.path.normpath`` (no filesystem access).
    """
    target = Path(raw_target)
    combined = target if target.is_absolute() else anchor / target
    return Path(os.path.normpath(str(combined)))
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
def normalize_allowed_first_absolute_symlink(path: Path) -> Path:
    """Rewrite an allow-listed symlinked first component onto its real target.

    *path* is returned unchanged when it is relative, has no component after
    the anchor, or its first component is not a key of
    ``ALLOWED_FIRST_COMPONENT_SYMLINKS``. Otherwise, if ``<anchor>/<first>`` is
    a symlink whose normalized target equals the expected path, the expected
    path joined with the remaining components is returned. Any OSError while
    inspecting the link also yields *path* unchanged.
    """
    parts = path.parts
    if not path.is_absolute() or len(parts) < 2:
        return path
    first = parts[1]
    expected = ALLOWED_FIRST_COMPONENT_SYMLINKS.get(first)
    if expected is None:
        return path
    root = Path(path.anchor)
    link = root / first
    try:
        matches = link.is_symlink() and _normalized_link_target(root, os.readlink(link)) == expected
    except OSError:
        return path
    return expected.joinpath(*parts[2:]) if matches else path
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def normalize_local_path(path: Path) -> Path:
    """Return an absolute, lexically normalized form of *path*.

    ``~`` is expanded, a relative path is anchored at the current working
    directory, ``os.path.normpath`` collapses ``.``/``..`` segments, and the
    allow-listed first-component symlink rewrite is applied last.
    """
    expanded = path.expanduser()
    absolute = expanded if expanded.is_absolute() else Path.cwd() / expanded
    return normalize_allowed_first_absolute_symlink(Path(os.path.normpath(str(absolute))))
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def normalize_project_path(root: Path, candidate: Path, *, label: str) -> Path:
    """Normalize *candidate* and require it to stay inside *root*.

    ``~`` is expanded, a relative candidate is joined onto *root*, the result
    is normalized with ``os.path.normpath`` and the allow-listed
    first-component symlink rewrite is applied.

    Raises:
        RegistryError: when the normalized path is not relative to *root*.
    """
    expanded = candidate.expanduser()
    anchored = expanded if expanded.is_absolute() else root / expanded
    normalized = normalize_allowed_first_absolute_symlink(Path(os.path.normpath(str(anchored))))
    # NOTE(review): root is compared exactly as given; it is not passed through
    # the same normalization here - confirm callers supply a normalized root.
    try:
        normalized.relative_to(root)
    except ValueError as exc:
        raise RegistryError(f"{label} must stay inside project root: {normalized}") from exc
    return normalized
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
def open_directory_no_follow(path: Path, *, label: str, create: bool = False, missing_ok: bool = False) -> int | None:
    """Open *path* as a directory descriptor, one component at a time.

    Every component is opened relative to the descriptor of the previous one
    using the no-follow directory flags, so a symlinked component is not
    traversed.

    Args:
        path: Directory to open. An absolute path is walked from its anchor, a
            relative one from the current directory.
        label: Name used in error messages.
        create: Create missing components (mode ``0o755``) instead of failing.
        missing_ok: Return ``None`` for a missing component. This is checked
            before *create*, so it wins when both are set.

    Returns:
        An open descriptor that the caller must close, or ``None`` when a
        component is missing and *missing_ok* is true.

    Raises:
        RegistryError: for missing platform support, a ``..`` component, a
            missing component (without *create*/*missing_ok*), or an OSError
            from opening or creating a component.
    """
    path = normalize_allowed_first_absolute_symlink(path)
    if not DIR_FD_OPEN_SUPPORTED:
        raise RegistryError(f"{label} requires dir_fd open support")
    if create and not DIR_FD_MKDIR_SUPPORTED:
        raise RegistryError(f"{label} requires dir_fd mkdir support")
    flags = _directory_open_flags(label=label)
    if path.is_absolute():
        anchor = path.anchor or os.sep
        parts = path.parts[1:]
        try:
            # The anchor itself is opened with follow_final=True (no O_NOFOLLOW).
            current_fd = os.open(anchor, _directory_open_flags(follow_final=True, label=label))
        except OSError as exc:
            raise RegistryError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
    else:
        parts = path.parts
        try:
            current_fd = os.open(".", flags)
        except OSError as exc:
            raise RegistryError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
    # From here on current_fd is owned by this function; the finally clause
    # closes it unless ownership has been handed over (current_fd set to -1).
    try:
        for part in parts:
            if part in {"", "."}:
                continue
            if part == "..":
                raise RegistryError(f"{label} must not contain parent traversal")
            next_fd = -1
            try:
                next_fd = os.open(part, flags, dir_fd=current_fd)
            except FileNotFoundError:
                if missing_ok:
                    os.close(current_fd)
                    current_fd = -1
                    return None
                if not create:
                    raise RegistryError(f"could not inspect {label}: missing directory component") from None
                try:
                    os.mkdir(part, mode=0o755, dir_fd=current_fd)
                except FileExistsError:
                    # Something created the component in the meantime; the
                    # open below decides whether it is usable.
                    pass
                except OSError as exc:
                    raise RegistryError(f"could not create {label}: {os_error_detail(exc)}") from exc
                try:
                    next_fd = os.open(part, flags, dir_fd=current_fd)
                except OSError as exc:
                    raise RegistryError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
            except OSError as exc:
                raise RegistryError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
            try:
                # Re-check the type on the opened descriptor rather than by name.
                if not stat.S_ISDIR(os.fstat(next_fd).st_mode):
                    raise RegistryError(f"{label} must not traverse non-directory components")
            except Exception:
                # Do not leak the freshly opened descriptor on any failure.
                if next_fd >= 0:
                    try:
                        os.close(next_fd)
                    except OSError:
                        pass
                raise
            try:
                os.close(current_fd)
            except OSError:
                pass
            current_fd = next_fd
        # Hand the final descriptor to the caller and disarm the cleanup below.
        owned_fd = current_fd
        current_fd = -1
        return owned_fd
    finally:
        if current_fd >= 0:
            try:
                os.close(current_fd)
            except OSError:
                pass
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
def _precheck_regular_leaf(parent_fd: int, leaf_name: str, *, label: str, missing_ok: bool = False) -> bool:
    """Check, without following symlinks, that a directory entry is a regular file.

    Args:
        parent_fd: Descriptor of the directory that contains the entry.
        leaf_name: Entry name inside that directory.
        label: Name used in error messages.
        missing_ok: Report a missing entry as ``False`` instead of raising.

    Returns:
        ``True`` when the entry exists and is a regular file; ``False`` when it
        is missing and *missing_ok* is true.

    Raises:
        RegistryError: when dir_fd/no-follow stat is unsupported, the entry is
            missing (without *missing_ok*), stat fails, or the entry is not a
            regular file.
    """
    if not DIR_FD_STAT_NOFOLLOW_SUPPORTED:
        raise RegistryError(f"{label} requires dir_fd stat support")
    try:
        info = os.stat(leaf_name, dir_fd=parent_fd, follow_symlinks=False)
    except FileNotFoundError:
        if not missing_ok:
            raise RegistryError(f"could not inspect {label}: missing file") from None
        return False
    except OSError as exc:
        raise RegistryError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
    if stat.S_ISREG(info.st_mode):
        return True
    raise RegistryError(f"{label} must be a regular file")
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
def read_bounded_regular_file(path: Path, *, max_bytes: int, label: str, missing_ok: bool = False) -> tuple[bytes, bool] | None:
    """Read at most *max_bytes* from a regular file without following symlinks.

    Args:
        path: File to read; normalized with ``normalize_local_path``.
        max_bytes: Maximum number of bytes returned.
        label: Name used in error messages.
        missing_ok: Return ``None`` when the parent directory or the file is
            missing instead of raising.

    Returns:
        ``(data, truncated)`` where *truncated* is true when the file holds
        more than *max_bytes* bytes, or ``None`` for a missing target with
        *missing_ok*.

    Raises:
        RegistryError: when the target is not a regular file or an OS call fails.
    """
    path = normalize_local_path(path)
    parent_fd = open_directory_no_follow(path.parent, label=f"{label} parent", missing_ok=missing_ok)
    if parent_fd is None:
        return None
    fd = -1
    try:
        leaf = _leaf_name(path, label=label)
        if not _precheck_regular_leaf(parent_fd, leaf, label=label, missing_ok=missing_ok):
            return None
        fd = os.open(leaf, _file_open_flags(label=label), dir_fd=parent_fd)
        if not stat.S_ISREG(os.fstat(fd).st_mode):
            raise RegistryError(f"{label} must be a regular file")
        # Ask for one byte beyond the limit so truncation can be detected.
        limit = max_bytes + 1
        buffer = bytearray()
        while len(buffer) < limit:
            chunk = os.read(fd, min(64 * 1024, limit - len(buffer)))
            if not chunk:
                break
            buffer += chunk
        return bytes(buffer[:max_bytes]), len(buffer) > max_bytes
    except OSError as exc:
        raise RegistryError(f"could not read {label}: {os_error_detail(exc)}") from exc
    finally:
        for descriptor in (fd, parent_fd):
            if descriptor >= 0:
                try:
                    os.close(descriptor)
                except OSError:
                    pass
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
def write_all_fd(fd: int, data: bytes) -> None:
    """Write all of *data* to *fd*, retrying after partial writes.

    Raises:
        OSError: ``"short write"`` when ``os.write`` reports a non-positive
            count; errors raised by ``os.write`` itself propagate unchanged.
    """
    pending = memoryview(data)
    while len(pending) > 0:
        count = os.write(fd, pending)
        if count <= 0:
            raise OSError("short write")
        pending = pending[count:]
|
|
642
|
+
|
|
643
|
+
|
|
644
|
+
def write_regular_file_no_follow(path: Path, data: bytes, *, label: str) -> None:
    """Atomically write *data* to *path* without following symlinks.

    The data goes to an exclusively created temporary file in the target's
    directory, which is then renamed over the target with ``os.replace``.
    Missing parent directories are created.

    When the target already exists its permission bits are carried over to the
    replacement. The mode argument of ``os.open`` is masked by the process
    umask, so the bits are re-applied with ``os.fchmod`` (best effort). A new
    file still gets ``0o644`` subject to the umask.

    Args:
        path: Destination file; normalized with ``normalize_local_path``.
        data: Bytes to write.
        label: Name used in error messages.

    Raises:
        RegistryError: when the target exists but is not a regular file, no
            temporary file could be created, or an OS call fails.
    """
    path = normalize_local_path(path)
    parent_fd = open_directory_no_follow(path.parent, label=f"{label} parent", create=True)
    if parent_fd is None:  # pragma: no cover - create=True never returns None.
        raise RegistryError(f"could not inspect {label} parent")
    fd = -1
    temp_leaf: str | None = None
    try:
        leaf = _leaf_name(path, label=label)
        exists = _precheck_regular_leaf(parent_fd, leaf, label=label, missing_ok=True)
        mode = 0o644
        preserved_mode: int | None = None
        if exists:
            try:
                preserved_mode = stat.S_IMODE(os.stat(leaf, dir_fd=parent_fd, follow_symlinks=False).st_mode) or None
            except OSError:
                preserved_mode = None
            if preserved_mode is not None:
                mode = preserved_mode
        for _attempt in range(20):
            candidate = _leaf_name(Path(f".{leaf}.{os.getpid()}.{secrets.token_hex(8)}.tmp"), label=f"{label} temp")
            try:
                fd = os.open(candidate, _temp_file_open_flags(label=f"{label} temp"), mode, dir_fd=parent_fd)
                temp_leaf = candidate
                break
            except FileExistsError:
                continue
        if fd < 0 or temp_leaf is None:
            raise RegistryError(f"could not create temporary {label}")
        if not stat.S_ISREG(os.fstat(fd).st_mode):
            raise RegistryError(f"{label} temp must be a regular file")
        if preserved_mode is not None:
            # os.open() applied the umask to the creation mode; restore the
            # exact bits of the file being replaced. Best effort, like fsync.
            try:
                os.fchmod(fd, preserved_mode)
            except OSError:
                pass
        write_all_fd(fd, data)
        try:
            os.fsync(fd)
        except OSError:
            pass
        try:
            os.close(fd)
        except OSError:
            pass
        fd = -1
        os.replace(temp_leaf, leaf, src_dir_fd=parent_fd, dst_dir_fd=parent_fd)
        # The temporary name no longer exists after a successful rename.
        temp_leaf = None
    except OSError as exc:
        raise RegistryError(f"could not write {label}: {os_error_detail(exc)}") from exc
    finally:
        if fd >= 0:
            try:
                os.close(fd)
            except OSError:
                pass
        if temp_leaf is not None:
            # Remove the leftover temporary file after a failure.
            try:
                os.unlink(temp_leaf, dir_fd=parent_fd)
            except OSError:
                pass
        try:
            os.fsync(parent_fd)
        except OSError:
            pass
        try:
            os.close(parent_fd)
        except OSError:
            pass
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
def _reject_parent_traversal(path: Path, *, label: str) -> None:
    """Raise RegistryError when any component of *path* is ``".."``."""
    if ".." in path.parts:
        raise RegistryError(f"{label} must not contain parent traversal")
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
def write_regular_file_no_follow_exclusive(path: Path, data: bytes, *, label: str, mode: int = 0o600) -> None:
    """Create *path* as a brand-new regular file containing *data*.

    The file is opened with exclusive-create, no-follow flags relative to its
    parent directory descriptor. Parent directories are not created here.

    Args:
        path: Destination file; must not contain ``..`` components.
        data: Bytes to write.
        label: Name used in error messages.
        mode: Permission bits for the new file (default ``0o600``).

    Raises:
        RegistryError: when *path* contains ``..``, the target already exists,
            the target is not a regular file, or an OS call fails.
    """
    # Checked on the raw path: normalize_local_path() collapses ".." segments.
    _reject_parent_traversal(path, label=label)
    path = normalize_local_path(path)
    parent_fd = open_directory_no_follow(path.parent, label=f"{label} parent")
    if parent_fd is None:  # pragma: no cover - missing_ok is not enabled.
        raise RegistryError(f"could not inspect {label} parent")
    fd = -1
    created = False
    success = False
    try:
        leaf = _leaf_name(path, label=label)
        exists = _precheck_regular_leaf(parent_fd, leaf, label=label, missing_ok=True)
        if exists:
            raise RegistryError(f"{label} must not already exist")
        flags = _temp_file_open_flags(label=label)
        # The open flags include O_EXCL, so a file that appears after the
        # precheck surfaces as FileExistsError below.
        fd = os.open(leaf, flags, mode, dir_fd=parent_fd)
        created = True
        if not stat.S_ISREG(os.fstat(fd).st_mode):
            raise RegistryError(f"{label} must be a regular file")
        try:
            # Apply the requested mode explicitly; failure is tolerated.
            os.fchmod(fd, mode)
        except OSError:
            pass
        write_all_fd(fd, data)
        try:
            os.fsync(fd)
        except OSError:
            pass
        success = True
    except FileExistsError as exc:
        raise RegistryError(f"{label} must not already exist") from exc
    except OSError as exc:
        raise RegistryError(f"could not write {label}: {os_error_detail(exc)}") from exc
    finally:
        if fd >= 0:
            try:
                os.close(fd)
            except OSError:
                pass
        if created and not success:
            # Remove the file this call created but could not finish writing.
            try:
                os.unlink(_leaf_name(path, label=label), dir_fd=parent_fd)
            except OSError:
                pass
        try:
            os.fsync(parent_fd)
        except OSError:
            pass
        try:
            os.close(parent_fd)
        except OSError:
            pass
|
|
764
|
+
|
|
765
|
+
|
|
766
|
+
def append_jsonl_no_follow(path: Path, payload: dict[str, Any], *, label: str) -> int:
    """Append ``payload`` to ``path`` as one JSON line and return the byte count.

    The record is encoded with ``ensure_ascii=False`` and ``sort_keys=True``
    and terminated with a newline. The leaf is opened relative to a
    descriptor for its parent directory (requested with ``create=True``),
    using the flags from ``_append_file_open_flags`` and mode ``0o600``.

    Args:
        path: Target JSONL file; normalized with ``normalize_local_path``.
        payload: JSON-serializable mapping to append.
        label: Human-readable name used in error messages.

    Returns:
        The number of bytes written, including the trailing newline.

    Raises:
        RegistryError: If the parent cannot be inspected, the opened
            descriptor is not a regular file, or an ``OSError`` occurs.
        TypeError, ValueError: If ``payload`` cannot be serialized; raised by
            ``json.dumps`` before any filesystem access.
    """
    # Serialize before touching the filesystem so an unserializable payload
    # cannot leave behind a newly created directory or empty log file.
    encoded = json.dumps(payload, ensure_ascii=False, sort_keys=True).encode("utf-8") + b"\n"

    path = normalize_local_path(path)
    parent_fd = open_directory_no_follow(path.parent, label=f"{label} parent", create=True)
    if parent_fd is None:  # pragma: no cover - create=True never returns None.
        raise RegistryError(f"could not inspect {label} parent")
    fd = -1
    try:
        leaf = _leaf_name(path, label=label)
        _precheck_regular_leaf(parent_fd, leaf, label=label, missing_ok=True)
        fd = os.open(leaf, _append_file_open_flags(label=label), 0o600, dir_fd=parent_fd)
        if not stat.S_ISREG(os.fstat(fd).st_mode):
            raise RegistryError(f"{label} must be a regular file")
        write_all_fd(fd, encoded)
        try:
            os.fsync(fd)
        except OSError:
            # Durability is best effort; the data has already been written.
            pass
        return len(encoded)
    except OSError as exc:
        raise RegistryError(f"could not append {label}: {os_error_detail(exc)}") from exc
    finally:
        if fd >= 0:
            try:
                os.close(fd)
            except OSError:
                pass
        try:
            os.fsync(parent_fd)
        except OSError:
            pass
        try:
            os.close(parent_fd)
        except OSError:
            pass
|
|
801
|
+
|
|
802
|
+
|
|
803
|
+
def preflight_append_jsonl_no_follow(path: Path, *, label: str) -> None:
    """Check that a JSONL append target can be opened before doing real work.

    When the leaf already exists it is opened with the append flags and must
    be a regular file. Otherwise a uniquely named ``.preflight`` temp file is
    created next to it (up to 20 name attempts) and removed again, so the
    real target is never created by this check.

    Raises:
        RegistryError: If the parent cannot be inspected, the target or the
            temp file is not a regular file, no temp file could be created,
            or an ``OSError`` occurs.
    """
    path = normalize_local_path(path)
    parent_fd = open_directory_no_follow(path.parent, label=f"{label} parent", create=True)
    if parent_fd is None:  # pragma: no cover - create=True never returns None.
        raise RegistryError(f"could not inspect {label} parent")

    probe_fd = -1
    probe_leaf: str | None = None
    try:
        leaf = _leaf_name(path, label=label)
        if _precheck_regular_leaf(parent_fd, leaf, label=label, missing_ok=True):
            # Existing target: opening it for append is the whole check.
            probe_fd = os.open(leaf, _append_file_open_flags(label=label), 0o600, dir_fd=parent_fd)
            if not stat.S_ISREG(os.fstat(probe_fd).st_mode):
                raise RegistryError(f"{label} must be a regular file")
            return

        preflight_label = f"{label} preflight"
        for _attempt in range(20):
            candidate = _leaf_name(
                Path(f".{leaf}.{os.getpid()}.{secrets.token_hex(8)}.preflight"),
                label=preflight_label,
            )
            try:
                probe_fd = os.open(candidate, _temp_file_open_flags(label=preflight_label), 0o600, dir_fd=parent_fd)
            except FileExistsError:
                # Name collision: try another random suffix.
                continue
            probe_leaf = candidate
            break
        else:
            raise RegistryError(f"could not create temporary {label} preflight")

        if not stat.S_ISREG(os.fstat(probe_fd).st_mode):
            raise RegistryError(f"{label} preflight temp must be a regular file")
    except OSError as exc:
        raise RegistryError(f"could not append {label}: {os_error_detail(exc)}") from exc
    finally:
        if probe_fd >= 0:
            try:
                os.close(probe_fd)
            except OSError:
                pass
        if probe_leaf is not None:
            try:
                os.unlink(probe_leaf, dir_fd=parent_fd)
            except OSError:
                pass
        try:
            os.close(parent_fd)
        except OSError:
            pass
|
|
848
|
+
|
|
849
|
+
|
|
279
850
|
def resolve_root(raw_root: str | None) -> Path:
|
|
280
851
|
root = Path(raw_root) if raw_root else Path.cwd()
|
|
281
852
|
try:
|
|
@@ -286,27 +857,25 @@ def resolve_root(raw_root: str | None) -> Path:
|
|
|
286
857
|
|
|
287
858
|
def resolve_config_path(root: Path, raw_config: str | None) -> Path:
|
|
288
859
|
if raw_config:
|
|
289
|
-
candidate = Path(raw_config)
|
|
290
|
-
if not candidate.is_absolute():
|
|
291
|
-
candidate = root / candidate
|
|
860
|
+
candidate = Path(raw_config)
|
|
292
861
|
else:
|
|
293
|
-
candidate =
|
|
294
|
-
|
|
295
|
-
resolved = candidate.resolve(strict=False)
|
|
296
|
-
except OSError as exc:
|
|
297
|
-
raise RegistryError(f"could not resolve config path: {candidate}: {exc}") from exc
|
|
298
|
-
try:
|
|
299
|
-
resolved.relative_to(root)
|
|
300
|
-
except ValueError as exc:
|
|
301
|
-
raise RegistryError(f"config path must stay inside project root: {resolved}") from exc
|
|
302
|
-
return resolved
|
|
862
|
+
candidate = DEFAULT_CONFIG
|
|
863
|
+
return normalize_project_path(root, candidate, label="config path")
|
|
303
864
|
|
|
304
865
|
|
|
305
866
|
def load_config(path: Path) -> dict[str, Any]:
|
|
306
|
-
|
|
867
|
+
loaded = read_bounded_regular_file(path, max_bytes=MAX_CONFIG_BYTES, label="config", missing_ok=True)
|
|
868
|
+
if loaded is None:
|
|
307
869
|
return {"schema_version": CONFIG_SCHEMA_VERSION, "enabled": []}
|
|
870
|
+
raw, truncated = loaded
|
|
871
|
+
if truncated:
|
|
872
|
+
raise RegistryError("config exceeded max bytes")
|
|
308
873
|
try:
|
|
309
|
-
|
|
874
|
+
text = raw.decode("utf-8")
|
|
875
|
+
except UnicodeDecodeError as exc:
|
|
876
|
+
raise RegistryError(f"could not decode config UTF-8: {path}: {exc.reason}") from exc
|
|
877
|
+
try:
|
|
878
|
+
data = json.loads(text)
|
|
310
879
|
except json.JSONDecodeError as exc:
|
|
311
880
|
raise RegistryError(f"could not parse config JSON: {path}: {exc.msg}") from exc
|
|
312
881
|
except OSError as exc:
|
|
@@ -328,11 +897,8 @@ def write_config(path: Path, enabled: set[str]) -> dict[str, Any]:
|
|
|
328
897
|
"updated_at": datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z"),
|
|
329
898
|
"enabled": sorted(enabled),
|
|
330
899
|
}
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n", encoding="utf-8")
|
|
334
|
-
except OSError as exc:
|
|
335
|
-
raise RegistryError(f"could not write config: {path}: {exc}") from exc
|
|
900
|
+
payload = (json.dumps(data, indent=2, sort_keys=True) + "\n").encode("utf-8")
|
|
901
|
+
write_regular_file_no_follow(path, payload, label="config")
|
|
336
902
|
return data
|
|
337
903
|
|
|
338
904
|
|
|
@@ -452,6 +1018,7 @@ def command_disable(args: argparse.Namespace) -> int:
|
|
|
452
1018
|
|
|
453
1019
|
DIFF_GIT_RE = re.compile(r"^diff --git (?P<old>\S+) (?P<new>\S+)$")
|
|
454
1020
|
HUNK_RE = re.compile(r"^@@\s+-(?P<old_start>\d+)(?:,(?P<old_count>\d+))?\s+\+(?P<new_start>\d+)(?:,(?P<new_count>\d+))?\s+@@(?P<section>.*)$")
|
|
1021
|
+
CONTEXT_DIFF_ARTIFACT_ID_RE = re.compile(r"^[a-f0-9]{16,64}$")
|
|
455
1022
|
|
|
456
1023
|
|
|
457
1024
|
def read_bounded_input(args: argparse.Namespace) -> tuple[str, dict[str, Any]]:
|
|
@@ -459,18 +1026,16 @@ def read_bounded_input(args: argparse.Namespace) -> tuple[str, dict[str, Any]]:
|
|
|
459
1026
|
if args.input:
|
|
460
1027
|
path = Path(args.input)
|
|
461
1028
|
source_label = source_label or str(path)
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
except OSError as exc:
|
|
466
|
-
raise RegistryError(f"could not read input: {path}: {exc}") from exc
|
|
1029
|
+
loaded = read_bounded_regular_file(path, max_bytes=MAX_CONTEXT_DIFF_INPUT_BYTES, label="input")
|
|
1030
|
+
assert loaded is not None
|
|
1031
|
+
raw, truncated = loaded
|
|
467
1032
|
else:
|
|
468
1033
|
source_label = source_label or "stdin"
|
|
469
1034
|
raw = sys.stdin.buffer.read(MAX_CONTEXT_DIFF_INPUT_BYTES + 1)
|
|
1035
|
+
truncated = len(raw) > MAX_CONTEXT_DIFF_INPUT_BYTES
|
|
1036
|
+
raw = raw[:MAX_CONTEXT_DIFF_INPUT_BYTES]
|
|
470
1037
|
if not raw:
|
|
471
1038
|
raise RegistryError("context-diff-compaction plan requires diff input on stdin or --input")
|
|
472
|
-
truncated = len(raw) > MAX_CONTEXT_DIFF_INPUT_BYTES
|
|
473
|
-
raw = raw[:MAX_CONTEXT_DIFF_INPUT_BYTES]
|
|
474
1039
|
text = raw.decode("utf-8", errors="replace")
|
|
475
1040
|
metadata = {
|
|
476
1041
|
"source_label": source_label,
|
|
@@ -492,13 +1057,16 @@ def strip_diff_prefix(path: str) -> str:
|
|
|
492
1057
|
def summarize_diff(text: str, *, max_files: int = 50, max_hunks: int = 200) -> dict[str, Any]:
|
|
493
1058
|
files: list[dict[str, Any]] = []
|
|
494
1059
|
current: dict[str, Any] | None = None
|
|
1060
|
+
current_hunk: dict[str, Any] | None = None
|
|
495
1061
|
total_hunks = 0
|
|
1062
|
+
summarized_hunks = 0
|
|
496
1063
|
lines = text.splitlines()
|
|
497
1064
|
diff_header_count = 0
|
|
498
1065
|
for line_number, line in enumerate(lines, start=1):
|
|
499
1066
|
match = DIFF_GIT_RE.match(line)
|
|
500
1067
|
if match:
|
|
501
1068
|
diff_header_count += 1
|
|
1069
|
+
current_hunk = None
|
|
502
1070
|
if len(files) >= max_files:
|
|
503
1071
|
current = None
|
|
504
1072
|
continue
|
|
@@ -515,28 +1083,199 @@ def summarize_diff(text: str, *, max_files: int = 50, max_hunks: int = 200) -> d
|
|
|
515
1083
|
total_hunks += 1
|
|
516
1084
|
if current is None:
|
|
517
1085
|
if len(files) >= max_files:
|
|
1086
|
+
current_hunk = None
|
|
518
1087
|
continue
|
|
519
1088
|
current = {"old_path": None, "new_path": None, "diff_header_line": None, "hunks": []}
|
|
520
1089
|
files.append(current)
|
|
521
1090
|
if len(current["hunks"]) < max_hunks:
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
1091
|
+
current_hunk = {
|
|
1092
|
+
"line": line_number,
|
|
1093
|
+
"old_start": int(hunk.group("old_start")),
|
|
1094
|
+
"old_count": int(hunk.group("old_count") or "1"),
|
|
1095
|
+
"new_start": int(hunk.group("new_start")),
|
|
1096
|
+
"new_count": int(hunk.group("new_count") or "1"),
|
|
1097
|
+
"section": hunk.group("section").strip()[:120],
|
|
1098
|
+
"added_lines": 0,
|
|
1099
|
+
"removed_lines": 0,
|
|
1100
|
+
"context_lines": 0,
|
|
1101
|
+
"body_lines": 0,
|
|
1102
|
+
"reviewable": False,
|
|
1103
|
+
}
|
|
1104
|
+
current["hunks"].append(current_hunk)
|
|
1105
|
+
summarized_hunks += 1
|
|
1106
|
+
else:
|
|
1107
|
+
current_hunk = None
|
|
1108
|
+
continue
|
|
1109
|
+
if current_hunk is not None:
|
|
1110
|
+
changed = False
|
|
1111
|
+
if line.startswith("+") and not line.startswith("+++"):
|
|
1112
|
+
current_hunk["added_lines"] += 1
|
|
1113
|
+
changed = True
|
|
1114
|
+
elif line.startswith("-") and not line.startswith("---"):
|
|
1115
|
+
current_hunk["removed_lines"] += 1
|
|
1116
|
+
changed = True
|
|
1117
|
+
elif line.startswith(" "):
|
|
1118
|
+
current_hunk["context_lines"] += 1
|
|
1119
|
+
else:
|
|
1120
|
+
continue
|
|
1121
|
+
current_hunk["body_lines"] += 1
|
|
1122
|
+
reviewable_hunks = 0
|
|
1123
|
+
malformed_hunks = 0
|
|
1124
|
+
for file_summary in files:
|
|
1125
|
+
for hunk_summary in file_summary["hunks"]:
|
|
1126
|
+
old_body_lines = hunk_summary["removed_lines"] + hunk_summary["context_lines"]
|
|
1127
|
+
new_body_lines = hunk_summary["added_lines"] + hunk_summary["context_lines"]
|
|
1128
|
+
has_changes = bool(hunk_summary["added_lines"] or hunk_summary["removed_lines"])
|
|
1129
|
+
well_formed = (
|
|
1130
|
+
old_body_lines == hunk_summary["old_count"]
|
|
1131
|
+
and new_body_lines == hunk_summary["new_count"]
|
|
1132
|
+
)
|
|
1133
|
+
hunk_summary["old_body_lines"] = old_body_lines
|
|
1134
|
+
hunk_summary["new_body_lines"] = new_body_lines
|
|
1135
|
+
hunk_summary["has_changes"] = has_changes
|
|
1136
|
+
hunk_summary["well_formed"] = well_formed
|
|
1137
|
+
hunk_summary["reviewable"] = bool(has_changes and well_formed)
|
|
1138
|
+
if hunk_summary["reviewable"]:
|
|
1139
|
+
reviewable_hunks += 1
|
|
1140
|
+
elif not well_formed:
|
|
1141
|
+
malformed_hunks += 1
|
|
532
1142
|
return {
|
|
533
1143
|
"file_count": len(files),
|
|
534
1144
|
"hunk_count": total_hunks,
|
|
1145
|
+
"summarized_hunk_count": summarized_hunks,
|
|
1146
|
+
"reviewable_hunk_count": reviewable_hunks,
|
|
1147
|
+
"malformed_hunk_count": malformed_hunks,
|
|
535
1148
|
"truncated_files": max(0, diff_header_count - len(files)),
|
|
1149
|
+
"truncated_hunks": max(0, total_hunks - summarized_hunks),
|
|
536
1150
|
"files": files,
|
|
537
1151
|
}
|
|
538
1152
|
|
|
539
1153
|
|
|
1154
|
+
def valid_context_diff_reexpand_command(receipt_id: str | None, command: str | None) -> tuple[bool, str | None]:
    """Check that ``command`` is one of the two accepted re-expand invocations.

    Returns ``(True, None)`` when the command tokenizes to exactly
    ``context-guard-artifact get <receipt_id> --full`` or
    ``context-guard artifact get <receipt_id> --full``; otherwise
    ``(False, <blocker code>)``.
    """
    if not (receipt_id and command):
        return False, "missing_exact_receipt_or_reexpand_command"

    rejected = (False, "invalid_reexpand_command")
    if CONTEXT_DIFF_ARTIFACT_ID_RE.fullmatch(receipt_id) is None:
        return rejected

    # Shell metacharacters and line breaks are refused before tokenizing.
    forbidden_tokens = (";", "|", "&", ">", "<", "`", "$", "\n", "\r")
    for token in forbidden_tokens:
        if token in command:
            return rejected

    try:
        argv = shlex.split(command)
    except ValueError:
        return rejected

    accepted_forms = (
        ["context-guard-artifact", "get", receipt_id, "--full"],
        ["context-guard", "artifact", "get", receipt_id, "--full"],
    )
    if argv in accepted_forms:
        return True, None
    return rejected
|
|
1170
|
+
|
|
1171
|
+
|
|
1172
|
+
def context_diff_artifact_read_dirs() -> list[Path]:
    """Return the artifact directories to search: default first, then legacy."""
    search_order = (DEFAULT_CONTEXT_DIFF_ARTIFACT_DIR, LEGACY_CONTEXT_DIFF_ARTIFACT_DIR)
    return list(search_order)
|
|
1174
|
+
|
|
1175
|
+
|
|
1176
|
+
def context_diff_artifact_paths(directory: Path, receipt_id: str) -> tuple[Path, Path]:
    """Return the ``(content, metadata)`` paths for ``receipt_id`` in ``directory``.

    The content file is ``<receipt_id>.txt`` and the metadata file is
    ``<receipt_id>.json``.
    """
    content_path = directory.joinpath(f"{receipt_id}.txt")
    metadata_path = directory.joinpath(f"{receipt_id}.json")
    return content_path, metadata_path
|
|
1178
|
+
|
|
1179
|
+
|
|
1180
|
+
def verify_context_diff_artifact(
    receipt_id: str | None,
    *,
    expected_sha256: str,
    expected_bytes: int,
) -> tuple[bool, str | None, dict[str, Any]]:
    """Verify that a stored artifact for ``receipt_id`` matches the expected content.

    Each directory from ``context_diff_artifact_read_dirs`` is checked in
    order. The first directory holding either artifact file decides the
    result: both files must be present and untruncated, the metadata must
    name ``receipt_id``, its recorded hash and size must match the content
    file, and the content must match ``expected_sha256``/``expected_bytes``.

    Returns:
        ``(verified, blocker, details)`` where ``blocker`` is ``None`` on
        success and ``details`` records which directories were read and what
        was found.
    """
    id_is_valid = bool(receipt_id) and CONTEXT_DIFF_ARTIFACT_ID_RE.fullmatch(receipt_id) is not None
    if not id_is_valid:
        return False, "invalid_reexpand_command", {"checked": False, "read_directories": []}

    search_dirs = context_diff_artifact_read_dirs()
    details: dict[str, Any] = {
        "checked": True,
        "read_directories": [str(path) for path in search_dirs],
        "matched_directory": None,
        "content_sha256": None,
        "content_bytes": None,
    }

    def _receipt_invalid() -> tuple[bool, str | None, dict[str, Any]]:
        return False, "artifact_receipt_invalid", details

    for directory in search_dirs:
        content_path, meta_path = context_diff_artifact_paths(directory, receipt_id)
        meta_loaded = read_bounded_regular_file(
            meta_path,
            max_bytes=MAX_CONTEXT_DIFF_ARTIFACT_METADATA_BYTES,
            label="context-diff artifact metadata",
            missing_ok=True,
        )
        content_loaded = read_bounded_regular_file(
            content_path,
            max_bytes=max(MAX_CONTEXT_DIFF_INPUT_BYTES, expected_bytes),
            label="context-diff artifact content",
            missing_ok=True,
        )

        absent = [loaded is None for loaded in (meta_loaded, content_loaded)]
        if all(absent):
            # Nothing stored here; try the next directory.
            continue
        if any(absent):
            # A half-present artifact pair is treated as invalid.
            return _receipt_invalid()

        meta_raw, meta_truncated = meta_loaded
        content_raw, content_truncated = content_loaded
        if meta_truncated or content_truncated:
            return _receipt_invalid()

        try:
            metadata = json.loads(meta_raw.decode("utf-8"))
        except (UnicodeDecodeError, json.JSONDecodeError):
            return _receipt_invalid()
        if not (isinstance(metadata, dict) and metadata.get("artifact_id") == receipt_id):
            return _receipt_invalid()

        stored = metadata.get("stored_output")
        stored_fields = stored if isinstance(stored, dict) else {}
        recorded_sha = stored_fields.get("sha256")
        recorded_bytes = stored_fields.get("bytes")

        content_sha = hashlib.sha256(content_raw).hexdigest()
        content_size = len(content_raw)
        details["matched_directory"] = str(directory)
        details["content_sha256"] = content_sha
        details["content_bytes"] = content_size

        if recorded_sha != content_sha or recorded_bytes != content_size:
            return _receipt_invalid()
        if content_sha != expected_sha256 or content_size != expected_bytes:
            return False, "artifact_content_mismatch", details
        return True, None, details

    return False, "artifact_receipt_not_found", details
|
|
1240
|
+
|
|
1241
|
+
|
|
1242
|
+
def read_context_diff_replacement(args: argparse.Namespace) -> tuple[str | None, dict[str, Any]]:
    """Load the caller-supplied replacement text and describe it.

    The replacement comes from ``args.replacement_text`` (inline) or
    ``args.replacement_file``; supplying both is an error. Content is capped
    at ``MAX_CONTEXT_DIFF_REPLACEMENT_BYTES`` and decoded as UTF-8 with
    undecodable bytes replaced.

    Returns:
        ``(text, metadata)``. ``text`` is ``None`` when no replacement was
        given. ``metadata`` holds the source label, byte and line counts,
        SHA-256 of the (capped) raw bytes, the truncation flag and the cap.

    Raises:
        RegistryError: If both replacement sources are supplied.
    """
    inline_value = args.replacement_text
    file_value = args.replacement_file
    if inline_value is not None and file_value:
        raise RegistryError("context-diff-compaction emit accepts only one of --replacement-text or --replacement-file")

    # Defaults describe the "no replacement supplied" case.
    text: str | None = None
    raw = b""
    truncated = False
    source_label = None

    if inline_value is not None:
        encoded = str(inline_value).encode("utf-8")
        truncated = len(encoded) > MAX_CONTEXT_DIFF_REPLACEMENT_BYTES
        raw = encoded[:MAX_CONTEXT_DIFF_REPLACEMENT_BYTES]
        text = raw.decode("utf-8", errors="replace")
        source_label = "inline"
    elif file_value:
        path = Path(file_value)
        loaded = read_bounded_regular_file(
            path,
            max_bytes=MAX_CONTEXT_DIFF_REPLACEMENT_BYTES,
            label="context-diff replacement",
        )
        assert loaded is not None
        raw, truncated = loaded
        text = raw.decode("utf-8", errors="replace")
        source_label = str(path)

    supplied = text is not None
    metadata = {
        "source_label": source_label,
        "bytes": len(raw),
        "lines": len(text.splitlines()) if supplied else 0,
        "sha256": hashlib.sha256(raw).hexdigest() if supplied else None,
        "truncated": truncated,
        "max_bytes": MAX_CONTEXT_DIFF_REPLACEMENT_BYTES,
    }
    return text, metadata
|
|
1277
|
+
|
|
1278
|
+
|
|
540
1279
|
def context_diff_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
|
|
541
1280
|
text, input_meta = read_bounded_input(args)
|
|
542
1281
|
summary = summarize_diff(text)
|
|
@@ -548,7 +1287,11 @@ def context_diff_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
|
|
|
548
1287
|
readiness_blockers.append("missing_exact_receipt_or_reexpand_command")
|
|
549
1288
|
if input_meta["truncated"]:
|
|
550
1289
|
readiness_blockers.append("input_truncated")
|
|
551
|
-
if summary
|
|
1290
|
+
if summary.get("truncated_files", 0) or summary.get("truncated_hunks", 0):
|
|
1291
|
+
readiness_blockers.append("diff_summary_truncated")
|
|
1292
|
+
if summary.get("malformed_hunk_count", 0):
|
|
1293
|
+
readiness_blockers.append("malformed_diff_hunks")
|
|
1294
|
+
if summary["file_count"] == 0 or summary.get("reviewable_hunk_count", 0) == 0:
|
|
552
1295
|
readiness_blockers.append("no_reviewable_diff_hunks")
|
|
553
1296
|
status = (
|
|
554
1297
|
"ready_for_human_review"
|
|
@@ -577,7 +1320,7 @@ def context_diff_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
|
|
|
577
1320
|
"artifact_id": receipt_id,
|
|
578
1321
|
"cli": reexpand_command,
|
|
579
1322
|
"verified": False,
|
|
580
|
-
"note": "
|
|
1323
|
+
"note": "Dry-run planning records user-supplied handles for human review only; it does not verify local receipt storage.",
|
|
581
1324
|
},
|
|
582
1325
|
"review_plan": {
|
|
583
1326
|
"summary": summary,
|
|
@@ -620,6 +1363,119 @@ def command_plan_context_diff_compaction(args: argparse.Namespace) -> int:
|
|
|
620
1363
|
return 0
|
|
621
1364
|
|
|
622
1365
|
|
|
1366
|
+
def context_diff_emit_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the emit-mode payload for a context-diff compaction.

    Starts from the plan payload, validates the re-expand command, verifies
    the local artifact when that command is valid, and reads the
    caller-supplied replacement. The replacement is included in the result
    only when no readiness blocker remains.

    Returns:
        The plan payload, mutated in place with emit-mode fields
        (``status``, ``transform_policy``, ``exact_retrieval``,
        ``replacement``, ``compaction_evidence``, ``compacted_replacement``
        and updated ``review_plan`` entries).
    """
    payload = context_diff_plan_payload(args)
    receipt_id = args.receipt_id.strip() if args.receipt_id else None
    reexpand_command = args.reexpand_command.strip() if args.reexpand_command else None
    reexpand_valid, reexpand_blocker = valid_context_diff_reexpand_command(receipt_id, reexpand_command)
    replacement_text, replacement_meta = read_context_diff_replacement(args)

    if reexpand_valid:
        artifact_verified, artifact_blocker, artifact_verification = verify_context_diff_artifact(
            receipt_id,
            expected_sha256=payload["input"]["sha256"],
            expected_bytes=payload["input"]["bytes"],
        )
    else:
        # The artifact is only looked up for a well-formed command.
        artifact_verified = False
        artifact_blocker = None
        artifact_verification: dict[str, Any] = {"checked": False, "read_directories": []}

    has_replacement = replacement_text is not None
    input_bytes = payload["input"]["bytes"]

    blockers = list(payload["review_plan"]["readiness_blockers"])
    blockers.extend(blocker for blocker in (reexpand_blocker, artifact_blocker) if blocker)
    if not has_replacement or not replacement_text.strip():
        blockers.append("missing_compacted_replacement")
    if replacement_meta["truncated"]:
        blockers.append("replacement_truncated")
    if has_replacement and not replacement_meta["truncated"] and replacement_meta["bytes"] >= input_bytes:
        blockers.append("replacement_not_smaller_than_input")
    # Drop duplicates while keeping first-seen order.
    blockers = list(dict.fromkeys(blockers))
    ready = not blockers

    if ready and has_replacement:
        replacement_record = {
            "text": replacement_text,
            "bytes": replacement_meta["bytes"],
            "lines": replacement_meta["lines"],
            "sha256": replacement_meta["sha256"],
            "source_label": replacement_meta["source_label"],
        }
    else:
        replacement_record = None

    payload["mode"] = "emit"
    payload["status"] = "replacement_emitted" if ready else "blocked_until_emit_ready"
    payload["transform_policy"] = {
        "automatic_compaction": False,
        "lossy_replacement_allowed": ready,
        "semantic_rewrite_allowed": False,
        "caller_supplied_replacement_required": True,
        "human_review_required": True,
        "stable_runtime_behavior_changed": False,
    }
    payload["exact_retrieval"] = {
        "required": True,
        "available": bool(receipt_id and reexpand_command and reexpand_valid and artifact_verified),
        "artifact_id": receipt_id,
        "cli": reexpand_command,
        "verified": artifact_verified,
        "valid_command_shape": reexpand_valid,
        "verification": artifact_verification,
        "note": "Emit mode validates exact local artifact command shape and verifies local artifact content matches the input diff.",
    }
    payload["replacement"] = replacement_meta

    review_plan = payload["review_plan"]
    review_plan["readiness_blockers"] = blockers
    review_plan["bounded_loss_disclosure"] = (
        "Compacted replacement is caller supplied and lossy; use exact_retrieval.cli to recover the original diff "
        "before relying on omitted details."
    )
    review_plan["next_steps"] = [
        "Human-review the compacted replacement against the original diff before use.",
        "Use exact_retrieval.cli to recover the original diff whenever omitted details matter.",
        "Treat bytes_before/bytes_after as local proxy evidence only; do not claim hosted token/cost savings.",
    ]

    payload["claim_boundary"] = (
        "Explicit local context-diff replacement emission only; smaller local diffs are proxy evidence and are not "
        "hosted API token or cost savings evidence."
    )
    bytes_after = replacement_meta["bytes"] if has_replacement else 0
    payload["compaction_evidence"] = {
        "bytes_before": payload["input"]["bytes"],
        "bytes_after": bytes_after,
        "byte_reduction": max(0, payload["input"]["bytes"] - bytes_after),
        "byte_reduction_proxy_only": True,
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }
    payload["compacted_replacement"] = replacement_record
    return payload
|
|
1456
|
+
|
|
1457
|
+
|
|
1458
|
+
def command_emit_context_diff_compaction(args: argparse.Namespace) -> int:
    """Run the context-diff emit command and report the result.

    Emits the payload as JSON when ``args.json`` is set, otherwise prints a
    short human-readable summary.

    Returns:
        ``0`` when the replacement was emitted, ``1`` when it was blocked.
    """
    emitted_status = "replacement_emitted"
    payload = context_diff_emit_payload(args)

    if args.json:
        emit_json(payload)
        return 0 if payload["status"] == emitted_status else 1

    if payload["status"] == emitted_status:
        replacement = payload["replacement"]
        print("ContextGuard context-diff compact replacement emitted")
        print(
            f"Replacement: bytes={replacement['bytes']} "
            f"sha256={replacement['sha256']}"
        )
        print(f"Exact re-expand: {payload['exact_retrieval']['cli']}")
    else:
        print("ContextGuard context-diff compact replacement blocked")

    print(f"Status: {payload['status']}")
    blockers = payload["review_plan"]["readiness_blockers"]
    if blockers:
        print(f"Readiness blockers: {', '.join(blockers)}")
    print(payload["claim_boundary"])
    return 0 if payload["status"] == emitted_status else 1
|
|
1477
|
+
|
|
1478
|
+
|
|
623
1479
|
def clean_values(values: list[str] | None) -> list[str]:
    """Return the whitespace-stripped, non-empty entries of ``values``.

    ``None`` and an empty list both yield ``[]``. Entries that are empty or
    whitespace-only are dropped; input order is preserved.
    """
    if not values:
        return []
    # Strip each entry once; the stripped text is both the filter and the result.
    stripped = (value.strip() for value in values)
    return [value for value in stripped if value]
|
|
625
1481
|
|
|
@@ -678,23 +1534,21 @@ def read_visual_ocr_text(args: argparse.Namespace) -> dict[str, Any]:
|
|
|
678
1534
|
if args.ocr_text_file is not None:
|
|
679
1535
|
path = Path(args.ocr_text_file)
|
|
680
1536
|
source_label = args.ocr_source_label.strip() if args.ocr_source_label else path.name
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
except OSError as exc:
|
|
685
|
-
raise RegistryError(f"could not read OCR text file: {path}: {exc}") from exc
|
|
1537
|
+
loaded = read_bounded_regular_file(path, max_bytes=MAX_VISUAL_OCR_TEXT_BYTES, label="OCR text file")
|
|
1538
|
+
assert loaded is not None
|
|
1539
|
+
raw, truncated = loaded
|
|
686
1540
|
source_type = "file"
|
|
687
1541
|
elif args.ocr_text is not None:
|
|
688
1542
|
raw = args.ocr_text.encode("utf-8")
|
|
689
1543
|
source_label = args.ocr_source_label.strip() if args.ocr_source_label else "inline"
|
|
690
1544
|
source_type = "inline"
|
|
1545
|
+
truncated = len(raw) > MAX_VISUAL_OCR_TEXT_BYTES
|
|
1546
|
+
raw = raw[:MAX_VISUAL_OCR_TEXT_BYTES]
|
|
691
1547
|
else:
|
|
692
1548
|
raw = b""
|
|
693
1549
|
source_label = args.ocr_source_label.strip() if args.ocr_source_label else None
|
|
694
1550
|
source_type = None
|
|
695
|
-
|
|
696
|
-
truncated = len(raw) > MAX_VISUAL_OCR_TEXT_BYTES
|
|
697
|
-
raw = raw[:MAX_VISUAL_OCR_TEXT_BYTES]
|
|
1551
|
+
truncated = False
|
|
698
1552
|
try:
|
|
699
1553
|
text = raw.decode("utf-8")
|
|
700
1554
|
valid_encoding = True
|
|
@@ -710,6 +1564,7 @@ def read_visual_ocr_text(args: argparse.Namespace) -> dict[str, Any]:
|
|
|
710
1564
|
"truncated": truncated,
|
|
711
1565
|
"max_bytes": MAX_VISUAL_OCR_TEXT_BYTES,
|
|
712
1566
|
"valid_utf8": valid_encoding,
|
|
1567
|
+
"text": text,
|
|
713
1568
|
"text_preview": text,
|
|
714
1569
|
"has_text": bool(text.strip()),
|
|
715
1570
|
}
|
|
@@ -872,6 +1727,93 @@ def command_plan_visual_crop_ocr(args: argparse.Namespace) -> int:
|
|
|
872
1727
|
return 0
|
|
873
1728
|
|
|
874
1729
|
|
|
1730
|
+
def visual_crop_ocr_evidence_pack_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the emit-mode payload for a visual crop/OCR evidence pack.

    Starts from the plan payload and adds emit-mode status, guardrails,
    proxy reduction figures and next steps. The ``evidence_pack`` entry is
    added only when the plan reported no readiness blockers.

    Returns:
        The plan payload, mutated in place with the emit-mode fields.
    """
    payload = visual_crop_ocr_plan_payload(args)
    review_plan = payload["review_plan"]
    blockers = list(review_plan["readiness_blockers"])
    ready = not blockers
    derived = payload["derived_evidence"]
    crop = derived["crop"]
    ocr = derived["ocr"]

    # Areas are only computable when both the crop bounds and image size are known.
    have_geometry = crop["bounds"] is not None and crop["image_size"] is not None
    if have_geometry:
        image_area = crop["image_size"]["width"] * crop["image_size"]["height"]
        crop_area = crop["bounds"]["width"] * crop["bounds"]["height"]
    else:
        image_area = None
        crop_area = None

    payload["mode"] = "emit"
    payload["status"] = "evidence_pack_emitted" if ready else "blocked_until_visual_evidence_pack_ready"
    # Work on a copy so the plan's original guardrails mapping is not mutated.
    payload["guardrails"] = {
        **payload["guardrails"],
        "candidate_replacement_allowed": False,
        "evidence_pack_allowed": ready,
        "runtime_writes_files": False,
        "external_services_called": False,
    }
    payload["claim_boundary"] = (
        "Explicit local visual crop/OCR evidence-pack emission only; image area and OCR byte reductions are proxy "
        "evidence and are not hosted API token or cost savings evidence."
    )

    if crop["available"] and image_area is not None and crop_area is not None:
        area_reduction = image_area - crop_area
    else:
        area_reduction = None
    payload["reduction_evidence"] = {
        "image_area_before": image_area,
        "crop_area_after": crop_area if crop["available"] else None,
        "crop_area_reduction": area_reduction,
        "ocr_text_bytes": ocr["metadata"]["bytes"] if ocr["available"] else None,
        "proxy_only": True,
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }
    review_plan["next_steps"] = [
        "Human-review crop/OCR evidence against the full visual evidence receipt before using it as a substitute.",
        "Read missed-context notes before relying on omitted visual regions.",
        "Treat image area/OCR byte reductions as local proxy evidence only; do not claim hosted token/cost savings.",
    ]

    if not ready:
        return payload

    if ocr["available"]:
        ocr_evidence = {
            "source_type": ocr["source_type"],
            "source_label": ocr["source_label"],
            "text": ocr["text_preview"],
            "metadata": ocr["metadata"],
            "confidence": ocr["confidence"],
            "error_notes": ocr["error_notes"],
        }
    else:
        ocr_evidence = None

    payload["evidence_pack"] = {
        "schema_version": "contextguard.visual-evidence-pack.v1",
        "full_visual_evidence": payload["full_visual_evidence"],
        "crop_evidence": crop if crop["available"] else None,
        "ocr_evidence": ocr_evidence,
        "missed_context_notes": payload["review_plan"]["missed_context_notes"],
        "guardrails": payload["guardrails"],
        "reduction_evidence": payload["reduction_evidence"],
        "claim_boundary": payload["claim_boundary"],
    }
    return payload
|
|
1793
|
+
|
|
1794
|
+
|
|
1795
|
+
def command_emit_visual_crop_ocr(args: argparse.Namespace) -> int:
    """Build the visual crop/OCR evidence pack payload and report it.

    When ``args.json`` is truthy the payload is handed to ``emit_json``;
    otherwise a short human-readable summary is printed.

    Returns:
        0 when the payload status is ``"evidence_pack_emitted"``, else 1.
    """
    payload = visual_crop_ocr_evidence_pack_payload(args)
    emitted = payload["status"] == "evidence_pack_emitted"
    exit_code = 0 if emitted else 1

    if args.json:
        emit_json(payload)
        return exit_code

    if not emitted:
        print("ContextGuard visual crop/OCR evidence pack blocked")
    else:
        print("ContextGuard visual crop/OCR evidence pack emitted")
        print(f"Full evidence receipt: {payload['full_visual_evidence']['receipt_id']}")
        print(
            "Derived evidence: "
            f"crop={payload['derived_evidence']['crop']['available']} "
            f"ocr={payload['derived_evidence']['ocr']['available']}"
        )

    # Status, blockers and the claim boundary are printed for both outcomes.
    print(f"Status: {payload['status']}")
    if payload["review_plan"]["readiness_blockers"]:
        print(f"Readiness blockers: {', '.join(payload['review_plan']['readiness_blockers'])}")
    print(payload["claim_boundary"])
    return exit_code
|
|
1815
|
+
|
|
1816
|
+
|
|
875
1817
|
SECRET_LABEL_KEY_RE = (
|
|
876
1818
|
r"[A-Za-z0-9_.-]*(?:"
|
|
877
1819
|
r"api[-_]?key|apikey|token|secret|password|passwd|pwd|client[-_]?secret|"
|
|
@@ -1059,22 +2001,21 @@ def read_self_hosted_payload(args: argparse.Namespace) -> tuple[Any, dict[str, A
|
|
|
1059
2001
|
path = Path(args.input)
|
|
1060
2002
|
source_label = source_label or sanitize_self_hosted_text(path)
|
|
1061
2003
|
try:
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
if exc.errno is not None:
|
|
1068
|
-
detail = f"{detail} (errno {exc.errno})"
|
|
1069
|
-
raise RegistryError(f"could not read self-hosted metrics input: {safe_path}: {detail}") from exc
|
|
2004
|
+
loaded = read_bounded_regular_file(path, max_bytes=MAX_SELF_HOSTED_METRICS_INPUT_BYTES, label=f"self-hosted metrics input: {source_label}")
|
|
2005
|
+
except RegistryError as exc:
|
|
2006
|
+
raise RegistryError(f"could not read self-hosted metrics input: {source_label}: {exc}") from exc
|
|
2007
|
+
assert loaded is not None
|
|
2008
|
+
raw, loaded_truncated = loaded
|
|
1070
2009
|
else:
|
|
1071
2010
|
source_label = source_label or "stdin"
|
|
1072
2011
|
raw = sys.stdin.buffer.read(MAX_SELF_HOSTED_METRICS_INPUT_BYTES + 1)
|
|
1073
|
-
|
|
2012
|
+
loaded_truncated = len(raw) > MAX_SELF_HOSTED_METRICS_INPUT_BYTES
|
|
2013
|
+
raw = raw[:MAX_SELF_HOSTED_METRICS_INPUT_BYTES]
|
|
2014
|
+
if loaded_truncated:
|
|
1074
2015
|
return None, {
|
|
1075
2016
|
"source_label": source_label,
|
|
1076
2017
|
"bytes": MAX_SELF_HOSTED_METRICS_INPUT_BYTES,
|
|
1077
|
-
"sha256": hashlib.sha256(raw
|
|
2018
|
+
"sha256": hashlib.sha256(raw).hexdigest(),
|
|
1078
2019
|
"truncated": True,
|
|
1079
2020
|
"max_bytes": MAX_SELF_HOSTED_METRICS_INPUT_BYTES,
|
|
1080
2021
|
"envelope_source": None,
|
|
@@ -1129,6 +2070,70 @@ def select_self_hosted_envelope(payload: Any) -> tuple[Any, str | None, list[str
|
|
|
1129
2070
|
return None, None, ignored
|
|
1130
2071
|
|
|
1131
2072
|
|
|
2073
|
+
def parse_optional_success(value: str | None) -> bool | None:
    """Translate a tri-state success string into an optional boolean.

    ``None`` and ``"unknown"`` map to ``None``; ``"true"`` maps to ``True``;
    every other string maps to ``False``. The comparison is case-sensitive.
    """
    undetermined = value is None or value == "unknown"
    return None if undetermined else value == "true"
|
|
2077
|
+
|
|
2078
|
+
|
|
2079
|
+
def self_hosted_metrics_ledger_row(
    sidecar: dict[str, Any],
    *,
    task_id: str = "self-hosted-metrics-manual",
    variant: str = "self-hosted-metrics-ledger",
    success: bool | None = None,
    notes: str = "explicit self-hosted metrics record; no hosted API savings claim",
    claude_version: str = "manual",
    wall_time_seconds: float = 0.0,
) -> dict[str, Any]:
    """Build one ledger row that carries a self-hosted metrics sidecar.

    The free-text fields (``claude_version``, ``task_id``, ``variant``,
    ``notes``) are passed through ``sanitize_self_hosted_text`` and fall back
    to their default literal when the sanitized value is falsy. Every
    token/cost/byte measurement in the row is a fixed "not measured"
    placeholder; only ``self_hosted_metrics`` is flagged as available.

    Args:
        sidecar: Stored verbatim under ``"self_hosted_metrics"``.
        task_id: Task identifier for the row.
        variant: Variant label for the row.
        success: Optional outcome, stored as given.
        notes: Free-text note for the row.
        claude_version: Version label for the row.
        wall_time_seconds: Stored as given under ``"wall_time_seconds"``.

    Returns:
        The ledger row as a plain dict, stamped with the current UTC time.
    """
    recorded_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    safe_version = sanitize_self_hosted_text(claude_version) or "manual"
    safe_task = sanitize_self_hosted_text(task_id) or "self-hosted-metrics-manual"
    safe_variant = sanitize_self_hosted_text(variant) or "self-hosted-metrics-ledger"
    safe_notes = (
        sanitize_self_hosted_text(notes)
        or "explicit self-hosted metrics record; no hosted API savings claim"
    )

    availability = {
        "primary_tokens": False,
        "primary_cost": False,
        "external_tokens": False,
        "external_cost": False,
        "shifted_cost": False,
        "provider_cache": False,
        "byte_metrics": False,
        "wall_time": False,
        "self_hosted_metrics": True,
    }
    proxy_metrics = {
        "byte_metrics_observed": False,
        "token_proxy": "chars_div_4",
        "bytes_per_token": TOKEN_PROXY_BYTES_PER_TOKEN,
        "claim_boundary": "proxy_only_not_hosted_token_savings",
    }

    # Key order is kept stable so serialized rows stay byte-comparable.
    row: dict[str, Any] = {
        "schema_version": BENCH_RUN_EVIDENCE_SCHEMA_VERSION,
        "date": recorded_at,
        "claude_version": safe_version,
        "task_id": safe_task,
        "variant": safe_variant,
        "transform_id": "self-hosted-metrics-ledger",
        "success": success,
        "primary_tokens_measured": False,
        "primary_tokens": 0,
        "primary_cost_measured": False,
        "primary_cost_usd": 0.0,
        "provider_cached_tokens": None,
        "provider_cached_tokens_measured": False,
        "wall_time_seconds": wall_time_seconds,
        "external_tokens_measured": False,
        "external_tokens": 0,
        "external_cost_measured": False,
        "external_cost_usd": 0.0,
        "total_cost_with_shift_usd": None,
        "artifacts_used": 0,
        "bytes_before": 0,
        "bytes_after": 0,
        "hook_triggers": 0,
        "turns": 0,
        "notes": safe_notes,
        "measurement_availability": availability,
        "self_hosted_metrics": sidecar,
        "proxy_metrics": proxy_metrics,
    }
    return row
|
|
2135
|
+
|
|
2136
|
+
|
|
1132
2137
|
def self_hosted_metrics_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
|
|
1133
2138
|
cli_metrics = cli_self_hosted_metrics(args)
|
|
1134
2139
|
if cli_metrics:
|
|
@@ -1188,51 +2193,12 @@ def self_hosted_metrics_plan_payload(args: argparse.Namespace) -> dict[str, Any]
|
|
|
1188
2193
|
ready = not blockers
|
|
1189
2194
|
ledger_preview = None
|
|
1190
2195
|
if sidecar is not None:
|
|
1191
|
-
ledger_preview =
|
|
1192
|
-
|
|
1193
|
-
"
|
|
1194
|
-
"
|
|
1195
|
-
"
|
|
1196
|
-
|
|
1197
|
-
"transform_id": "self-hosted-metrics-ledger",
|
|
1198
|
-
"success": None,
|
|
1199
|
-
"primary_tokens_measured": False,
|
|
1200
|
-
"primary_tokens": 0,
|
|
1201
|
-
"primary_cost_measured": False,
|
|
1202
|
-
"primary_cost_usd": 0.0,
|
|
1203
|
-
"provider_cached_tokens": None,
|
|
1204
|
-
"provider_cached_tokens_measured": False,
|
|
1205
|
-
"wall_time_seconds": 0.0,
|
|
1206
|
-
"external_tokens_measured": False,
|
|
1207
|
-
"external_tokens": 0,
|
|
1208
|
-
"external_cost_measured": False,
|
|
1209
|
-
"external_cost_usd": 0.0,
|
|
1210
|
-
"total_cost_with_shift_usd": None,
|
|
1211
|
-
"artifacts_used": 0,
|
|
1212
|
-
"bytes_before": 0,
|
|
1213
|
-
"bytes_after": 0,
|
|
1214
|
-
"hook_triggers": 0,
|
|
1215
|
-
"turns": 0,
|
|
1216
|
-
"notes": "dry-run preview; no ledger file written",
|
|
1217
|
-
"measurement_availability": {
|
|
1218
|
-
"primary_tokens": False,
|
|
1219
|
-
"primary_cost": False,
|
|
1220
|
-
"external_tokens": False,
|
|
1221
|
-
"external_cost": False,
|
|
1222
|
-
"shifted_cost": False,
|
|
1223
|
-
"provider_cache": False,
|
|
1224
|
-
"byte_metrics": False,
|
|
1225
|
-
"wall_time": False,
|
|
1226
|
-
"self_hosted_metrics": True,
|
|
1227
|
-
},
|
|
1228
|
-
"self_hosted_metrics": sidecar,
|
|
1229
|
-
"proxy_metrics": {
|
|
1230
|
-
"byte_metrics_observed": False,
|
|
1231
|
-
"token_proxy": "chars_div_4",
|
|
1232
|
-
"bytes_per_token": TOKEN_PROXY_BYTES_PER_TOKEN,
|
|
1233
|
-
"claim_boundary": "proxy_only_not_hosted_token_savings",
|
|
1234
|
-
},
|
|
1235
|
-
}
|
|
2196
|
+
ledger_preview = self_hosted_metrics_ledger_row(
|
|
2197
|
+
sidecar,
|
|
2198
|
+
task_id="self-hosted-metrics-dry-run",
|
|
2199
|
+
notes="dry-run preview; no ledger file written",
|
|
2200
|
+
claude_version="dry-run",
|
|
2201
|
+
)
|
|
1236
2202
|
return {
|
|
1237
2203
|
"tool": TOOL_NAME,
|
|
1238
2204
|
"schema_version": CONFIG_SCHEMA_VERSION,
|
|
@@ -1278,6 +2244,65 @@ def command_plan_self_hosted_metrics_ledger(args: argparse.Namespace) -> int:
|
|
|
1278
2244
|
return 0
|
|
1279
2245
|
|
|
1280
2246
|
|
|
2247
|
+
def self_hosted_metrics_record_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Turn the self-hosted metrics plan into a record and append it to the ledger.

    Starts from ``self_hosted_metrics_plan_payload(args)`` and switches it to
    record mode. If no sidecar was produced or the plan reports readiness
    blockers, the payload is returned with status ``"blocked_until_metrics"``
    and nothing is written. Otherwise one row is appended to
    ``args.ledger_jsonl`` via ``append_jsonl_no_follow`` and the payload is
    updated to describe the write.

    Returns:
        The (mutated) plan payload.
    """
    payload = self_hosted_metrics_plan_payload(args)
    policy = payload["policy"]

    payload["mode"] = "record"
    payload["claim_boundary"] = (
        "Explicit local self-hosted metrics ledger record only; local/model-server metrics are diagnostic sidecars "
        "and are not hosted API token or cost savings evidence."
    )
    policy["ledger_write_performed"] = False
    policy["stable_runtime_behavior_changed"] = False
    payload["ledger_record"] = None
    ledger_info: dict[str, Any] = {
        "path": sanitize_self_hosted_text(args.ledger_jsonl),
        "write_performed": False,
        "bytes_written": 0,
    }
    payload["ledger_jsonl"] = ledger_info

    sidecar = payload["self_hosted_metrics"]
    if sidecar is None or payload["review_plan"]["readiness_blockers"]:
        payload["status"] = "blocked_until_metrics"
        return payload

    row = self_hosted_metrics_ledger_row(
        sidecar,
        task_id=args.task_id,
        variant=args.variant,
        success=parse_optional_success(args.success),
        notes=args.notes,
        claude_version="manual",
    )
    bytes_written = append_jsonl_no_follow(
        Path(args.ledger_jsonl),
        row,
        label="self-hosted metrics ledger",
    )

    # The write succeeded: flip every "write performed" marker together.
    payload["status"] = "recorded"
    payload["ledger_preview"] = row
    payload["ledger_record"] = row
    policy["ledger_write_performed"] = True
    ledger_info["write_performed"] = True
    ledger_info["bytes_written"] = bytes_written
    payload["review_plan"]["next_steps"] = [
        "Use this JSONL row only as self-hosted/local diagnostic evidence.",
        "Keep hosted API token/cost savings claims behind provider-measured matched successful tasks.",
        "Compare this sidecar with benchmark rows only through explicit shifted-cost accounting.",
    ]
    return payload
|
|
2287
|
+
|
|
2288
|
+
|
|
2289
|
+
def command_record_self_hosted_metrics_ledger(args: argparse.Namespace) -> int:
    """Record a self-hosted metrics ledger row and report the outcome.

    With ``args.json`` set the payload is handed to ``emit_json``; otherwise a
    short human-readable summary is printed.

    Returns:
        Always 0, whether the record was written or blocked.
    """
    payload = self_hosted_metrics_record_payload(args)

    if args.json:
        emit_json(payload)
        return 0

    if payload["status"] != "recorded":
        print("ContextGuard self-hosted metrics ledger record blocked")
    else:
        print("ContextGuard self-hosted metrics ledger record written")
        print(f"Ledger: {payload['ledger_jsonl']['path']} bytes={payload['ledger_jsonl']['bytes_written']}")

    # Status, blockers and the claim boundary are printed for both outcomes.
    print(f"Status: {payload['status']}")
    if payload["review_plan"]["readiness_blockers"]:
        print(f"Readiness blockers: {', '.join(payload['review_plan']['readiness_blockers'])}")
    print(payload["claim_boundary"])
    return 0
|
|
2304
|
+
|
|
2305
|
+
|
|
1281
2306
|
def sanitize_local_proxy_value(value: Any) -> str:
    """Sanitize a local-proxy value by delegating to ``sanitize_self_hosted_text``."""
    sanitized = sanitize_self_hosted_text(value)
    return sanitized
|
|
1283
2308
|
|
|
@@ -1288,6 +2313,192 @@ def local_proxy_secret_like(value: Any) -> bool:
|
|
|
1288
2313
|
return "[REDACTED]" in sanitize_local_proxy_value(value)
|
|
1289
2314
|
|
|
1290
2315
|
|
|
2316
|
+
def local_proxy_bytes_secret_like(value: bytes) -> bool:
    """Report whether raw bytes look secret-like once decoded as text.

    The bytes are decoded as UTF-8 with undecodable sequences replaced, then
    checked with ``local_proxy_secret_like``.
    """
    decoded = value.decode("utf-8", errors="replace")
    return local_proxy_secret_like(decoded)
|
|
2318
|
+
|
|
2319
|
+
|
|
2320
|
+
def local_proxy_request_target_meta(value: Any) -> dict[str, Any]:
    """Describe a request target by digest and size instead of its content.

    ``None`` is treated as the empty string; any other value is converted with
    ``str()``. The text is UTF-8 encoded (unencodable characters replaced)
    before hashing.

    Returns:
        A dict with the SHA-256 hex digest and the encoded byte length.
    """
    if value is None:
        encoded = b""
    else:
        encoded = str(value).encode("utf-8", errors="replace")
    digest = hashlib.sha256()
    digest.update(encoded)
    return {
        "request_target_sha256": digest.hexdigest(),
        "request_target_bytes": len(encoded),
    }
|
|
2327
|
+
|
|
2328
|
+
|
|
2329
|
+
def normalize_external_allow_host(value: Any) -> tuple[str, list[str]]:
    """Validate one external allowlist host entry.

    The entry is trimmed, stripped of surrounding brackets and one trailing
    dot, and lower-cased for validation. It is rejected when it is empty,
    contains wildcard/URL punctuation, exceeds 253 characters, is a localhost
    name, is a non-global IP literal, or is not a multi-label DNS name.

    Dotted names whose final label is purely numeric or ``0x``-hex (for
    example ``127.1``) are also rejected: they are not valid IP literals for
    ``ipaddress`` but inet_aton-style resolvers and URL parsers interpret them
    as IPv4 shorthand, which could otherwise slip a loopback/private address
    through the allowlist.

    Args:
        value: Raw host entry; ``None`` is treated as empty.

    Returns:
        ``(sanitized, blockers)`` where ``sanitized`` is the sanitized form of
        the trimmed input (not the normalized host) and ``blockers`` lists the
        blocker ids found (empty when the entry is acceptable).
    """
    raw = "" if value is None else str(value).strip()
    sanitized = sanitize_local_proxy_value(raw)
    blockers: list[str] = []
    host = raw.strip().strip("[]").lower().rstrip(".")
    if not host:
        return sanitized, ["invalid_external_allow_host"]
    if "[REDACTED]" in sanitized:
        blockers.append("secret_like_external_forwarding_design_metadata")
    if any(ch in host for ch in ("*", "/", "\\", "@", ":", " ")) or len(host) > 253:
        blockers.append("invalid_external_allow_host")
    elif is_localhost_host(host):
        blockers.append("localhost_external_allow_host_not_allowed")
    else:
        try:
            ip = ipaddress.ip_address(host)
        except ValueError:
            # Not an IP literal: require a well-formed, multi-label DNS name.
            labels = host.split(".")
            label_re = re.compile(r"^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?$")
            # A numeric/hex final label means "IPv4 shorthand" to many
            # resolvers, so it must never be accepted as a DNS hostname.
            ends_in_number = re.fullmatch(r"[0-9]+|0x[0-9a-f]*", labels[-1]) is not None
            if (
                len(labels) < 2
                or ends_in_number
                or any(not label_re.fullmatch(label) for label in labels)
            ):
                blockers.append("invalid_external_allow_host")
        else:
            if not ip.is_global:
                blockers.append("non_global_external_allow_host_not_allowed")
    return sanitized, blockers
|
|
2354
|
+
|
|
2355
|
+
|
|
2356
|
+
def local_proxy_external_forwarding_design_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Assemble the dry-run design-gate payload for external forwarding.

    Validates the acknowledgements, host/scheme allowlist, threat-model notes,
    credential-redaction policy and provider-evidence boundary taken from
    ``args``, collecting a de-duplicated, first-seen-ordered list of readiness
    blockers. The policy and network-action flags in the result are fixed
    literals; this function only builds a dict.

    Returns:
        The design payload; its ``status`` is
        ``"ready_for_external_forwarding_design_review"`` only when no blocker
        was found.
    """
    intent_acknowledged = bool(args.external_forwarding_intent)
    design_acknowledged = bool(args.external_forwarding_design_ack)
    host_inputs = args.allow_host or []
    scheme_inputs = args.allow_scheme or []
    note_inputs = args.threat_model_note or []
    redaction_policy = sanitize_local_proxy_value(args.credential_redaction_policy)
    provider_boundary = sanitize_local_proxy_value(args.provider_evidence_boundary)

    found: list[str] = []

    # Acknowledgements.
    if not intent_acknowledged:
        found.append("missing_external_forwarding_intent")
    if not design_acknowledged:
        found.append("missing_external_forwarding_design_ack")

    # Host allowlist: keep every non-empty sanitized entry, even rejected ones,
    # so the payload shows what was supplied.
    if not host_inputs:
        found.append("missing_external_allow_host")
    unique_hosts: set[str] = set()
    for candidate in host_inputs:
        sanitized_host, host_problems = normalize_external_allow_host(candidate)
        if sanitized_host:
            unique_hosts.add(sanitized_host)
        found.extend(host_problems)
    hosts = sorted(unique_hosts)

    # Scheme allowlist.
    schemes = sorted(
        {
            sanitize_local_proxy_value(str(item).strip().lower())
            for item in scheme_inputs
            if str(item).strip()
        }
    )
    if not schemes:
        found.append("missing_external_allow_scheme")
    for scheme in schemes:
        if "[REDACTED]" in scheme:
            found.append("secret_like_external_forwarding_design_metadata")
        elif scheme not in LOCAL_PROXY_EXTERNAL_ALLOWED_SCHEMES:
            found.append("https_only_external_allow_scheme_required")

    # Threat-model notes.
    threat_model_notes = [sanitize_local_proxy_value(item) for item in clean_values(note_inputs)]
    if not threat_model_notes:
        found.append("missing_threat_model_note")
    if any(local_proxy_secret_like(item) for item in note_inputs):
        found.append("secret_like_external_forwarding_design_metadata")

    # Fixed-vocabulary policies.
    if not redaction_policy:
        found.append("missing_credential_redaction_policy")
    elif redaction_policy != LOCAL_PROXY_EXTERNAL_CREDENTIAL_REDACTION_POLICY:
        found.append("unsupported_credential_redaction_policy")
    if not provider_boundary:
        found.append("missing_provider_evidence_boundary")
    elif provider_boundary != LOCAL_PROXY_EXTERNAL_PROVIDER_EVIDENCE_BOUNDARY:
        found.append("unsupported_provider_evidence_boundary")
    if local_proxy_secret_like(redaction_policy) or local_proxy_secret_like(provider_boundary):
        found.append("secret_like_external_forwarding_design_metadata")

    # De-duplicate while preserving first-seen order.
    blockers = list(dict.fromkeys(found))
    if blockers:
        status = "blocked_until_external_forwarding_design_constraints"
    else:
        status = "ready_for_external_forwarding_design_review"

    policy = {
        "default_off": True,
        "design_only": True,
        "external_forwarding_runtime_implemented": False,
        "external_forwarding_allowed": False,
        "hidden_external_forwarding": False,
        "api_key_persistence_allowed": False,
        "credential_material_forwarded": False,
        "stable_runtime_behavior_changed": False,
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }
    network_actions = {
        "listener_started": False,
        "outbound_forwarding_attempted": False,
        "dns_lookup_attempted": False,
        "external_services_called": False,
    }
    allowlist = {
        "hosts": hosts,
        "schemes": schemes,
        "wildcards_allowed": False,
        "localhost_allowed": False,
        "non_global_ip_allowed": False,
    }
    credential_redaction = {
        "policy": redaction_policy,
        "required_policy": LOCAL_PROXY_EXTERNAL_CREDENTIAL_REDACTION_POLICY,
        "blocked_header_names": sorted(LOCAL_PROXY_SENSITIVE_HEADER_NAMES),
        "raw_headers_persisted": False,
        "request_bodies_persisted": False,
        "response_bodies_persisted": False,
    }
    threat_model = {
        "required": True,
        "notes": threat_model_notes,
        "future_review_required": True,
    }
    evidence_boundary = {
        "policy": provider_boundary,
        "required_policy": LOCAL_PROXY_EXTERNAL_PROVIDER_EVIDENCE_BOUNDARY,
        "diagnostic_only": True,
        "provider_measured_matched_tasks_required_for_hosted_claims": True,
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }
    design = {
        "intent_acknowledged": intent_acknowledged,
        "design_acknowledged": design_acknowledged,
        "allowlist_required": True,
        "allowlist": allowlist,
        "credential_redaction": credential_redaction,
        "threat_model": threat_model,
        "provider_evidence_boundary": evidence_boundary,
        "future_runtime_requirements": [
            "separate future runtime gate and review",
            "explicit host/scheme allowlist enforcement before any network connection",
            "credential-bearing requests blocked or stripped before forwarding",
            "no CONNECT/TLS interception without a separate reviewed gate",
            "diagnostic shifted-cost accounting only unless provider-measured matched-task evidence exists",
        ],
    }
    review_plan = {
        "readiness_blockers": blockers,
        "next_steps": [
            "Treat this as design evidence only; do not forward external traffic from this command.",
            "Keep existing local-proxy serve runtime literal-loopback-only.",
            "Require a separate future runtime gate before any external forwarding implementation.",
        ],
    }

    return {
        "tool": TOOL_NAME,
        "schema_version": LOCAL_PROXY_EXTERNAL_DESIGN_SCHEMA_VERSION,
        "experiment_id": "local-proxy",
        "mode": "external_forwarding_design",
        "status": status,
        "policy": policy,
        "network_actions": network_actions,
        "external_forwarding_design": design,
        "review_plan": review_plan,
        "claim_boundary": (
            "Dry-run external forwarding design gate only; no listener, DNS lookup, external service call, credential "
            "persistence, traffic forwarding, or hosted API token/cost savings claim is performed."
        ),
    }
|
|
2486
|
+
|
|
2487
|
+
|
|
2488
|
+
def command_plan_local_proxy_external_forwarding(args: argparse.Namespace) -> int:
    """Report the external-forwarding design-gate payload.

    With ``args.json`` set the payload is handed to ``emit_json``; otherwise a
    human-readable summary is printed.

    Returns:
        Always 0.
    """
    payload = local_proxy_external_forwarding_design_payload(args)

    if args.json:
        emit_json(payload)
        return 0

    print("ContextGuard local proxy external-forwarding design gate (dry-run only)")
    print("No listener was started, no traffic was forwarded, no DNS lookup was performed, and no API key was persisted.")
    print(f"Status: {payload['status']}")
    if payload["review_plan"]["readiness_blockers"]:
        print(f"Readiness blockers: {', '.join(payload['review_plan']['readiness_blockers'])}")
    print(payload["claim_boundary"])
    return 0
|
|
2500
|
+
|
|
2501
|
+
|
|
1291
2502
|
def is_localhost_host(value: Any) -> bool:
|
|
1292
2503
|
if not isinstance(value, str):
|
|
1293
2504
|
return False
|
|
@@ -1300,6 +2511,16 @@ def is_localhost_host(value: Any) -> bool:
|
|
|
1300
2511
|
return False
|
|
1301
2512
|
|
|
1302
2513
|
|
|
2514
|
+
def is_loopback_ip_literal(value: Any) -> bool:
    """Return True only for a string that parses as a loopback IP literal.

    Surrounding whitespace, square brackets and trailing dots are stripped
    before parsing. Non-strings and anything ``ipaddress`` cannot parse
    (including hostnames such as ``localhost``) yield False.
    """
    if isinstance(value, str):
        candidate = value.strip().strip("[]").lower().rstrip(".")
        try:
            parsed = ipaddress.ip_address(candidate)
        except ValueError:
            return False
        return parsed.is_loopback
    return False
|
|
2522
|
+
|
|
2523
|
+
|
|
1303
2524
|
def normalize_local_proxy_host(value: Any, *, default: str) -> tuple[str, bool, bool]:
|
|
1304
2525
|
if value is None or str(value).strip() == "":
|
|
1305
2526
|
host = default
|
|
@@ -1321,6 +2542,30 @@ def normalize_local_proxy_port(value: Any, *, default: int) -> tuple[int, bool]:
|
|
|
1321
2542
|
return port, 0 <= port <= 65535
|
|
1322
2543
|
|
|
1323
2544
|
|
|
2545
|
+
def normalize_local_proxy_int_limit(value: Any, *, default: int, maximum: int) -> tuple[int, bool]:
    """Parse an integer limit and report whether it is usable.

    Args:
        value: Raw limit; ``None`` or ``""`` selects ``default``.
        default: Value returned when the input is absent or unparseable.
        maximum: Inclusive upper bound for a valid limit.

    Returns:
        ``(limit, valid)``. Absent input yields ``(default, True)``. Booleans
        and values ``int()`` cannot convert yield ``(default, False)``.
        Otherwise the parsed integer is returned with ``valid`` true only when
        ``1 <= limit <= maximum``.
    """
    if value is None or value == "":
        return default, True
    if isinstance(value, bool):
        # bool is an int subclass; a JSON true/false is not a meaningful limit.
        return default, False
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers infinite floats (e.g. JSON ``1e999``), which
        # int() rejects with a different exception than NaN or bad strings.
        return default, False
    return parsed, 1 <= parsed <= maximum
|
|
2555
|
+
|
|
2556
|
+
|
|
2557
|
+
def normalize_local_proxy_timeout(value: Any) -> tuple[float, bool]:
    """Parse a timeout in seconds and report whether it is usable.

    Args:
        value: Raw timeout; ``None`` or ``""`` selects the default timeout.

    Returns:
        ``(timeout, valid)``. Absent input yields the default with ``True``.
        Booleans and values ``float()`` cannot convert yield the default with
        ``False``. Otherwise the parsed float is returned with ``valid`` true
        only when ``0.1 <= timeout <= LOCAL_PROXY_MAX_TIMEOUT_SECONDS`` (NaN
        and infinities therefore come back as invalid).
    """
    if value is None or value == "":
        return LOCAL_PROXY_DEFAULT_TIMEOUT_SECONDS, True
    if isinstance(value, bool):
        # bool is an int subclass; a JSON true/false is not a meaningful timeout.
        return LOCAL_PROXY_DEFAULT_TIMEOUT_SECONDS, False
    try:
        parsed = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: float() of an integer too large for a double, which
        # an arbitrarily long JSON integer can produce.
        return LOCAL_PROXY_DEFAULT_TIMEOUT_SECONDS, False
    return parsed, 0.1 <= parsed <= LOCAL_PROXY_MAX_TIMEOUT_SECONDS
|
|
2567
|
+
|
|
2568
|
+
|
|
1324
2569
|
def read_local_proxy_payload(args: argparse.Namespace) -> tuple[dict[str, Any], dict[str, Any]]:
|
|
1325
2570
|
if not args.input:
|
|
1326
2571
|
return {}, {
|
|
@@ -1333,18 +2578,16 @@ def read_local_proxy_payload(args: argparse.Namespace) -> tuple[dict[str, Any],
|
|
|
1333
2578
|
path = Path(args.input)
|
|
1334
2579
|
safe_path = sanitize_local_proxy_value(path)
|
|
1335
2580
|
try:
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
raise RegistryError(f"could not read local-proxy input: {safe_path}: {detail}") from exc
|
|
1343
|
-
if len(raw) > MAX_SELF_HOSTED_METRICS_INPUT_BYTES:
|
|
2581
|
+
loaded = read_bounded_regular_file(path, max_bytes=MAX_SELF_HOSTED_METRICS_INPUT_BYTES, label=f"local-proxy input: {safe_path}")
|
|
2582
|
+
except RegistryError as exc:
|
|
2583
|
+
raise RegistryError(f"could not read local-proxy input: {safe_path}: {exc}") from exc
|
|
2584
|
+
assert loaded is not None
|
|
2585
|
+
raw, loaded_truncated = loaded
|
|
2586
|
+
if loaded_truncated:
|
|
1344
2587
|
return {}, {
|
|
1345
2588
|
"source_label": safe_path,
|
|
1346
2589
|
"bytes": MAX_SELF_HOSTED_METRICS_INPUT_BYTES,
|
|
1347
|
-
"sha256": hashlib.sha256(raw
|
|
2590
|
+
"sha256": hashlib.sha256(raw).hexdigest(),
|
|
1348
2591
|
"truncated": True,
|
|
1349
2592
|
"ignored_keys": [],
|
|
1350
2593
|
}
|
|
@@ -1393,6 +2636,12 @@ def read_local_proxy_payload(args: argparse.Namespace) -> tuple[dict[str, Any],
|
|
|
1393
2636
|
"persist_api_key",
|
|
1394
2637
|
"external_forwarding_intent",
|
|
1395
2638
|
"runtime_gate_ack",
|
|
2639
|
+
"forwarding_gate_ack",
|
|
2640
|
+
"once",
|
|
2641
|
+
"max_request_bytes",
|
|
2642
|
+
"max_response_bytes",
|
|
2643
|
+
"timeout_seconds",
|
|
2644
|
+
"diagnostic_ledger_jsonl",
|
|
1396
2645
|
}
|
|
1397
2646
|
ignored.extend(sanitize_self_hosted_ignored_key(key) for key in envelope if key not in allowed)
|
|
1398
2647
|
return dict(envelope), {
|
|
@@ -1405,14 +2654,34 @@ def read_local_proxy_payload(args: argparse.Namespace) -> tuple[dict[str, Any],
|
|
|
1405
2654
|
|
|
1406
2655
|
|
|
1407
2656
|
def coalesce_local_proxy_value(args: argparse.Namespace, payload: dict[str, Any], attr: str, key: str) -> Any:
    """Pick the CLI value for ``attr`` when set, else the payload value for ``key``.

    A missing or ``None`` attribute on ``args`` falls through to
    ``payload.get(key)``; any other value (including falsy ones such as ``0``)
    takes precedence over the payload.
    """
    cli_value = getattr(args, attr, None)
    if cli_value is None:
        return payload.get(key)
    return cli_value
|
|
1410
2659
|
|
|
1411
2660
|
|
|
1412
|
-
def
|
|
1413
|
-
if
|
|
1414
|
-
return True
|
|
1415
|
-
|
|
2661
|
+
def parse_local_proxy_json_bool(value: Any) -> tuple[bool, bool]:
    """Interpret a JSON-sourced flag as ``(flag, valid)``.

    ``None`` counts as a valid ``False``. Real booleans pass through. Strings
    are trimmed and lower-cased, then matched against
    ``LOCAL_PROXY_TRUE_VALUES`` / ``LOCAL_PROXY_FALSE_VALUES``. The integers
    ``1`` and ``0`` map to ``True`` / ``False``. Everything else is reported
    as ``(False, False)``.
    """
    unparseable = (False, False)
    if value is None:
        return False, True
    if isinstance(value, bool):
        return value, True
    if isinstance(value, str):
        token = value.strip().lower()
        if token in LOCAL_PROXY_TRUE_VALUES:
            return True, True
        return (False, True) if token in LOCAL_PROXY_FALSE_VALUES else unparseable
    # Booleans were handled above, so any int reaching here is a plain integer.
    if isinstance(value, int) and value in (0, 1):
        return bool(value), True
    return unparseable
|
|
2679
|
+
|
|
2680
|
+
|
|
2681
|
+
def coalesce_local_proxy_bool(args: argparse.Namespace, payload: dict[str, Any], attr: str, key: str) -> tuple[bool, bool]:
    """Resolve a boolean flag from the CLI first, then from the payload.

    A truthy ``attr`` on ``args`` wins outright as ``(True, True)``. Otherwise
    the payload entry for ``key`` is interpreted by
    ``parse_local_proxy_json_bool``, which also reports validity.
    """
    cli_flag = getattr(args, attr, False)
    if not cli_flag:
        return parse_local_proxy_json_bool(payload.get(key))
    return True, True
|
|
1416
2685
|
|
|
1417
2686
|
|
|
1418
2687
|
def local_proxy_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
|
|
@@ -1426,14 +2695,24 @@ def local_proxy_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
|
|
|
1426
2695
|
proxy_label_raw = coalesce_local_proxy_value(args, input_payload, "proxy_label", "proxy_label")
|
|
1427
2696
|
api_key_raw = coalesce_local_proxy_value(args, input_payload, "api_key", "api_key")
|
|
1428
2697
|
authorization_raw = coalesce_local_proxy_value(args, input_payload, "authorization_header", "authorization_header")
|
|
1429
|
-
persist_api_key = coalesce_local_proxy_bool(
|
|
1430
|
-
|
|
2698
|
+
persist_api_key, persist_api_key_valid = coalesce_local_proxy_bool(
|
|
2699
|
+
args,
|
|
2700
|
+
input_payload,
|
|
2701
|
+
"persist_api_key",
|
|
2702
|
+
"persist_api_key",
|
|
2703
|
+
)
|
|
2704
|
+
external_forwarding_intent, external_forwarding_intent_valid = coalesce_local_proxy_bool(
|
|
1431
2705
|
args,
|
|
1432
2706
|
input_payload,
|
|
1433
2707
|
"external_forwarding_intent",
|
|
1434
2708
|
"external_forwarding_intent",
|
|
1435
2709
|
)
|
|
1436
|
-
runtime_gate_ack = coalesce_local_proxy_bool(
|
|
2710
|
+
runtime_gate_ack, runtime_gate_ack_valid = coalesce_local_proxy_bool(
|
|
2711
|
+
args,
|
|
2712
|
+
input_payload,
|
|
2713
|
+
"runtime_gate_ack",
|
|
2714
|
+
"runtime_gate_ack",
|
|
2715
|
+
)
|
|
1437
2716
|
|
|
1438
2717
|
upstream_url = sanitize_local_proxy_value(upstream_url_raw) if upstream_url_raw else None
|
|
1439
2718
|
upstream_host = None
|
|
@@ -1502,6 +2781,12 @@ def local_proxy_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
|
|
|
1502
2781
|
blockers: list[str] = []
|
|
1503
2782
|
if input_meta["truncated"]:
|
|
1504
2783
|
blockers.append("input_truncated")
|
|
2784
|
+
if not persist_api_key_valid:
|
|
2785
|
+
blockers.append("invalid_persist_api_key")
|
|
2786
|
+
if not external_forwarding_intent_valid:
|
|
2787
|
+
blockers.append("invalid_external_forwarding_intent")
|
|
2788
|
+
if not runtime_gate_ack_valid:
|
|
2789
|
+
blockers.append("invalid_runtime_gate_ack")
|
|
1505
2790
|
if not bind_port_valid:
|
|
1506
2791
|
blockers.append("invalid_bind_port")
|
|
1507
2792
|
if not target_port_valid:
|
|
@@ -1617,6 +2902,610 @@ def command_plan_local_proxy(args: argparse.Namespace) -> int:
|
|
|
1617
2902
|
return 0
|
|
1618
2903
|
|
|
1619
2904
|
|
|
2905
|
+
def local_proxy_gate_row(payload: dict[str, Any]) -> dict[str, Any]:
    """Build the ledger row that records a local-proxy runtime gate.

    Copies the proxy label, bind/target details, network actions, API-key
    persistence and forwarding sections from ``payload`` and stamps the row
    with the current UTC time. Apart from ``runtime_gate_acknowledged``
    (copied from the payload) and ``localhost_only`` (always ``True``), the
    policy flags in the row are fixed ``False`` literals.
    """
    recorded_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    gate_policy = {
        "localhost_only": True,
        "runtime_gate_acknowledged": payload["policy"]["runtime_gate_acknowledged"],
        "listener_started": False,
        "traffic_forwarded": False,
        "dns_lookup_attempted": False,
        "api_key_persisted": False,
        "hidden_external_forwarding": False,
    }
    claim_boundary = {
        "id": "local_proxy_runtime_gate_not_hosted_savings",
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
        "requires_provider_measured_matched_tasks_for_hosted_claims": True,
        "reason": "This row records a local proxy runtime gate only; it starts no listener and forwards no traffic.",
    }

    # Key order is kept stable so serialized rows stay byte-comparable.
    row: dict[str, Any] = {
        "schema_version": LOCAL_PROXY_GATE_SCHEMA_VERSION,
        "date": recorded_at,
        "experiment_id": "local-proxy",
        "proxy_label": payload["ledger_preview"]["proxy_label"],
        "bind": payload["bind"],
        "target": payload["target"],
        "policy": gate_policy,
        "network_actions": payload["network_actions"],
        "api_key_persistence": payload["api_key_persistence"],
        "forwarding": payload["forwarding"],
        "claim_boundary": claim_boundary,
        "shifted_cost_accounting_required": True,
    }
    return row
|
|
2934
|
+
|
|
2935
|
+
|
|
2936
|
+
def local_proxy_record_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Turn a local proxy plan into a runtime-gate record, writing the ledger row.

    Starts from ``local_proxy_plan_payload(args)``, switches it to record
    mode, and adds ``missing_runtime_gate_ack`` to the readiness blockers
    when the gate was not acknowledged. If any blocker remains, the payload is
    returned with a blocked status and nothing is written. Otherwise one row
    is appended to ``args.ledger_jsonl`` and the payload reports the write.

    Args:
        args: Parsed CLI namespace; ``ledger_jsonl`` is read here.

    Returns:
        The updated payload dict.
    """
    payload = local_proxy_plan_payload(args)
    payload["mode"] = "record"
    payload["claim_boundary"] = (
        "Explicit local proxy runtime-gate record only; no listener, forwarding, DNS lookup, API-key persistence, "
        "external service call, or hosted API token/cost savings claim is performed."
    )
    # Work on a copy of the policy mapping, then layer the record-mode flags on top.
    payload["policy"] = {
        **payload["policy"],
        "dry_run_only": False,
        "runtime_gate_record_only": True,
        "runtime_gate_recorded": False,
        "listener_started": False,
        "traffic_forwarded": False,
        "stable_runtime_behavior_changed": False,
    }
    payload["ledger_record"] = None
    payload["ledger_jsonl"] = {
        "path": sanitize_local_proxy_value(args.ledger_jsonl),
        "write_performed": False,
        "bytes_written": 0,
    }

    review_plan = payload["review_plan"]
    pending = list(review_plan["readiness_blockers"])
    if not payload["policy"]["runtime_gate_acknowledged"]:
        pending.append("missing_runtime_gate_ack")
    # dict.fromkeys removes duplicates while keeping first-seen order.
    pending = list(dict.fromkeys(pending))
    review_plan["readiness_blockers"] = pending

    preview = payload["ledger_preview"]
    preview["schema_version"] = LOCAL_PROXY_GATE_SCHEMA_VERSION
    preview["ledger_jsonl"] = sanitize_local_proxy_value(args.ledger_jsonl)
    preview["ledger_write_performed"] = False

    if pending:
        payload["status"] = "blocked_until_local_proxy_gate_ready"
        return payload

    gate_row = local_proxy_gate_row(payload)
    written = append_jsonl_no_follow(
        Path(args.ledger_jsonl),
        gate_row,
        label="local proxy runtime gate ledger",
    )
    payload["status"] = "recorded"
    payload["ledger_preview"] = gate_row
    payload["ledger_record"] = gate_row
    payload["ledger_jsonl"]["write_performed"] = True
    payload["ledger_jsonl"]["bytes_written"] = written
    payload["policy"]["runtime_gate_recorded"] = True
    review_plan["next_steps"] = [
        "Use this JSONL row only as a local proxy runtime-gate record.",
        "Keep any actual proxy listener or forwarding implementation behind a separate reviewed runtime.",
        "Do not persist API keys or claim hosted token/cost savings from this gate record.",
    ]
    return payload
|
|
2984
|
+
|
|
2985
|
+
|
|
2986
|
+
def command_record_local_proxy_runtime_gate(args: argparse.Namespace) -> int:
    """CLI entry point: record a local proxy runtime gate and report the outcome.

    Emits the payload as JSON when ``args.json`` is set, otherwise prints a
    short human-readable summary followed by the claim boundary.

    Returns:
        ``0`` when the payload status is ``"recorded"``, otherwise ``1``.
    """
    payload = local_proxy_record_payload(args)
    if args.json:
        emit_json(payload)
    else:
        ledger = payload["ledger_jsonl"]
        headline = (
            "ContextGuard local proxy runtime-gate record written"
            if payload["status"] == "recorded"
            else "ContextGuard local proxy runtime-gate record blocked"
        )
        print(headline)
        if payload["status"] == "recorded":
            print(f"Ledger: {ledger['path']} bytes={ledger['bytes_written']}")
        print(f"Status: {payload['status']}")
        blockers = payload["review_plan"]["readiness_blockers"]
        if blockers:
            print(f"Readiness blockers: {', '.join(blockers)}")
        print(payload["claim_boundary"])
    exit_code = 0 if payload["status"] == "recorded" else 1
    return exit_code
|
|
3001
|
+
|
|
3002
|
+
|
|
3003
|
+
def local_proxy_forward_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the serve-mode payload and decide whether forwarding may start.

    Starts from ``local_proxy_plan_payload(args)``, resolves the forwarding
    options (gate acknowledgement, ``once``, byte limits, timeout, diagnostic
    ledger path) from ``args`` and the input payload, and collects readiness
    blockers. No listener is started here; the result only carries the status
    ``"ready_to_serve"`` or ``"blocked_until_local_proxy_forwarding_ready"``.

    The unsanitized ledger path is stored under the private key
    ``"_diagnostic_ledger_write_path"`` for the later ledger write.

    Args:
        args: Parsed CLI namespace.

    Returns:
        The updated payload dict.
    """
    payload = local_proxy_plan_payload(args)
    input_payload, _input_meta = read_local_proxy_payload(args)

    def requested(option: str) -> Any:
        # Every serve option uses the same name on the CLI and in the input payload.
        return coalesce_local_proxy_value(args, input_payload, option, option)

    forwarding_gate_ack, forwarding_gate_ack_valid = coalesce_local_proxy_bool(
        args, input_payload, "forwarding_gate_ack", "forwarding_gate_ack"
    )
    once, once_valid = coalesce_local_proxy_bool(args, input_payload, "once", "once")
    max_request_bytes, max_request_valid = normalize_local_proxy_int_limit(
        requested("max_request_bytes"),
        default=LOCAL_PROXY_DEFAULT_MAX_REQUEST_BYTES,
        maximum=LOCAL_PROXY_MAX_FORWARD_BYTES,
    )
    max_response_bytes, max_response_valid = normalize_local_proxy_int_limit(
        requested("max_response_bytes"),
        default=LOCAL_PROXY_DEFAULT_MAX_RESPONSE_BYTES,
        maximum=LOCAL_PROXY_MAX_FORWARD_BYTES,
    )
    timeout_seconds, timeout_valid = normalize_local_proxy_timeout(requested("timeout_seconds"))

    diagnostic_ledger_raw = requested("diagnostic_ledger_jsonl")
    ledger_requested = bool(diagnostic_ledger_raw)
    diagnostic_ledger_path = sanitize_local_proxy_value(diagnostic_ledger_raw) if ledger_requested else None
    diagnostic_ledger_write_path = str(diagnostic_ledger_raw) if ledger_requested else None

    bind_ip_literal = is_loopback_ip_literal(payload["bind"]["host"])
    target_ip_literal = is_loopback_ip_literal(payload["target"]["host"])

    upstream_url = payload["target"].get("upstream_url")
    upstream_scheme = ""
    if upstream_url:
        try:
            upstream_scheme = urlparse(str(upstream_url)).scheme.lower()
        except ValueError:
            # An unparseable URL is reported through the scheme blocker below.
            upstream_scheme = "invalid"

    payload["mode"] = "serve"
    payload["schema_version"] = LOCAL_PROXY_FORWARD_SCHEMA_VERSION
    payload["claim_boundary"] = (
        "Explicit local proxy forwarding MVP only; binds and forwards literal loopback IPs, blocks credential "
        "material, persists no API keys, performs no DNS lookup, calls no external services, and makes no hosted "
        "API token/cost savings claim."
    )
    payload["policy"] = {
        **payload["policy"],
        "dry_run_only": False,
        "forwarding_runtime": True,
        "forwarding_gate_acknowledged": forwarding_gate_ack,
        "once_required": True,
        "once": once,
        "literal_loopback_ip_only": True,
        "listener_started": False,
        "traffic_forwarded": False,
        "stable_runtime_behavior_changed": False,
    }
    payload["forwarding"] = {
        **payload["forwarding"],
        "actual_local_forwarding_runtime": True,
        "forwarding_gate_acknowledged": forwarding_gate_ack,
        "external_forwarding_allowed": False,
        "connect_tunneling_allowed": False,
        "https_mitm_allowed": False,
    }
    payload["runtime_limits"] = {
        "once": once,
        "max_request_bytes": max_request_bytes,
        "max_response_bytes": max_response_bytes,
        "timeout_seconds": timeout_seconds,
    }
    payload["diagnostic_ledger"] = {
        "schema_version": LOCAL_PROXY_DIAGNOSTIC_SCHEMA_VERSION,
        "path": diagnostic_ledger_path,
        "path_sha256": (
            hashlib.sha256(str(diagnostic_ledger_raw).encode("utf-8", errors="replace")).hexdigest()
            if ledger_requested
            else None
        ),
        "write_requested": ledger_requested,
        "write_performed": False,
        "bytes_written": 0,
        "reason": None if ledger_requested else "not_requested",
    }
    payload["_diagnostic_ledger_write_path"] = diagnostic_ledger_write_path
    payload["forward_result"] = None

    # Ordered (failed?, blocker) pairs; the order determines the reported blocker order.
    checks = [
        (
            diagnostic_ledger_raw is not None and local_proxy_secret_like(diagnostic_ledger_raw),
            "secret_like_diagnostic_ledger_path",
        ),
        (not payload["policy"]["runtime_gate_acknowledged"], "missing_runtime_gate_ack"),
        (not forwarding_gate_ack_valid, "invalid_forwarding_gate_ack"),
        (not once_valid, "invalid_once"),
        (not forwarding_gate_ack, "missing_forwarding_gate_ack"),
        (not once, "once_required_for_forwarding_mvp"),
        (payload["bind"]["port"] <= 0, "bind_port_required_for_listener"),
        (payload["target"]["port"] <= 0, "target_port_required_for_forwarding"),
        (not bind_ip_literal, "bind_host_must_be_loopback_ip_literal"),
        (not target_ip_literal, "target_host_must_be_loopback_ip_literal"),
        (upstream_scheme and upstream_scheme != "http", "unsupported_upstream_url_scheme"),
        (not max_request_valid, "invalid_max_request_bytes"),
        (not max_response_valid, "invalid_max_response_bytes"),
        (not timeout_valid, "invalid_timeout_seconds"),
    ]
    review_plan = payload["review_plan"]
    blockers = list(review_plan["readiness_blockers"])
    blockers.extend(blocker for failed, blocker in checks if failed)
    # dict.fromkeys removes duplicates while keeping first-seen order.
    blockers = list(dict.fromkeys(blockers))
    review_plan["readiness_blockers"] = blockers
    review_plan["next_steps"] = [
        "Use this MVP only for local loopback HTTP forwarding.",
        "Keep external forwarding, CONNECT tunneling, credential persistence, and hosted savings claims behind later gates.",
        "Use --once plus byte/time limits for bounded operation.",
    ]
    if blockers:
        payload["status"] = "blocked_until_local_proxy_forwarding_ready"
    else:
        payload["status"] = "ready_to_serve"
    return payload
|
|
3129
|
+
|
|
3130
|
+
|
|
3131
|
+
def local_proxy_forward_diagnostic_row(payload: dict[str, Any]) -> dict[str, Any]:
    """Build the diagnostic ledger row for one served forwarding request.

    Request and response figures come from ``payload["forward_result"]``
    (treated as empty when missing or falsy). Only the method, a target hash,
    byte counts and the upstream status are recorded; the persistence and
    claim flags are fixed literals.

    Args:
        payload: Serve-mode payload with ``ledger_preview``, ``bind``,
            ``target``, ``runtime_limits`` and ``network_actions``.

    Returns:
        A new dict suitable for appending to the diagnostic ledger.
    """
    outcome = payload.get("forward_result") or {}
    row: dict[str, Any] = {"schema_version": LOCAL_PROXY_DIAGNOSTIC_SCHEMA_VERSION}
    row["date"] = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    row["experiment_id"] = "local-proxy"
    row["mode"] = "serve"
    row["proxy_label"] = payload["ledger_preview"]["proxy_label"]
    row["bind"] = payload["bind"]
    # Only host/port/localhost_only are copied from the target section.
    row["target"] = {
        "host": payload["target"]["host"],
        "port": payload["target"]["port"],
        "localhost_only": payload["target"]["localhost_only"],
    }
    row["request"] = {
        "method": outcome.get("request_method"),
        "target_sha256": outcome.get("request_target_sha256"),
        "target_bytes": outcome.get("request_target_bytes", 0),
        "body_bytes": outcome.get("inbound_request_bytes", 0),
        "headers_persisted": False,
        "body_persisted": False,
        "credential_material_forwarded": False,
    }
    row["response"] = {
        "upstream_status": outcome.get("upstream_status"),
        "upstream_response_bytes": outcome.get("upstream_response_bytes", 0),
        "body_persisted": False,
    }
    row["runtime_limits"] = payload["runtime_limits"]
    row["network_actions"] = payload["network_actions"]
    row["policy"] = {
        "localhost_only": True,
        "literal_loopback_ip_only": True,
        "forwarded": bool(outcome.get("forwarded")),
        "api_key_persisted": False,
        "hidden_external_forwarding": False,
        "external_services_called": False,
        "dns_lookup_attempted": False,
        "connect_tunneling_allowed": False,
        "https_mitm_allowed": False,
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }
    row["shifted_cost_accounting"] = {
        "required": True,
        "local_proxy_request": True,
        "diagnostic_only": True,
        "provider_measured_matched_tasks_required_for_hosted_claims": True,
    }
    row["claim_boundary"] = {
        "id": "local_proxy_forward_diagnostic_not_hosted_savings",
        "reason": "This row records one explicit literal-loopback forwarded request as shifted-cost diagnostic evidence only.",
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }
    return row
|
|
3187
|
+
|
|
3188
|
+
|
|
3189
|
+
def maybe_write_local_proxy_forward_diagnostic(payload: dict[str, Any]) -> None:
    """Append a diagnostic ledger row when a request was actually forwarded.

    Does nothing unless ``payload["diagnostic_ledger"]`` is a dict with
    ``write_requested`` set. When the payload was not served and forwarded,
    the ledger ``reason`` becomes ``"not_forwarded"`` (an existing
    ``"preflight_failed"`` reason is kept). On a successful append the ledger
    entry is updated in place with the write result and a preview of the row.

    Args:
        payload: Serve-mode payload; mutated in place.
    """
    ledger = payload.get("diagnostic_ledger")
    if not (isinstance(ledger, dict) and ledger.get("write_requested")):
        return

    served_and_forwarded = (
        payload.get("status") == "served_once"
        and (payload.get("forward_result") or {}).get("forwarded")
    )
    if not served_and_forwarded:
        if ledger.get("reason") != "preflight_failed":
            ledger["reason"] = "not_forwarded"
        return

    row = local_proxy_forward_diagnostic_row(payload)
    write_path = payload.get("_diagnostic_ledger_write_path")
    if not write_path:
        ledger["reason"] = "not_requested"
        return

    written = append_jsonl_no_follow(
        Path(str(write_path)),
        row,
        label="local proxy forwarding diagnostic ledger",
    )
    ledger.update({
        "write_performed": True,
        "bytes_written": written,
        "reason": None,
        "row_preview": row,
    })
|
|
3207
|
+
|
|
3208
|
+
|
|
3209
|
+
def local_proxy_has_sensitive_headers(headers: Any) -> list[str]:
    """Return the sorted, de-duplicated labels of headers that look sensitive.

    A header is flagged when its lower-cased name is in
    ``LOCAL_PROXY_SENSITIVE_HEADER_NAMES``, when its name is secret-like
    (reported as ``"redacted_sensitive_header"`` rather than by name), or when
    its value is secret-like.

    Args:
        headers: Mapping-like object exposing ``items()``.

    Returns:
        Sorted list of labels; empty when nothing was flagged.
    """
    flagged: set[str] = set()
    for name, value in headers.items():
        lowered = str(name).lower()
        if lowered in LOCAL_PROXY_SENSITIVE_HEADER_NAMES:
            flagged.add(lowered)
            continue
        if local_proxy_secret_like(name):
            # The name itself looks like a secret, so it is not echoed back.
            flagged.add("redacted_sensitive_header")
            continue
        if local_proxy_secret_like(value):
            flagged.add(lowered)
    return sorted(flagged)
|
|
3220
|
+
|
|
3221
|
+
|
|
3222
|
+
def local_proxy_safe_forward_headers(headers: Any, *, target_host: str, target_port: int) -> dict[str, str]:
    """Return the fixed header set sent to the upstream target.

    No inbound header is copied: the result contains only ``Host`` and
    ``Connection: close``. ``headers`` is accepted for interface
    compatibility and is not read.

    IPv6 literals are wrapped in square brackets so the ``Host`` value stays
    parseable (``[::1]:8080`` rather than ``::1:8080``). IPv4 literals and
    already-bracketed hosts are used unchanged.

    Args:
        headers: Inbound request headers (unused).
        target_host: Upstream host literal.
        target_port: Upstream TCP port.

    Returns:
        Dict with exactly the ``Host`` and ``Connection`` headers.
    """
    host = str(target_host)
    # A colon only appears in an IPv6 literal here; without brackets the
    # host/port boundary in "host:port" would be ambiguous.
    if ":" in host and not host.startswith("["):
        host = f"[{host}]"
    return {
        "Host": f"{host}:{target_port}",
        "Connection": "close",
    }
|
|
3227
|
+
|
|
3228
|
+
|
|
3229
|
+
def local_proxy_response_headers(headers: Any) -> list[tuple[str, str]]:
    """Select the upstream response headers that may be relayed downstream.

    Only ``content-type`` passes, and only when it is not listed as sensitive
    or hop-by-hop and neither its name nor its value is secret-like.

    Args:
        headers: Mapping-like object exposing ``items()``.

    Returns:
        ``(name, value)`` string pairs in the order they were encountered.
    """
    relayable = {"content-type"}
    kept: list[tuple[str, str]] = []
    for name, value in headers.items():
        lowered = str(name).lower()
        excluded = lowered in LOCAL_PROXY_SENSITIVE_HEADER_NAMES or lowered in LOCAL_PROXY_HOP_BY_HOP_HEADERS
        if excluded or lowered not in relayable:
            continue
        if not (local_proxy_secret_like(name) or local_proxy_secret_like(value)):
            kept.append((str(name), str(value)))
    return kept
|
|
3241
|
+
|
|
3242
|
+
|
|
3243
|
+
def write_local_proxy_ready_file(path: str | None, *, bind_host: str, bind_port: int) -> None:
    """Write a small JSON "listener ready" marker file, if a path was given.

    The marker records the schema version, the current process id and the
    bind address, serialized with sorted keys and a trailing newline. The
    write is delegated to ``write_regular_file_no_follow_exclusive`` with
    mode ``0o600``. A falsy ``path`` makes this a no-op.

    Args:
        path: Destination path, or ``None``/empty to skip writing.
        bind_host: Host the listener is bound to.
        bind_port: Port the listener is bound to.
    """
    if not path:
        return
    marker = {
        "schema_version": LOCAL_PROXY_READY_SCHEMA_VERSION,
        "experiment_id": "local-proxy",
        "mode": "serve",
        "status": "listener_ready",
        "diagnostic_only": True,
        "pid": os.getpid(),
        "bind": {"host": bind_host, "port": bind_port},
    }
    encoded = (json.dumps(marker, sort_keys=True) + "\n").encode("utf-8")
    write_regular_file_no_follow_exclusive(
        Path(path),
        encoded,
        label="local proxy ready file",
        mode=0o600,
    )
|
|
3260
|
+
|
|
3261
|
+
|
|
3262
|
+
def serve_local_proxy_once(payload: dict[str, Any], *, ready_file: str | None = None) -> dict[str, Any]:
    """Bind a listener, handle at most one request, and report what happened.

    GET and HEAD requests that pass the checks are sent to the target with a
    fixed header set; CONNECT, POST, PUT, PATCH, DELETE, OPTIONS and TRACE are
    answered with 405. A request is rejected before any upstream connection
    when its target is secret-like or absolute, when it carries
    ``Transfer-Encoding``, sensitive headers, an invalid or oversized
    ``Content-Length``, or any body at all. Upstream responses larger than
    the configured limit, or with secret-like content, are replaced by a 502.

    Args:
        payload: Serve-mode payload providing ``bind``, ``target`` and
            ``runtime_limits``.
        ready_file: Optional path for a "listener ready" marker written
            after the socket is bound.

    Returns:
        A result dict describing the single request (or the lack of one).

    Raises:
        RegistryError: If the listener socket cannot be created or bound.
    """
    bind_host = payload["bind"]["host"]
    bind_port = int(payload["bind"]["port"])
    target_host = payload["target"]["host"]
    target_port = int(payload["target"]["port"])
    limits = payload["runtime_limits"]
    max_request_bytes = int(limits["max_request_bytes"])
    max_response_bytes = int(limits["max_response_bytes"])
    timeout_seconds = float(limits["timeout_seconds"])
    server_result: dict[str, Any] = {
        "served_once": False,
        "forwarded": False,
        "blocked_reason": None,
        "forward_attempted": False,
        "request_method": None,
        "request_target_sha256": None,
        "request_target_bytes": 0,
        "inbound_request_bytes": 0,
        "upstream_status": None,
        "upstream_response_bytes": 0,
        "downstream_status": None,
        "sensitive_headers_blocked": [],
        "listener_started": False,
        "ready_file_written": False,
    }

    def record_request(handler: BaseHTTPRequestHandler, method: str) -> None:
        # Store the method plus the target metadata derived from the request path.
        server_result["request_method"] = method
        server_result.update(local_proxy_request_target_meta(handler.path))

    def reject(
        handler: BaseHTTPRequestHandler,
        status_code: int,
        reason: str,
        *,
        sensitive: list[str] | None = None,
    ) -> None:
        # Record the refusal first, then answer the client with a small JSON body.
        server_result.update({
            "served_once": True,
            "forwarded": False,
            "blocked_reason": reason,
            "downstream_status": status_code,
            "sensitive_headers_blocked": sorted(set(sensitive or [])),
        })
        body = json.dumps({"status": "blocked", "reason": reason}, sort_keys=True).encode("utf-8")
        handler.send_response(status_code)
        handler.send_header("Content-Type", "application/json")
        handler.send_header("Content-Length", str(len(body)))
        handler.send_header("Connection", "close")
        handler.end_headers()
        if handler.command != "HEAD":
            handler.wfile.write(body)

    class LocalProxyHandler(BaseHTTPRequestHandler):
        server_version = "ContextGuardLocalProxy/0"
        protocol_version = "HTTP/1.1"

        def log_message(self, format: str, *args: Any) -> None:  # noqa: A002 - BaseHTTPRequestHandler API.
            return

        def do_CONNECT(self) -> None:
            record_request(self, "CONNECT")
            reject(self, 405, "connect_tunneling_not_allowed")

        def block_method(self) -> None:
            record_request(self, self.command)
            reject(self, 405, "method_not_allowed")

        def forward_request(self) -> None:
            record_request(self, self.command)
            if local_proxy_secret_like(self.path):
                reject(self, 400, "secret_like_request_target")
                return
            parsed_target = urlparse(self.path)
            if parsed_target.scheme or parsed_target.netloc:
                reject(self, 400, "absolute_proxy_url_not_allowed")
                return
            if str(self.headers.get("Transfer-Encoding", "")).strip():
                reject(self, 400, "transfer_encoding_not_allowed")
                return
            sensitive_headers = local_proxy_has_sensitive_headers(self.headers)
            if sensitive_headers:
                reject(self, 403, "sensitive_request_headers_blocked", sensitive=sensitive_headers)
                return
            raw_length = self.headers.get("Content-Length")
            try:
                content_length = int(raw_length) if raw_length else 0
            except ValueError:
                reject(self, 400, "invalid_content_length")
                return
            if not 0 <= content_length <= max_request_bytes:
                reject(self, 413, "request_body_exceeds_limit")
                return
            if content_length:
                reject(self, 400, "request_body_not_allowed_for_forwarding_mvp")
                return
            body = self.rfile.read(content_length) if content_length else b""
            server_result["inbound_request_bytes"] = len(body)
            path = self.path if self.path.startswith("/") else f"/{self.path}"
            conn = http.client.HTTPConnection(target_host, target_port, timeout=timeout_seconds)
            try:
                server_result["forward_attempted"] = True
                conn.request(
                    self.command,
                    path,
                    body=body,
                    headers=local_proxy_safe_forward_headers(
                        self.headers,
                        target_host=target_host,
                        target_port=target_port,
                    ),
                )
                response = conn.getresponse()
                # Read one byte past the limit so an oversized body can be detected.
                response_body = response.read(max_response_bytes + 1)
                if len(response_body) > max_response_bytes:
                    reject(self, 502, "upstream_response_exceeds_limit")
                    return
                if local_proxy_bytes_secret_like(response_body):
                    reject(self, 502, "upstream_response_sensitive_content_blocked")
                    return
                self.send_response(response.status, response.reason)
                for header_name, header_value in local_proxy_response_headers(response.headers):
                    self.send_header(header_name, header_value)
                self.send_header("Content-Length", str(len(response_body)))
                self.send_header("Connection", "close")
                self.end_headers()
                if self.command != "HEAD":
                    self.wfile.write(response_body)
                server_result.update({
                    "served_once": True,
                    "forwarded": True,
                    "blocked_reason": None,
                    "upstream_status": response.status,
                    "upstream_response_bytes": len(response_body),
                    "downstream_status": response.status,
                })
            except (OSError, http.client.HTTPException, TimeoutError) as exc:
                reject(self, 502, "upstream_forward_error")
                server_result["error"] = sanitize_local_proxy_value(str(exc))
            finally:
                conn.close()

        # Verb dispatch: GET/HEAD are forwarded; the remaining verbs share one 405 handler.
        do_HEAD = do_GET = forward_request
        do_POST = do_PUT = do_PATCH = do_DELETE = do_OPTIONS = do_TRACE = block_method

    # A colon in the bind host is taken to mean an IPv6 literal.
    listener_family = socket.AF_INET6 if ":" in bind_host else socket.AF_INET

    class LocalProxyHTTPServer(HTTPServer):
        address_family = listener_family

        def server_bind(self) -> None:
            # Call TCPServer.server_bind directly and take server_name from the bound address.
            TCPServer.server_bind(self)
            host, port = self.server_address[:2]
            self.server_name = str(host)
            self.server_port = int(port)

        def get_request(self) -> tuple[Any, Any]:
            request, client_address = super().get_request()
            request.settimeout(timeout_seconds)
            return request, client_address

    try:
        httpd = LocalProxyHTTPServer((bind_host, bind_port), LocalProxyHandler)
    except OSError as exc:
        raise RegistryError(f"could not start local proxy listener: {os_error_detail(exc)}") from exc
    httpd.timeout = timeout_seconds
    try:
        try:
            write_local_proxy_ready_file(ready_file, bind_host=bind_host, bind_port=bind_port)
        except RegistryError as exc:
            server_result.update({
                "served_once": False,
                "forwarded": False,
                "blocked_reason": "ready_file_write_failed",
                "downstream_status": None,
                "error": sanitize_local_proxy_value(str(exc)),
            })
            return server_result
        else:
            server_result["ready_file_written"] = bool(ready_file)
            server_result["listener_started"] = True
        httpd.handle_request()
        if not server_result["served_once"]:
            server_result.update({
                "blocked_reason": "timeout_waiting_for_request",
                "downstream_status": None,
            })
    finally:
        httpd.server_close()
    return server_result
|
|
3459
|
+
|
|
3460
|
+
|
|
3461
|
+
def command_serve_local_proxy(args: argparse.Namespace) -> int:
    """CLI entry point: serve one local proxy request and report the outcome.

    Builds the forwarding payload, preflights the diagnostic ledger when one
    was requested, serves a single request if the payload is still ready, and
    then writes the diagnostic row (when applicable). The private ledger
    write path is removed from the payload before any output is produced.

    Returns:
        ``0`` when the final status is ``"served_once"``, otherwise ``1``.
    """
    payload = local_proxy_forward_payload(args)
    diagnostic_ledger = payload.get("diagnostic_ledger") if isinstance(payload.get("diagnostic_ledger"), dict) else {}
    review_plan = payload["review_plan"]

    if payload["status"] == "ready_to_serve" and diagnostic_ledger.get("write_requested"):
        # Check ledger writability up front so a request is not served
        # when its diagnostic row could not be recorded afterwards.
        try:
            preflight_append_jsonl_no_follow(
                Path(str(payload.get("_diagnostic_ledger_write_path"))),
                label="local proxy forwarding diagnostic ledger",
            )
        except RegistryError as exc:
            payload["status"] = "blocked_until_local_proxy_forwarding_ready"
            review_plan["readiness_blockers"].append("diagnostic_ledger_preflight_failed")
            diagnostic_ledger["reason"] = "preflight_failed"
            diagnostic_ledger["error"] = sanitize_local_proxy_value(str(exc))

    if payload["status"] == "ready_to_serve":
        result = serve_local_proxy_once(payload, ready_file=args.ready_file)
        payload["forward_result"] = result
        network_actions = payload["network_actions"]
        network_actions["listener_started"] = bool(result.get("listener_started"))
        network_actions["outbound_forwarding_attempted"] = bool(result["forward_attempted"])
        network_actions["dns_lookup_attempted"] = False
        network_actions["external_services_called"] = False
        payload["policy"]["listener_started"] = bool(result.get("listener_started"))
        payload["policy"]["traffic_forwarded"] = bool(result["forwarded"])
        if result["forwarded"]:
            payload["status"] = "served_once"
        elif result.get("blocked_reason") == "ready_file_write_failed":
            payload["status"] = "blocked_until_local_proxy_forwarding_ready"
            review_plan["readiness_blockers"].append("ready_file_write_failed")
        else:
            payload["status"] = "blocked_request"

    maybe_write_local_proxy_forward_diagnostic(payload)
    payload.pop("_diagnostic_ledger_write_path", None)

    if args.json:
        emit_json(payload)
    else:
        headline = (
            "ContextGuard local proxy served one loopback request"
            if payload["status"] == "served_once"
            else "ContextGuard local proxy serve blocked"
        )
        print(headline)
        print(f"Status: {payload['status']}")
        if review_plan["readiness_blockers"]:
            print(f"Readiness blockers: {', '.join(review_plan['readiness_blockers'])}")
        if payload.get("forward_result") and payload["forward_result"].get("blocked_reason"):
            print(f"Request blocker: {payload['forward_result']['blocked_reason']}")
        print(payload["claim_boundary"])
    exit_code = 0 if payload["status"] == "served_once" else 1
    return exit_code
|
|
3507
|
+
|
|
3508
|
+
|
|
1620
3509
|
LEARNED_CODE_FENCE_RE = re.compile(r"(?m)^\s*(?:```|~~~)")
|
|
1621
3510
|
LEARNED_DIFF_RE = re.compile(r"(?m)^\s*(diff --git |@@\s+-|--- |\+\+\+ |[+-].*)")
|
|
1622
3511
|
LEARNED_IDENTIFIER_RE = re.compile(
|
|
@@ -1691,16 +3580,14 @@ def read_learned_input(args: argparse.Namespace) -> tuple[str, dict[str, Any]]:
|
|
|
1691
3580
|
if args.input:
|
|
1692
3581
|
path = Path(args.input)
|
|
1693
3582
|
source_label = source_label or path.name
|
|
1694
|
-
|
|
1695
|
-
|
|
1696
|
-
|
|
1697
|
-
except OSError as exc:
|
|
1698
|
-
raise RegistryError(f"could not read learned-compression input: {path}: {exc}") from exc
|
|
3583
|
+
loaded = read_bounded_regular_file(path, max_bytes=MAX_LEARNED_COMPRESSION_INPUT_BYTES, label="learned-compression input")
|
|
3584
|
+
assert loaded is not None
|
|
3585
|
+
raw, truncated = loaded
|
|
1699
3586
|
else:
|
|
1700
3587
|
source_label = source_label or "stdin"
|
|
1701
3588
|
raw = sys.stdin.buffer.read(MAX_LEARNED_COMPRESSION_INPUT_BYTES + 1)
|
|
1702
|
-
|
|
1703
|
-
|
|
3589
|
+
truncated = len(raw) > MAX_LEARNED_COMPRESSION_INPUT_BYTES
|
|
3590
|
+
raw = raw[:MAX_LEARNED_COMPRESSION_INPUT_BYTES]
|
|
1704
3591
|
text = raw.decode("utf-8", errors="replace")
|
|
1705
3592
|
metadata = {
|
|
1706
3593
|
"source_label": source_label,
|
|
@@ -1771,6 +3658,104 @@ def valid_learned_reexpand_command(receipt_id: str | None, command: str | None)
|
|
|
1771
3658
|
return False, "invalid_reexpand_command"
|
|
1772
3659
|
|
|
1773
3660
|
|
|
3661
|
+
def verify_learned_fallback_artifact(
    receipt_id: str | None,
    *,
    expected_sha256: str,
    expected_bytes: int,
) -> tuple[bool, str | None, dict[str, Any]]:
    """Check that a stored fallback artifact exists and matches the expected content.

    Each artifact read directory is searched in order. The first directory
    holding either file of the artifact decides the outcome: both the
    metadata and content files must be present and untruncated, the metadata
    must be a JSON object whose ``artifact_id`` equals ``receipt_id``, its
    ``stored_output`` sha256/bytes must match the content on disk, and the
    content must match ``expected_sha256`` / ``expected_bytes``.

    Args:
        receipt_id: Artifact identifier; must match ``LEARNED_ARTIFACT_ID_RE``.
        expected_sha256: Hex digest the stored content must have.
        expected_bytes: Byte length the stored content must have.

    Returns:
        ``(ok, reason, details)`` where ``reason`` is ``None`` on success
        and ``details`` lists the directories searched and, once content was
        hashed, the matched directory plus the observed digest and size.
    """
    if not receipt_id or not LEARNED_ARTIFACT_ID_RE.fullmatch(receipt_id):
        return False, "invalid_reexpand_command", {"checked": False, "read_directories": []}

    read_dirs = context_diff_artifact_read_dirs()
    details: dict[str, Any] = {
        "checked": True,
        "read_directories": [str(path) for path in read_dirs],
        "matched_directory": None,
        "content_sha256": None,
        "content_bytes": None,
    }
    invalid = (False, "fallback_receipt_invalid", details)

    for directory in read_dirs:
        content_path, meta_path = context_diff_artifact_paths(directory, receipt_id)
        meta_loaded = read_bounded_regular_file(
            meta_path,
            max_bytes=MAX_LEARNED_COMPRESSION_ARTIFACT_METADATA_BYTES,
            label="learned-compression fallback metadata",
            missing_ok=True,
        )
        content_loaded = read_bounded_regular_file(
            content_path,
            max_bytes=max(MAX_LEARNED_COMPRESSION_INPUT_BYTES, expected_bytes),
            label="learned-compression fallback content",
            missing_ok=True,
        )
        missing = [loaded is None for loaded in (meta_loaded, content_loaded)]
        if all(missing):
            # Nothing stored here for this receipt; try the next directory.
            continue
        if any(missing):
            return invalid

        meta_raw, meta_truncated = meta_loaded
        content_raw, content_truncated = content_loaded
        if meta_truncated or content_truncated:
            return invalid
        try:
            metadata = json.loads(meta_raw.decode("utf-8"))
        except (UnicodeDecodeError, json.JSONDecodeError):
            return invalid
        if not isinstance(metadata, dict) or metadata.get("artifact_id") != receipt_id:
            return invalid

        stored = metadata.get("stored_output")
        if isinstance(stored, dict):
            stored_sha, stored_bytes = stored.get("sha256"), stored.get("bytes")
        else:
            stored_sha = stored_bytes = None
        actual_sha = hashlib.sha256(content_raw).hexdigest()
        actual_bytes = len(content_raw)
        details.update({
            "matched_directory": str(directory),
            "content_sha256": actual_sha,
            "content_bytes": actual_bytes,
        })
        if (stored_sha, stored_bytes) != (actual_sha, actual_bytes):
            return invalid
        if (actual_sha, actual_bytes) != (expected_sha256, expected_bytes):
            return False, "fallback_content_mismatch", details
        return True, None, details

    return False, "fallback_receipt_not_found", details
|
|
3721
|
+
|
|
3722
|
+
|
|
3723
|
+
def read_learned_candidate_replacement(args: argparse.Namespace) -> tuple[str | None, dict[str, Any]]:
    """Load the caller-supplied replacement candidate and describe it.

    The candidate is taken from ``args.replacement_text`` (inline) or from
    ``args.replacement_file`` (read through ``read_bounded_regular_file``).
    Supplying both is rejected.

    Returns:
        ``(text, meta)``. ``text`` is the decoded candidate, or ``None`` when
        neither option was given. ``meta`` holds ``source_label``, ``bytes``,
        ``lines``, ``sha256``, ``truncated`` and ``max_bytes``; the byte count
        and digest are computed over the raw bytes that were kept, not over
        the decoded text.

    Raises:
        RegistryError: if both options are given, or if the file read yields
            no content.
    """
    if args.replacement_text is not None and args.replacement_file:
        raise RegistryError("learned-compression emit accepts only one of --replacement-text or --replacement-file")

    text: str | None = None
    raw = b""
    truncated = False
    source_label: str | None = None

    if args.replacement_text is not None:
        encoded = str(args.replacement_text).encode("utf-8")
        truncated = len(encoded) > MAX_LEARNED_COMPRESSION_REPLACEMENT_BYTES
        raw = encoded[:MAX_LEARNED_COMPRESSION_REPLACEMENT_BYTES]
        # The byte cut may land inside a multi-byte character, so decode leniently.
        text = raw.decode("utf-8", errors="replace")
        source_label = "inline"
    elif args.replacement_file:
        path = Path(args.replacement_file)
        loaded = read_bounded_regular_file(
            path,
            max_bytes=MAX_LEARNED_COMPRESSION_REPLACEMENT_BYTES,
            label="learned-compression candidate replacement",
        )
        # Explicit check instead of ``assert``: assertions are removed under
        # ``python -O`` and would leave an opaque unpacking TypeError below.
        if loaded is None:
            raise RegistryError("learned-compression candidate replacement could not be read")
        raw, truncated = loaded
        text = raw.decode("utf-8", errors="replace")
        # Only the basename is reported, not the full path.
        source_label = path.name

    return text, {
        "source_label": source_label,
        "bytes": len(raw),
        "lines": len(text.splitlines()) if text is not None else 0,
        "sha256": hashlib.sha256(raw).hexdigest() if text is not None else None,
        "truncated": truncated,
        "max_bytes": MAX_LEARNED_COMPRESSION_REPLACEMENT_BYTES,
    }
|
|
3757
|
+
|
|
3758
|
+
|
|
1774
3759
|
def learned_compression_plan_payload(args: argparse.Namespace) -> dict[str, Any]:
|
|
1775
3760
|
text, input_meta = read_learned_input(args)
|
|
1776
3761
|
receipt_id = args.exact_fallback_receipt.strip() if args.exact_fallback_receipt else None
|
|
@@ -1864,6 +3849,133 @@ def command_plan_learned_compression(args: argparse.Namespace) -> int:
|
|
|
1864
3849
|
return 0
|
|
1865
3850
|
|
|
1866
3851
|
|
|
3852
|
+
def learned_compression_emit_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the emit-mode report for a caller-supplied learned-compression candidate.

    Starts from the plan payload, verifies the exact fallback artifact when the
    re-expand command has a valid shape, scans the candidate text, and collects
    readiness blockers. The candidate text is included in the result only when
    no blocker remains.

    Returns:
        The plan payload, updated in place with emit-mode fields (``mode``,
        ``status``, ``policy``, ``exact_fallback``, ``candidate_scan``,
        ``replacement``, ``claim_boundary``, ``compression_evidence`` and,
        when ready, ``candidate_replacement``).
    """
    payload = learned_compression_plan_payload(args)

    receipt_id = None
    if args.exact_fallback_receipt:
        receipt_id = args.exact_fallback_receipt.strip()
    reexpand_command = None
    if args.reexpand_command:
        reexpand_command = args.reexpand_command.strip()

    # Only the validity flag is used here; the second element is ignored.
    reexpand_valid, _ = valid_learned_reexpand_command(receipt_id, reexpand_command)
    if reexpand_valid:
        fallback_verified, fallback_blocker, fallback_verification = verify_learned_fallback_artifact(
            receipt_id,
            expected_sha256=payload["input"]["sha256"],
            expected_bytes=payload["input"]["bytes"],
        )
    else:
        fallback_verified = False
        fallback_blocker = None
        fallback_verification: dict[str, Any] = {"checked": False, "read_directories": []}

    candidate_text, candidate_meta = read_learned_candidate_replacement(args)
    scan_text = candidate_text or ""
    candidate_counts = learned_signal_counts(scan_text)
    candidate_content_type = learned_content_type(scan_text, candidate_counts)

    has_candidate = candidate_text is not None
    has_visible_candidate = has_candidate and bool(candidate_text.strip())
    was_truncated = candidate_meta["truncated"]

    # Blocker order is part of the report; duplicates are dropped keeping first occurrence.
    blockers = list(payload["review_plan"]["readiness_blockers"])
    if fallback_blocker:
        blockers.append(fallback_blocker)
    if not has_visible_candidate:
        blockers.append("missing_candidate_replacement")
    if was_truncated:
        blockers.append("candidate_replacement_truncated")
    if has_candidate and not was_truncated and candidate_meta["bytes"] >= payload["input"]["bytes"]:
        blockers.append("candidate_not_smaller_than_input")
    if has_visible_candidate and candidate_content_type != "prose":
        blockers.append("candidate_non_prose_input")
    blockers.extend(f"candidate_{signal}" for signal, hits in candidate_counts.items() if hits)
    blockers = list(dict.fromkeys(blockers))
    ready = not blockers

    flagged_candidate_signals = [signal for signal, hits in candidate_counts.items() if hits]

    payload["mode"] = "emit"
    payload["status"] = "candidate_emitted" if ready else "blocked_until_candidate_ready"
    # Copy before overriding so the policy mapping from the plan payload is not mutated.
    payload["policy"] = {
        **payload["policy"],
        "runtime_compression_allowed": False,
        "caller_supplied_candidate_required": True,
        "caller_supplied_candidate_allowed": ready,
        "lossy_replacement_allowed": ready,
        "learned_compressor_called": False,
        "embedding_or_reranker_called": False,
        "model_call_allowed": False,
        "subprocess_allowed": False,
    }
    payload["exact_fallback"] = {
        "required": True,
        "available": bool(receipt_id and reexpand_command and reexpand_valid and fallback_verified),
        "receipt_id": receipt_id,
        "cli": reexpand_command,
        "verified": fallback_verified,
        "valid_command_shape": reexpand_valid,
        "verification": fallback_verification,
        "note": "Emit mode validates exact local fallback command shape and verifies local artifact content matches the input prose.",
    }
    payload["candidate_scan"] = {
        "content_type": candidate_content_type,
        "counts": candidate_counts,
        "protected_signals": flagged_candidate_signals,
    }
    payload["replacement"] = candidate_meta

    review_plan = payload["review_plan"]
    review_plan["readiness_blockers"] = blockers
    review_plan["protected_signals"] = [
        signal for signal, hits in payload["protected_signal_scan"]["counts"].items() if hits
    ]
    # Separate list object so the two report sections do not alias each other.
    review_plan["candidate_protected_signals"] = list(flagged_candidate_signals)
    review_plan["next_steps"] = [
        "Human-review the caller-supplied candidate against the exact fallback before using it.",
        "Reject candidates that omit protected facts, prompt-like text, paths, code, diffs, identifiers, or numeric constants.",
        "Treat byte reduction as local proxy evidence only; do not claim hosted token/cost savings.",
    ]

    payload["claim_boundary"] = (
        "Explicit local learned-compression candidate emission only; ContextGuard does not run a learned compressor, "
        "model, embedding, reranker, subprocess, or external service, and byte reduction is not hosted API token or cost evidence."
    )

    bytes_before = payload["input"]["bytes"]
    bytes_after = candidate_meta["bytes"] if has_candidate else 0
    payload["compression_evidence"] = {
        "bytes_before": bytes_before,
        "bytes_after": bytes_after,
        "byte_reduction": max(0, bytes_before - bytes_after),
        "byte_reduction_proxy_only": True,
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
    }

    if ready and has_candidate:
        payload["candidate_replacement"] = {
            "text": candidate_text,
            "bytes": candidate_meta["bytes"],
            "lines": candidate_meta["lines"],
            "sha256": candidate_meta["sha256"],
            "source_label": candidate_meta["source_label"],
            "caller_supplied": True,
        }
    else:
        # Never leak candidate text from a blocked run.
        payload.pop("candidate_replacement", None)
    return payload
|
|
3956
|
+
|
|
3957
|
+
|
|
3958
|
+
def command_emit_learned_compression(args: argparse.Namespace) -> int:
    """CLI handler for ``emit learned-compression``.

    Builds the emit payload and reports it as JSON (``args.json``) or as a
    short text summary. Returns 0 when the candidate was emitted and 1 when it
    was blocked.
    """
    payload = learned_compression_emit_payload(args)
    if args.json:
        emit_json(payload)
    else:
        report: list[str] = []
        if payload["status"] == "candidate_emitted":
            replacement = payload["replacement"]
            report.append("ContextGuard learned-compression candidate emitted")
            report.append(f"Candidate: bytes={replacement['bytes']} sha256={replacement['sha256']}")
            report.append(f"Exact fallback: {payload['exact_fallback']['cli']}")
        else:
            report.append("ContextGuard learned-compression candidate blocked")
        report.append(f"Status: {payload['status']}")
        readiness_blockers = payload["review_plan"]["readiness_blockers"]
        if readiness_blockers:
            report.append(f"Readiness blockers: {', '.join(readiness_blockers)}")
        report.append(payload["claim_boundary"])
        for line in report:
            print(line)
    return 0 if payload["status"] == "candidate_emitted" else 1
|
|
3977
|
+
|
|
3978
|
+
|
|
1867
3979
|
def add_common_args(parser: argparse.ArgumentParser) -> None:
|
|
1868
3980
|
parser.add_argument("--root", help="Project root for default project-local experiment config (default: cwd).")
|
|
1869
3981
|
parser.add_argument("--config", help="Project-local config path. Relative paths resolve under --root; absolute paths must stay inside --root.")
|
|
@@ -1982,6 +4094,34 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
1982
4094
|
local_proxy.add_argument("--json", action="store_true", help="Emit JSON output.")
|
|
1983
4095
|
local_proxy.set_defaults(func=command_plan_local_proxy)
|
|
1984
4096
|
|
|
4097
|
+
external_proxy = plan_sub.add_parser(
|
|
4098
|
+
"local-proxy-external-forwarding",
|
|
4099
|
+
help="Dry-run an external-forwarding opt-in design gate without forwarding traffic.",
|
|
4100
|
+
)
|
|
4101
|
+
external_proxy.add_argument(
|
|
4102
|
+
"--external-forwarding-intent",
|
|
4103
|
+
action="store_true",
|
|
4104
|
+
help="Acknowledge intent to design a future external-forwarding proxy surface.",
|
|
4105
|
+
)
|
|
4106
|
+
external_proxy.add_argument(
|
|
4107
|
+
"--external-forwarding-design-ack",
|
|
4108
|
+
action="store_true",
|
|
4109
|
+
help="Acknowledge this command is design-only and does not enable external forwarding.",
|
|
4110
|
+
)
|
|
4111
|
+
external_proxy.add_argument("--allow-host", action="append", help="Explicit non-wildcard public host allowed by the future design. Repeatable.")
|
|
4112
|
+
external_proxy.add_argument("--allow-scheme", action="append", help="Allowed scheme for the future design; HTTPS is required. Repeatable.")
|
|
4113
|
+
external_proxy.add_argument("--threat-model-note", action="append", help="Threat-model note for the future external-forwarding design. Repeatable.")
|
|
4114
|
+
external_proxy.add_argument(
|
|
4115
|
+
"--credential-redaction-policy",
|
|
4116
|
+
help=f"Required policy: {LOCAL_PROXY_EXTERNAL_CREDENTIAL_REDACTION_POLICY}.",
|
|
4117
|
+
)
|
|
4118
|
+
external_proxy.add_argument(
|
|
4119
|
+
"--provider-evidence-boundary",
|
|
4120
|
+
help=f"Required policy: {LOCAL_PROXY_EXTERNAL_PROVIDER_EVIDENCE_BOUNDARY}.",
|
|
4121
|
+
)
|
|
4122
|
+
external_proxy.add_argument("--json", action="store_true", help="Emit JSON output.")
|
|
4123
|
+
external_proxy.set_defaults(func=command_plan_local_proxy_external_forwarding)
|
|
4124
|
+
|
|
1985
4125
|
learned = plan_sub.add_parser(
|
|
1986
4126
|
"learned-compression",
|
|
1987
4127
|
help="Dry-run a deny-by-default learned/synthetic compression safety gate.",
|
|
@@ -1995,6 +4135,176 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
1995
4135
|
learned.add_argument("--json", action="store_true", help="Emit JSON output.")
|
|
1996
4136
|
learned.set_defaults(func=command_plan_learned_compression)
|
|
1997
4137
|
|
|
4138
|
+
emit_parser = sub.add_parser("emit", help="Emit explicit local runtime outputs for experimental lanes.")
|
|
4139
|
+
emit_sub = emit_parser.add_subparsers(dest="emit_command", required=True)
|
|
4140
|
+
emit_context_diff = emit_sub.add_parser(
|
|
4141
|
+
"context-diff-compaction",
|
|
4142
|
+
help="Emit a caller-supplied compact diff replacement only with exact retrieval metadata.",
|
|
4143
|
+
)
|
|
4144
|
+
emit_context_diff.add_argument("--input", help="Read original diff text from a file instead of stdin.")
|
|
4145
|
+
emit_context_diff.add_argument("--source-label", help="Safe label to use for the diff input source in reports.")
|
|
4146
|
+
emit_context_diff.add_argument("--receipt-id", required=True, help="Exact local artifact receipt id for the original diff.")
|
|
4147
|
+
emit_context_diff.add_argument("--reexpand-command", required=True, help="Exact command that restores the original diff.")
|
|
4148
|
+
replacement_group = emit_context_diff.add_mutually_exclusive_group(required=True)
|
|
4149
|
+
replacement_group.add_argument("--replacement-text", help="Caller-supplied compact replacement text to emit.")
|
|
4150
|
+
replacement_group.add_argument("--replacement-file", help="Read caller-supplied compact replacement text from a file.")
|
|
4151
|
+
emit_context_diff.add_argument("--json", action="store_true", help="Emit JSON output.")
|
|
4152
|
+
emit_context_diff.set_defaults(func=command_emit_context_diff_compaction)
|
|
4153
|
+
|
|
4154
|
+
emit_visual_ocr = emit_sub.add_parser(
|
|
4155
|
+
"visual-crop-ocr",
|
|
4156
|
+
help="Emit a caller-supplied visual crop/OCR evidence pack without image/OCR services.",
|
|
4157
|
+
)
|
|
4158
|
+
emit_visual_ocr.add_argument("--full-evidence-receipt", help="User-supplied receipt/id for the original full visual evidence.")
|
|
4159
|
+
emit_visual_ocr.add_argument("--full-evidence-label", help="Safe label for the full visual evidence.")
|
|
4160
|
+
emit_visual_ocr.add_argument("--crop-label", help="Safe label for the cropped region or crop fixture.")
|
|
4161
|
+
emit_visual_ocr.add_argument("--crop-bounds", help="Crop bounds as x,y,width,height integers.")
|
|
4162
|
+
emit_visual_ocr.add_argument("--image-size", help="Original image size as width,height integers.")
|
|
4163
|
+
emit_visual_ocr.add_argument("--ocr-text", help="Bounded OCR fixture text supplied inline.")
|
|
4164
|
+
emit_visual_ocr.add_argument("--ocr-text-file", help="Read bounded OCR fixture text from a UTF-8 text file.")
|
|
4165
|
+
emit_visual_ocr.add_argument("--ocr-source-label", help="Safe label for OCR text source; defaults to inline or file basename.")
|
|
4166
|
+
emit_visual_ocr.add_argument("--ocr-confidence", help="OCR confidence as a finite decimal from 0.0 to 1.0.")
|
|
4167
|
+
emit_visual_ocr.add_argument("--ocr-error-note", action="append", help="Known OCR error/uncertainty note. Repeatable.")
|
|
4168
|
+
emit_visual_ocr.add_argument("--missed-context-note", action="append", help="Potential context outside crop/OCR text. Repeatable.")
|
|
4169
|
+
emit_visual_ocr.add_argument("--json", action="store_true", help="Emit JSON output.")
|
|
4170
|
+
emit_visual_ocr.set_defaults(func=command_emit_visual_crop_ocr)
|
|
4171
|
+
|
|
4172
|
+
emit_learned = emit_sub.add_parser(
|
|
4173
|
+
"learned-compression",
|
|
4174
|
+
help="Emit a caller-supplied compact prose candidate only with verified exact fallback.",
|
|
4175
|
+
)
|
|
4176
|
+
emit_learned.add_argument("--input", help="Read original prose text from a file instead of stdin.")
|
|
4177
|
+
emit_learned.add_argument("--source-label", help="Safe label to use for the input source in reports.")
|
|
4178
|
+
emit_learned.add_argument("--sanitized", action="store_true", help="Assert input is already sanitized.")
|
|
4179
|
+
emit_learned.add_argument("--trusted-source", action="store_true", help="Assert input came from a trusted source.")
|
|
4180
|
+
emit_learned.add_argument("--exact-fallback-receipt", required=True, help="Local exact fallback receipt id for the original text.")
|
|
4181
|
+
emit_learned.add_argument("--reexpand-command", required=True, help="Local exact re-expand command bound to the receipt id.")
|
|
4182
|
+
learned_replacement_group = emit_learned.add_mutually_exclusive_group(required=True)
|
|
4183
|
+
learned_replacement_group.add_argument("--replacement-text", help="Caller-supplied compact prose candidate to emit.")
|
|
4184
|
+
learned_replacement_group.add_argument("--replacement-file", help="Read caller-supplied compact prose candidate from a file.")
|
|
4185
|
+
emit_learned.add_argument("--json", action="store_true", help="Emit JSON output.")
|
|
4186
|
+
emit_learned.set_defaults(func=command_emit_learned_compression)
|
|
4187
|
+
|
|
4188
|
+
record_parser = sub.add_parser("record", help="Run explicit local runtime recorders for experimental lanes.")
|
|
4189
|
+
record_sub = record_parser.add_subparsers(dest="record_command", required=True)
|
|
4190
|
+
record_self_hosted = record_sub.add_parser(
|
|
4191
|
+
"self-hosted-metrics-ledger",
|
|
4192
|
+
help="Append one self-hosted/local metrics sidecar row to a JSONL ledger.",
|
|
4193
|
+
)
|
|
4194
|
+
record_self_hosted.add_argument("--ledger-jsonl", required=True, help="Local JSONL ledger path to append.")
|
|
4195
|
+
record_self_hosted.add_argument("--input", help="Read an explicit self_hosted_metrics JSON envelope from a file instead of stdin.")
|
|
4196
|
+
record_self_hosted.add_argument("--source-label", help="Safe label to use for the input source in reports.")
|
|
4197
|
+
record_self_hosted.add_argument("--latency-ms", type=float, default=None, help="Local/model-server latency in milliseconds.")
|
|
4198
|
+
record_self_hosted.add_argument("--peak-memory-mb", type=float, default=None, help="Peak local/model-server memory in MiB/MB.")
|
|
4199
|
+
record_self_hosted.add_argument("--quality-score", type=float, default=None, help="Quality score from 0.0 to 1.0.")
|
|
4200
|
+
record_self_hosted.add_argument("--energy-wh", type=float, default=None, help="Diagnostic local energy use in watt-hours.")
|
|
4201
|
+
record_self_hosted.add_argument("--local-cost-usd", type=float, default=None, help="Diagnostic local/self-hosted cost in USD.")
|
|
4202
|
+
record_self_hosted.add_argument("--tokens-per-second", type=float, default=None, help="Diagnostic local throughput.")
|
|
4203
|
+
record_self_hosted.add_argument("--model-server", help="Sanitized label for local model server/runtime.")
|
|
4204
|
+
record_self_hosted.add_argument("--optimization", help="Sanitized label for the local optimization under test.")
|
|
4205
|
+
record_self_hosted.add_argument("--quality-metric", help="Sanitized label for quality metric.")
|
|
4206
|
+
record_self_hosted.add_argument("--hardware", help="Sanitized local hardware label.")
|
|
4207
|
+
record_self_hosted.add_argument("--runtime", help="Sanitized local runtime label.")
|
|
4208
|
+
record_self_hosted.add_argument("--dataset", help="Sanitized dataset label.")
|
|
4209
|
+
record_self_hosted.add_argument("--task-id", default="self-hosted-metrics-manual", help="Sanitized task id for the ledger row.")
|
|
4210
|
+
record_self_hosted.add_argument("--variant", default="self-hosted-metrics-ledger", help="Sanitized variant label for the ledger row.")
|
|
4211
|
+
record_self_hosted.add_argument(
|
|
4212
|
+
"--success",
|
|
4213
|
+
choices=("true", "false", "unknown"),
|
|
4214
|
+
default="unknown",
|
|
4215
|
+
help="Optional success value for the local run; unknown writes JSON null.",
|
|
4216
|
+
)
|
|
4217
|
+
record_self_hosted.add_argument(
|
|
4218
|
+
"--notes",
|
|
4219
|
+
default="explicit self-hosted metrics record; no hosted API savings claim",
|
|
4220
|
+
help="Sanitized note for the ledger row.",
|
|
4221
|
+
)
|
|
4222
|
+
record_self_hosted.add_argument("--json", action="store_true", help="Emit JSON output.")
|
|
4223
|
+
record_self_hosted.set_defaults(func=command_record_self_hosted_metrics_ledger)
|
|
4224
|
+
|
|
4225
|
+
record_local_proxy = record_sub.add_parser(
|
|
4226
|
+
"local-proxy-runtime-gate",
|
|
4227
|
+
help="Append one localhost-only local proxy runtime-gate row without starting a proxy.",
|
|
4228
|
+
)
|
|
4229
|
+
record_local_proxy.add_argument("--input", help="Read a local_proxy JSON envelope from a file instead of CLI flags.")
|
|
4230
|
+
record_local_proxy.add_argument("--bind-host", help="Advisory bind host; must be localhost/loopback.")
|
|
4231
|
+
record_local_proxy.add_argument("--bind-port", default=None, help="Advisory bind port; 0 means unspecified/ephemeral.")
|
|
4232
|
+
record_local_proxy.add_argument("--target-host", help="Advisory target host; must be localhost/loopback.")
|
|
4233
|
+
record_local_proxy.add_argument("--target-port", default=None, help="Advisory target port; 0 means unspecified.")
|
|
4234
|
+
record_local_proxy.add_argument("--upstream-url", help="Advisory upstream URL; host must be localhost/loopback.")
|
|
4235
|
+
record_local_proxy.add_argument("--ledger-jsonl", required=True, help="Local JSONL ledger path to append the gate row.")
|
|
4236
|
+
record_local_proxy.add_argument("--proxy-label", help="Safe label for this local proxy gate record.")
|
|
4237
|
+
record_local_proxy.add_argument("--api-key", help="Blocked/redacted API key material; never persisted or emitted raw.")
|
|
4238
|
+
record_local_proxy.add_argument("--authorization-header", help="Blocked/redacted Authorization header; never persisted or emitted raw.")
|
|
4239
|
+
record_local_proxy.add_argument("--persist-api-key", action="store_true", help="Declare API-key persistence intent; blocked.")
|
|
4240
|
+
record_local_proxy.add_argument(
|
|
4241
|
+
"--external-forwarding-intent",
|
|
4242
|
+
action="store_true",
|
|
4243
|
+
help="Declare future external forwarding intent; blocked in this gate recorder.",
|
|
4244
|
+
)
|
|
4245
|
+
record_local_proxy.add_argument(
|
|
4246
|
+
"--runtime-gate-ack",
|
|
4247
|
+
action="store_true",
|
|
4248
|
+
help="Acknowledge this is only a local gate record and any forwarding needs a separate runtime gate.",
|
|
4249
|
+
)
|
|
4250
|
+
record_local_proxy.add_argument("--json", action="store_true", help="Emit JSON output.")
|
|
4251
|
+
record_local_proxy.set_defaults(func=command_record_local_proxy_runtime_gate)
|
|
4252
|
+
|
|
4253
|
+
serve_parser = sub.add_parser("serve", help="Run explicit bounded local servers for experimental lanes.")
|
|
4254
|
+
serve_sub = serve_parser.add_subparsers(dest="serve_command", required=True)
|
|
4255
|
+
serve_local_proxy = serve_sub.add_parser(
|
|
4256
|
+
"local-proxy",
|
|
4257
|
+
help="Serve one bounded localhost-only HTTP forwarding request.",
|
|
4258
|
+
)
|
|
4259
|
+
serve_local_proxy.add_argument("--input", help="Read a local_proxy JSON envelope from a file instead of CLI flags.")
|
|
4260
|
+
serve_local_proxy.add_argument("--bind-host", help="Bind host; actual serving requires a literal loopback IP.")
|
|
4261
|
+
serve_local_proxy.add_argument("--bind-port", default=None, help="Bind port; must be a nonzero explicit port for serving.")
|
|
4262
|
+
serve_local_proxy.add_argument("--target-host", help="Target host; actual forwarding requires a literal loopback IP.")
|
|
4263
|
+
serve_local_proxy.add_argument("--target-port", default=None, help="Target port; must be a nonzero explicit port for forwarding.")
|
|
4264
|
+
serve_local_proxy.add_argument("--upstream-url", help="Optional upstream URL; host must be a literal loopback IP for serving.")
|
|
4265
|
+
serve_local_proxy.add_argument("--proxy-label", help="Safe label for this local proxy serve run.")
|
|
4266
|
+
serve_local_proxy.add_argument(
|
|
4267
|
+
"--diagnostic-ledger-jsonl",
|
|
4268
|
+
help="Append one shifted-cost diagnostic JSONL row only after a successful loopback forwarded request.",
|
|
4269
|
+
)
|
|
4270
|
+
serve_local_proxy.add_argument("--api-key", help="Blocked/redacted API key material; never persisted or emitted raw.")
|
|
4271
|
+
serve_local_proxy.add_argument("--authorization-header", help="Blocked/redacted Authorization header; never persisted or emitted raw.")
|
|
4272
|
+
serve_local_proxy.add_argument("--persist-api-key", action="store_true", help="Declare API-key persistence intent; blocked.")
|
|
4273
|
+
serve_local_proxy.add_argument(
|
|
4274
|
+
"--external-forwarding-intent",
|
|
4275
|
+
action="store_true",
|
|
4276
|
+
help="Declare external forwarding intent; blocked in this local-only runtime.",
|
|
4277
|
+
)
|
|
4278
|
+
serve_local_proxy.add_argument(
|
|
4279
|
+
"--runtime-gate-ack",
|
|
4280
|
+
action="store_true",
|
|
4281
|
+
help="Acknowledge this is an explicit experimental runtime.",
|
|
4282
|
+
)
|
|
4283
|
+
serve_local_proxy.add_argument(
|
|
4284
|
+
"--forwarding-gate-ack",
|
|
4285
|
+
action="store_true",
|
|
4286
|
+
help="Acknowledge this starts a loopback-only forwarding listener for one bounded request.",
|
|
4287
|
+
)
|
|
4288
|
+
serve_local_proxy.add_argument("--once", action="store_true", help="Serve exactly one accepted or blocked request; required for this MVP.")
|
|
4289
|
+
serve_local_proxy.add_argument(
|
|
4290
|
+
"--max-request-bytes",
|
|
4291
|
+
default=None,
|
|
4292
|
+
help=f"Maximum request body bytes, 1..{LOCAL_PROXY_MAX_FORWARD_BYTES}.",
|
|
4293
|
+
)
|
|
4294
|
+
serve_local_proxy.add_argument(
|
|
4295
|
+
"--max-response-bytes",
|
|
4296
|
+
default=None,
|
|
4297
|
+
help=f"Maximum upstream response bytes, 1..{LOCAL_PROXY_MAX_FORWARD_BYTES}.",
|
|
4298
|
+
)
|
|
4299
|
+
serve_local_proxy.add_argument(
|
|
4300
|
+
"--timeout-seconds",
|
|
4301
|
+
default=None,
|
|
4302
|
+
help=f"Listener/upstream timeout seconds, 0.1..{LOCAL_PROXY_MAX_TIMEOUT_SECONDS}.",
|
|
4303
|
+
)
|
|
4304
|
+
serve_local_proxy.add_argument("--ready-file", help=argparse.SUPPRESS)
|
|
4305
|
+
serve_local_proxy.add_argument("--json", action="store_true", help="Emit JSON output after the single request completes.")
|
|
4306
|
+
serve_local_proxy.set_defaults(func=command_serve_local_proxy)
|
|
4307
|
+
|
|
1998
4308
|
return parser
|
|
1999
4309
|
|
|
2000
4310
|
|