@ictechgy/context-guard 0.4.9 → 0.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54):
  1. package/CHANGELOG.md +16 -0
  2. package/README.ko.md +41 -24
  3. package/README.md +66 -26
  4. package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
  5. package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
  6. package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
  7. package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
  8. package/docs/distribution.md +10 -7
  9. package/docs/experimental-benchmark-fixtures.md +8 -1
  10. package/package.json +3 -6
  11. package/packaging/homebrew/context-guard.rb.template +1 -1
  12. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  13. package/plugins/context-guard/README.ko.md +9 -6
  14. package/plugins/context-guard/README.md +21 -13
  15. package/plugins/context-guard/bin/context-guard +113 -26
  16. package/plugins/context-guard/bin/context-guard-artifact +542 -46
  17. package/plugins/context-guard/bin/context-guard-cache-score +380 -0
  18. package/plugins/context-guard/bin/context-guard-compress +146 -1
  19. package/plugins/context-guard/bin/context-guard-cost +783 -4
  20. package/plugins/context-guard/bin/context-guard-experiments +99 -18
  21. package/plugins/context-guard/bin/context-guard-failed-nudge +3 -0
  22. package/plugins/context-guard/bin/context-guard-filter +163 -7
  23. package/plugins/context-guard/bin/context-guard-guard-read +3 -0
  24. package/plugins/context-guard/bin/context-guard-pack +602 -43
  25. package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
  26. package/plugins/context-guard/bin/context-guard-setup +165 -31
  27. package/plugins/context-guard/bin/context-guard-statusline +490 -283
  28. package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
  29. package/plugins/context-guard/bin/context-guard-tool-prune +241 -1
  30. package/plugins/context-guard/lib/context_guard_commands.py +206 -0
  31. package/plugins/context-guard/skills/setup/SKILL.md +1 -0
  32. package/context-guard-kit/README.md +0 -91
  33. package/context-guard-kit/benchmark_runner.py +0 -2401
  34. package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
  35. package/context-guard-kit/context_compress.py +0 -695
  36. package/context-guard-kit/context_escrow.py +0 -935
  37. package/context-guard-kit/context_filter.py +0 -637
  38. package/context-guard-kit/context_guard_cli.py +0 -325
  39. package/context-guard-kit/context_guard_diet.py +0 -1711
  40. package/context-guard-kit/context_pack.py +0 -2713
  41. package/context-guard-kit/cost_guard.py +0 -2349
  42. package/context-guard-kit/experimental_registry.py +0 -4348
  43. package/context-guard-kit/failed_attempt_nudge.py +0 -567
  44. package/context-guard-kit/guard_large_read.py +0 -690
  45. package/context-guard-kit/hook_secret_patterns.py +0 -43
  46. package/context-guard-kit/read_symbol.py +0 -483
  47. package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
  48. package/context-guard-kit/sanitize_output.py +0 -725
  49. package/context-guard-kit/settings.example.json +0 -67
  50. package/context-guard-kit/setup_wizard.py +0 -2515
  51. package/context-guard-kit/statusline.sh +0 -362
  52. package/context-guard-kit/statusline_merged.sh +0 -157
  53. package/context-guard-kit/tool_schema_pruner.py +0 -837
  54. package/context-guard-kit/trim_command_output.py +0 -1449
@@ -55,6 +55,42 @@ LEDGER_OPEN_RETRY_SECONDS = 0.01
55
55
  TTL_SECONDS = {"5m": 5 * 60, "1h": 60 * 60}
56
56
  ANTHROPIC_DOCS_URL = "https://docs.anthropic.com/en/build-with-claude/prompt-caching"
57
57
  ANTHROPIC_PRICING_URL = "https://platform.claude.com/docs/en/about-claude/pricing"
58
# Canonical provider-feature keys, in the order they are reported to the user.
ROUTE_FEATURE_KEYS = ("batch_api", "prompt_cache", "structured_outputs", "lower_cost_models", "tool_search")

# Accepted spellings, grouped by the canonical feature key each one resolves to.
_ROUTE_FEATURE_ALIAS_GROUPS = (
    ("batch_api", ("batch", "batch-api", "batch_api", "batchapi")),
    ("prompt_cache", ("prompt-cache", "prompt_cache", "cache")),
    (
        "structured_outputs",
        (
            "structured-output",
            "structured-outputs",
            "structured_output",
            "structured_outputs",
            "json-schema",
            "json_schema",
        ),
    ),
    ("lower_cost_models", ("lower-cost-models", "lower_cost_models", "cheap-model", "cheap_models")),
    ("tool_search", ("tool-search", "tool_search")),
)

# Flat alias -> canonical key lookup derived from the groups above.
ROUTE_FEATURE_ALIASES = {
    alias: canonical
    for canonical, spellings in _ROUTE_FEATURE_ALIAS_GROUPS
    for alias in spellings
}

# Closed vocabularies accepted for task routing metadata.
ROUTE_ALLOWED_LATENCY_CLASSES = {
    "interactive",
    "async",
    "batch",
    "offline",
    "unknown",
}
ROUTE_ALLOWED_RISK_LEVELS = {
    "low",
    "medium",
    "high",
    "unknown",
}
ROUTE_ALLOWED_QUALITY_GATES = {
    "pass",
    "unknown",
    "fail",
}

# Task kinds treated as a fit for structured outputs.
ROUTE_STRUCTURED_TASK_KINDS = {
    "classify", "classification",
    "extract", "extraction",
    "transform",
    "summarize", "summary",
    "batch_eval", "eval",
}
58
94
  ALLOWED_FIRST_COMPONENT_SYMLINKS = {
59
95
  "tmp": Path("/private/tmp"),
60
96
  "var": Path("/private/var"),
@@ -1122,8 +1158,12 @@ def load_or_create_hmac_key(store_dir: Path) -> bytes:
1122
1158
  cleanup_key_lock(lock_dir, locked)
1123
1159
 
1124
1160
 
1161
def keyed_hmac_bytes(key: bytes, data: bytes) -> str:
    """Return the hex-encoded HMAC-SHA256 digest of ``data`` keyed with ``key``."""
    mac = hmac.new(key, digestmod=hashlib.sha256)
    mac.update(data)
    return mac.hexdigest()
1163
+
1164
+
1125
1165
def keyed_hmac(key: bytes, text: str) -> str:
    """Return the hex HMAC-SHA256 of ``text`` under ``key``.

    The text is encoded as UTF-8 with ``errors="replace"`` before it is
    handed to ``keyed_hmac_bytes``.
    """
    encoded = text.encode("utf-8", errors="replace")
    return keyed_hmac_bytes(key, encoded)
1127
1167
 
1128
1168
 
1129
1169
  def ledger_path(store_dir: Path) -> Path:
@@ -1503,10 +1543,12 @@ def build_fingerprints(breakpoints: list[CacheBreakpoint], key: bytes) -> tuple[
1503
1543
  for bp in breakpoints:
1504
1544
  canonical = json_bytes(bp.prefix)
1505
1545
  section_canonical = json_bytes(bp.section)
1546
+ canonical_bytes = canonical.encode("utf-8", errors="replace")
1547
+ digest = keyed_hmac_bytes(key, canonical_bytes)
1506
1548
  bp_redactions = secret_count_in_text(canonical)
1507
1549
  redactions += bp_redactions
1508
1550
  prefix_tokens = token_proxy_text(canonical)
1509
- prefix_bytes = byte_len_text(canonical)
1551
+ prefix_bytes = len(canonical_bytes)
1510
1552
  prefix_delta_tokens = max(0, prefix_tokens - previous_prefix_tokens)
1511
1553
  prefix_delta_bytes = max(0, prefix_bytes - previous_prefix_bytes)
1512
1554
  previous_prefix_tokens = max(previous_prefix_tokens, prefix_tokens)
@@ -1516,8 +1558,8 @@ def build_fingerprints(breakpoints: list[CacheBreakpoint], key: bytes) -> tuple[
1516
1558
  "breakpoint_id": bp.breakpoint_id,
1517
1559
  "kind": bp.kind,
1518
1560
  "ttl": bp.ttl,
1519
- "hmac": keyed_hmac(key, canonical),
1520
- "display_hmac": "hmac-sha256:" + keyed_hmac(key, canonical)[:16],
1561
+ "hmac": digest,
1562
+ "display_hmac": "hmac-sha256:" + digest[:16],
1521
1563
  "prefix_bytes": prefix_bytes,
1522
1564
  "prefix_delta_bytes": prefix_delta_bytes,
1523
1565
  "section_bytes": byte_len_text(section_canonical),
@@ -1845,6 +1887,718 @@ def preflight_command(args: argparse.Namespace) -> int:
1845
1887
  return 3 if block else 0
1846
1888
 
1847
1889
 
1890
def advisory_label(value: Any, *, default: str = "unknown", limit: int = 80) -> str:
    """Turn an arbitrary value into a short, identifier-like display label.

    Returns ``default`` for ``None``, blank text, or text that cleans down to
    nothing; ``"redacted"`` when ``secret_count_in_text`` reports a match;
    ``"path-redacted"`` when the text contains a slash or backslash. Otherwise
    runs of characters outside ``A-Za-z0-9_.:-`` collapse to a single ``-`` and
    the result is truncated to ``limit`` characters.
    """

    text = "" if value is None else str(value).strip()
    if not text:
        return default
    if secret_count_in_text(text):
        return "redacted"
    if any(separator in text for separator in ("/", "\\")):
        return "path-redacted"
    cleaned = re.sub(r"[^A-Za-z0-9_.:-]+", "-", text).strip("-")
    return cleaned[:limit] if cleaned else default
1906
+
1907
+
1908
# Leading "owner" segments that mark an ``a/b`` model value as a local path
# rather than a provider-style ``org/model`` identifier.
ROUTE_MODEL_LOCAL_PATH_FIRST_SEGMENTS = set(
    (
        "checkpoint", "checkpoints", "ckpt",
        "data", "dataset", "datasets",
        "model", "models",
        "private", "tmp", "weights",
    )
)

# File suffixes that mark the trailing segment of an ``a/b`` value as a file.
ROUTE_MODEL_LOCAL_PATH_EXTENSIONS = set(
    (
        ".bin", ".ckpt", ".gguf", ".json", ".onnx",
        ".pt", ".pth", ".safetensors", ".yaml", ".yml",
    )
)
1933
+
1934
+
1935
def route_model_path_like(text: str) -> bool:
    """Report whether a model value looks like a filesystem path.

    Obvious path shapes (absolute/relative/home prefixes, any backslash, a
    drive-letter prefix, or ``/users/``, ``/home/``, ``/private/`` anywhere)
    are path-like. A value without ``/`` is not. A value with ``/`` is accepted
    as an identifier only when it has exactly two non-empty, non-dot segments,
    its first segment is not a known local directory name, and its last
    segment does not end in a known file extension.
    """
    lowered = text.lower()
    has_path_prefix = text.startswith(("/", "\\", "~", "./", "../"))
    has_backslash = "\\" in text
    has_drive_letter = re.match(r"^[A-Za-z]:[\\/]", text) is not None
    mentions_home_dir = any(marker in lowered for marker in ("/users/", "/home/", "/private/"))
    if has_path_prefix or has_backslash or has_drive_letter or mentions_home_dir:
        return True

    if "/" not in text:
        return False

    parts = text.split("/")
    if len(parts) != 2:
        return True
    if any(part in {"", ".", ".."} for part in parts):
        return True

    owner, name = (part.strip().lower() for part in parts)
    if owner in ROUTE_MODEL_LOCAL_PATH_FIRST_SEGMENTS:
        return True
    return name.endswith(tuple(ROUTE_MODEL_LOCAL_PATH_EXTENSIONS))
1956
+
1957
+
1958
def route_model_label(value: Any, *, default: str = "unknown", limit: int = 120) -> str:
    """Produce a display label for a model identifier.

    Returns ``default`` for ``None``, blank text, or text that cleans down to
    nothing; ``"redacted"`` when ``secret_count_in_text`` reports a match;
    ``"path-redacted"`` when ``route_model_path_like`` flags the text.
    Otherwise runs of characters outside ``A-Za-z0-9_.:/-`` collapse to ``-``
    and the result is truncated to ``limit`` characters.
    """

    text = "" if value is None else str(value).strip()
    if not text:
        return default
    if secret_count_in_text(text):
        return "redacted"
    if route_model_path_like(text):
        return "path-redacted"
    cleaned = re.sub(r"[^A-Za-z0-9_.:/-]+", "-", text).strip("-")
    return cleaned[:limit] if cleaned else default
1974
+
1975
+
1976
def route_model_for_pricing(value: Any, fallback: str) -> str:
    """Return the stripped model name for pricing, or ``fallback``.

    ``fallback`` is used when ``value`` is ``None``, blank after stripping, or
    flagged by ``secret_count_in_text``.
    """
    candidate = "" if value is None else str(value).strip()
    usable = bool(candidate) and not secret_count_in_text(candidate)
    return candidate if usable else fallback
1983
+
1984
+
1985
def finite_nonnegative_value(value: Any) -> float | None:
    """Coerce ``value`` to a finite float ``>= 0``, or return ``None``.

    ``None``, booleans, values ``float()`` cannot convert, NaN, infinities,
    and negative numbers all yield ``None``.
    """
    # bool is a subclass of int; reject it explicitly so True is not read as 1.0.
    if isinstance(value, bool) or value is None:
        return None
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        return None
    if math.isfinite(number) and number >= 0:
        return number
    return None
1995
+
1996
+
1997
def route_bool(value: Any) -> bool | None:
    """Interpret a loosely-typed flag as ``True``, ``False``, or ``None``.

    Booleans pass through. Numbers equal to 1 or 0 map to ``True``/``False``.
    Strings are stripped and lower-cased, then matched against fixed
    affirmative and negative word lists. Anything else yields ``None``.
    """
    if isinstance(value, bool):
        return value
    if isinstance(value, (int, float)):
        # Only exact 1 / 0 carry a meaning; every other number is "unknown".
        if value == 1:
            return True
        if value == 0:
            return False
        return None
    if not isinstance(value, str):
        return None

    token = value.strip().lower()
    verdicts = (
        ({"1", "true", "yes", "y", "on", "supported", "available"}, True),
        ({"0", "false", "no", "n", "off", "unsupported", "unavailable"}, False),
    )
    for words, verdict in verdicts:
        if token in words:
            return verdict
    # Explicit "unknown"-style tokens and unrecognized text are both undetermined.
    return None
2014
+
2015
+
2016
def route_choice(value: Any, allowed: set[str], *, default: str = "unknown") -> str:
    """Normalize ``value`` and return it only if it is in ``allowed``.

    The value is stringified, stripped, lower-cased, and has ``-`` replaced by
    ``_``. ``None`` or a value outside ``allowed`` yields ``default``.
    """
    if value is None:
        return default
    normalized = str(value).strip().lower().replace("-", "_")
    if normalized in allowed:
        return normalized
    return default
2021
+
2022
+
2023
def route_nested_dict(data: dict[str, Any], *keys: str) -> dict[str, Any]:
    """Return the first ``data[key]`` that is a dict, trying ``keys`` in order.

    Returns a new empty dict when no listed key holds a dict.
    """
    candidates = (data.get(key) for key in keys)
    return next((candidate for candidate in candidates if isinstance(candidate, dict)), {})
2029
+
2030
+
2031
def first_present_mapping_value(*containers: dict[str, Any], keys: tuple[str, ...]) -> Any:
    """Return the value of the first key that exists in any container.

    Containers are searched in order, and ``keys`` in order within each one.
    Presence is what counts: a stored ``None`` is returned as-is and stops the
    search. Returns ``None`` when no key is present anywhere.
    """
    hits = (
        container.get(key)
        for container in containers
        for key in keys
        if key in container
    )
    return next(hits, None)
2037
+
2038
+
2039
def first_nonnegative_cost(*containers: dict[str, Any], keys: tuple[str, ...]) -> float | None:
    """Return the first usable cost found under ``keys`` in the containers.

    Containers are searched in order, and ``keys`` in order within each one.
    A present key whose value ``finite_nonnegative_value`` rejects is skipped,
    so the search continues. Returns ``None`` when nothing usable is found.
    """
    amounts = (
        finite_nonnegative_value(container.get(key))
        for container in containers
        for key in keys
        if key in container
    )
    return next((amount for amount in amounts if amount is not None), None)
2048
+
2049
+
2050
def sum_nonnegative_costs(container: dict[str, Any], keys: tuple[str, ...]) -> tuple[float, list[str]]:
    """Add up the usable costs stored under ``keys`` in one container.

    Returns ``(total, observed)`` where ``observed`` lists, in ``keys`` order,
    the keys whose values ``finite_nonnegative_value`` accepted.
    """
    accepted = [
        (key, amount)
        for key, amount in ((key, finite_nonnegative_value(container.get(key))) for key in keys)
        if amount is not None
    ]
    # Accumulate explicitly, in key order, so float rounding matches a plain running total.
    total = 0.0
    for _, amount in accepted:
        total += amount
    return total, [key for key, _ in accepted]
2060
+
2061
+
2062
def sum_nonnegative_costs_from(*containers: dict[str, Any], keys: tuple[str, ...]) -> tuple[float, list[str]]:
    """Add up one usable cost per key, taken from the first container that has it.

    Each key is resolved independently with ``first_nonnegative_cost`` across
    all containers. Returns ``(total, observed)`` where ``observed`` lists the
    keys that resolved, in ``keys`` order.
    """
    accepted = [
        (key, amount)
        for key, amount in ((key, first_nonnegative_cost(*containers, keys=(key,))) for key in keys)
        if amount is not None
    ]
    # Accumulate explicitly, in key order, so float rounding matches a plain running total.
    total = 0.0
    for _, amount in accepted:
        total += amount
    return total, [key for key, _ in accepted]
2072
+
2073
+
2074
def parse_feature_overrides(raw_features: list[str] | None) -> dict[str, bool]:
    """Parse ``--feature`` style overrides into ``{canonical_key: bool}``.

    Each entry is ``name=value``, ``name:value``, or a bare ``name`` (meaning
    true). ``name`` may be any spelling in ``ROUTE_FEATURE_ALIASES``; ``value``
    is interpreted by ``route_bool``. An unknown name or an undetermined value
    is reported through ``fail``. Later entries for the same feature override
    earlier ones.
    """
    out: dict[str, bool] = {}
    for raw in raw_features or []:
        if "=" in raw:
            key, raw_value = raw.split("=", 1)
        elif ":" in raw:
            key, raw_value = raw.split(":", 1)
        else:
            key, raw_value = raw, "true"
        lowered = key.strip().lower()
        # Try the literal spelling first: some aliases (e.g. "cheap_models")
        # exist only in underscore form, and hyphenating them unconditionally
        # turned a documented alias into an "unknown feature" error.
        normalized_key = ROUTE_FEATURE_ALIASES.get(lowered)
        if normalized_key is None:
            normalized_key = ROUTE_FEATURE_ALIASES.get(lowered.replace("_", "-"))
        # Never echo the raw key: it is user input and may hold a secret or path.
        display_key = advisory_label(key, default="redacted-route-feature")
        if normalized_key is None:
            fail(f"unknown route feature {display_key!r}; expected one of {', '.join(ROUTE_FEATURE_KEYS)}")
        parsed = route_bool(raw_value)
        if parsed is None:
            fail(f"route feature {display_key!r} must be true or false")
        out[normalized_key] = parsed
    return out
2092
+
2093
+
2094
def provider_features_for_workload(workload: dict[str, Any], args: argparse.Namespace) -> dict[str, Any]:
    """Build the caller-declared provider feature matrix for a workload.

    For every key in ``ROUTE_FEATURE_KEYS`` the workload's ``provider_features``
    dict is searched under each accepted spelling (in sorted order) for the
    first value ``route_bool`` can decide. A ``feature`` override on ``args``
    then takes precedence. Every entry is marked ``recheck_required``.
    """
    declared_raw = workload.get("provider_features")
    declared_in_workload = declared_raw if isinstance(declared_raw, dict) else {}
    overrides = parse_feature_overrides(getattr(args, "feature", None))

    features: dict[str, dict[str, Any]] = {}
    for key in ROUTE_FEATURE_KEYS:
        spellings = {key, key.replace("_", "-")} | {
            alias for alias, canonical in ROUTE_FEATURE_ALIASES.items() if canonical == key
        }
        decided = (
            route_bool(declared_in_workload.get(spelling))
            for spelling in sorted(spellings)
            if spelling in declared_in_workload
        )
        supported: bool | None = next((verdict for verdict in decided if verdict is not None), None)
        source = "unknown" if supported is None else "workload"
        if key in overrides:
            # Command-line flags win over anything the workload declared.
            supported, source = overrides[key], "flag"
        features[key] = {
            "supported": supported,
            "source": source,
            "recheck_required": True,
            "reason": "provider_features are caller-supplied or unknown; recheck current provider documentation before operational routing",
        }

    declared_count = len([item for item in features.values() if item["supported"] is not None])
    return {
        "features": features,
        "declared_feature_count": declared_count,
        "unknown_feature_count": len(features) - declared_count,
        "caller_supplied": declared_count > 0,
        "authoritative_provider_matrix": False,
        "recheck_required": True,
    }
2129
+
2130
+
2131
def route_usage_object(workload: dict[str, Any]) -> dict[str, Any]:
    """Locate the provider usage dict inside a workload.

    Checked in order: top-level ``usage`` / ``provider_usage`` (unwrapping one
    nested ``usage`` dict if present), ``response.usage``, then
    ``telemetry.usage`` / ``telemetry.provider_usage`` (same unwrapping).
    Returns an empty dict when none of these is a dict.
    """

    def unwrap(candidate: dict[str, Any]) -> dict[str, Any]:
        # Some payloads wrap the counters one level deeper under "usage".
        inner = candidate.get("usage")
        return inner if isinstance(inner, dict) else candidate

    top_level = workload.get("usage") or workload.get("provider_usage")
    if isinstance(top_level, dict):
        return unwrap(top_level)

    response = workload.get("response")
    if isinstance(response, dict):
        response_usage = response.get("usage")
        if isinstance(response_usage, dict):
            return response_usage

    telemetry = workload.get("telemetry")
    if isinstance(telemetry, dict):
        nested = telemetry.get("usage") or telemetry.get("provider_usage")
        if isinstance(nested, dict):
            return unwrap(nested)

    return {}
2144
+
2145
+
2146
def usage_has_measured_tokens(usage: dict[str, Any]) -> bool:
    """Report whether the usage dict carries any token measurement.

    True when ``usage_int`` reads a positive count from any of the known token
    fields, or when ``usage["cache_creation"]`` is truthy.
    """
    token_fields = (
        "input_tokens",
        "output_tokens",
        "cache_creation_input_tokens",
        "cache_creation_input_tokens_5m",
        "cache_creation_input_tokens_1h",
        "cache_read_input_tokens",
    )
    for field in token_fields:
        if usage_int(usage, field) > 0:
            return True
    return bool(usage.get("cache_creation"))
2158
+
2159
+
2160
def cost_from_usage(usage: dict[str, Any], *, profile: dict[str, Any], model: str, exchange: float) -> dict[str, Any]:
    """Estimate the cost of a usage record under a pricing profile.

    Input and output tokens are priced at the model's rates from
    ``rates_for_model``. Cache writes (5m and 1h buckets) and cache reads are
    priced at the input rate scaled by the multipliers from
    ``pricing_multipliers``. Returns the USD and KRW totals, the rate key that
    was used, and the token counts that went into the estimate.
    """
    input_rate, output_rate, model_rate_key = rates_for_model(profile, model)
    write_mult, read_mult = pricing_multipliers(profile)

    fresh_input = usage_int(usage, "input_tokens")
    generated = usage_int(usage, "output_tokens")
    written_5m, written_1h = cache_creation_buckets(usage)
    cached_reads = usage_int(usage, "cache_read_input_tokens")

    total_usd = money(fresh_input, input_rate) + money(generated, output_rate)
    total_usd = total_usd + money(written_5m, input_rate, write_mult["5m"])
    total_usd = total_usd + money(written_1h, input_rate, write_mult["1h"])
    total_usd = total_usd + money(cached_reads, input_rate, read_mult)

    counted_tokens = {
        "input_tokens": fresh_input,
        "output_tokens": generated,
        "cache_creation_input_tokens_5m": written_5m,
        "cache_creation_input_tokens_1h": written_1h,
        "cache_read_input_tokens": cached_reads,
    }
    return {
        "cost_usd": round(total_usd, 8),
        "cost_krw": round(krw(total_usd, exchange), 2),
        "model_rate_key": model_rate_key,
        "usage": counted_tokens,
    }
2186
+
2187
+
2188
def request_profile_for_route(workload: dict[str, Any]) -> dict[str, Any]:
    """Summarize the workload's ``request`` without emitting its content.

    When ``request`` is not a dict, a placeholder profile with
    ``present=False`` is returned. Otherwise the profile reports estimated
    prompt tokens, the number of cache breakpoints, the largest per-breakpoint
    ``tokens_estimated`` value, cache-control counters from parsing, and the
    count of secret-like values seen while fingerprinting.
    """
    request = workload.get("request")
    if isinstance(request, dict):
        breakpoints, parse_meta = extract_cache_breakpoints(request)
        # Only sizes and counts are read from the fingerprints here, so a fixed
        # all-zero key is passed instead of a stored HMAC key.
        fingerprints, redactions = build_fingerprints(breakpoints, b"\0" * 32)
        per_breakpoint_tokens = [int(fp.get("tokens_estimated") or 0) for fp in fingerprints]
        cacheable_tokens = max(per_breakpoint_tokens, default=0)
        return {
            "present": True,
            "token_proxy": f"chars_div_{TOKEN_PROXY_CHARS_PER_TOKEN}",
            "prompt_tokens_estimated": token_proxy_obj(strip_known_cache_controls(request)),
            "cache_breakpoint_count": len(breakpoints),
            "cacheable_tokens_estimated": cacheable_tokens,
            "cache_control_markers": int(parse_meta.get("cache_control_markers") or 0),
            "unsupported_cache_controls": int(parse_meta.get("unsupported_cache_controls") or 0),
            "secret_like_values_detected": redactions,
            "raw_request_emitted": False,
        }

    return {
        "present": False,
        "token_proxy": "unavailable",
        "prompt_tokens_estimated": None,
        "cache_breakpoint_count": 0,
        "cacheable_tokens_estimated": 0,
        "raw_request_emitted": False,
    }
2213
+
2214
+
2215
def route_task_metadata(workload: dict[str, Any], args: argparse.Namespace) -> dict[str, Any]:
    """Collect normalized routing metadata for a workload.

    Each field prefers a truthy command-line value on ``args``, then the first
    matching key in the workload's task section (``task`` / ``task_metadata`` /
    ``routing``), then the workload itself. ``quality_gate`` additionally falls
    back to ``telemetry``. Choice fields are restricted to their allowed
    vocabularies; boolean fields default to ``False`` when undetermined.
    """
    task = route_nested_dict(workload, "task", "task_metadata", "routing")
    telemetry = route_nested_dict(workload, "telemetry")

    def from_flag(name: str) -> Any:
        return getattr(args, name, None)

    def declared(*keys: str) -> Any:
        return first_present_mapping_value(task, workload, keys=keys)

    def declared_flag(*keys: str) -> bool:
        return bool(route_bool(declared(*keys)))

    latency = route_choice(
        from_flag("latency_class") or declared("latency_class", "latency", "mode"),
        ROUTE_ALLOWED_LATENCY_CLASSES,
    )
    risk = route_choice(
        from_flag("risk") or declared("risk", "risk_level"),
        ROUTE_ALLOWED_RISK_LEVELS,
    )
    quality_gate = route_choice(
        from_flag("quality_gate")
        or first_present_mapping_value(task, workload, telemetry, keys=("quality_gate", "quality")),
        ROUTE_ALLOWED_QUALITY_GATES,
    )
    raw_task_kind = from_flag("task_kind") or declared("task_kind", "kind", "type")
    task_kind = advisory_label(raw_task_kind, default="unknown", limit=48).lower()
    deadline_seconds = safe_int(declared("deadline_seconds", "max_latency_seconds"), 0)

    return {
        "latency_class": latency,
        "risk": risk,
        "quality_gate": quality_gate,
        "task_kind": task_kind,
        "deadline_seconds": deadline_seconds,
        "requires_interaction": declared_flag("requires_interaction", "interactive_required", "user_blocking"),
        "has_external_side_effects": declared_flag("has_external_side_effects", "side_effects"),
        "order_sensitive": declared_flag("order_sensitive", "requires_order"),
    }
2250
+
2251
+
2252
def total_cost_accounting_for_route(
    workload: dict[str, Any],
    *,
    profile: dict[str, Any],
    model: str,
    exchange: float,
) -> dict[str, Any]:
    """Assemble a diagnostic total-cost report for a workload.

    The report has three cost parts: primary, external, and local. An explicit
    ``total_cost_with_shift_usd`` / ``total_shifted_cost_usd`` in the workload
    replaces the computed sum when present.

    Args:
        workload: Caller-supplied workload dict; ``telemetry`` and the
            shifted-cost section are read from it when they are dicts.
        profile: Pricing profile passed to ``cost_from_usage`` and echoed
            under ``pricing``.
        model: Model name used to price measured usage.
        exchange: USD to KRW rate used for the KRW figures.

    Returns:
        A dict with cost figures, their sources, run counters, measurement
        availability flags, and shifted-cost accounting flags.
    """
    telemetry = route_nested_dict(workload, "telemetry")
    shifted = route_nested_dict(workload, "shifted_costs", "shifted_cost", "auxiliary_costs")
    usage = route_usage_object(workload)
    # Only price usage when at least one token measurement is present.
    usage_cost = cost_from_usage(usage, profile=profile, model=model, exchange=exchange) if usage_has_measured_tokens(usage) else None

    # Primary cost: an explicit figure wins; otherwise fall back to the usage
    # estimate; otherwise report 0.0 with source "unavailable".
    primary_cost = first_nonnegative_cost(
        telemetry,
        workload,
        keys=("primary_cost_usd", "provider_cost_usd", "observed_cost_usd", "cost_usd"),
    )
    primary_source = "explicit_telemetry" if primary_cost is not None else "unavailable"
    if primary_cost is None and usage_cost is not None:
        primary_cost = float(usage_cost["cost_usd"])
        primary_source = "estimated_from_provider_usage_fields"
    if primary_cost is None:
        primary_cost = 0.0

    # External cost: an aggregate "external_cost_usd" wins over the component
    # sum. Components are read from telemetry and the shifted section only,
    # not from the workload top level.
    external_cost_value = first_nonnegative_cost(telemetry, shifted, workload, keys=("external_cost_usd",))
    external_component_sum, external_components = sum_nonnegative_costs_from(
        telemetry,
        shifted,
        keys=("subagent_cost_usd", "embedding_cost_usd", "reranker_cost_usd", "tool_call_cost_usd", "retry_cost_usd", "auxiliary_provider_cost_usd"),
    )
    external_cost_from_aggregate = external_cost_value is not None
    if external_cost_value is None:
        external_cost = external_component_sum
    else:
        external_cost = external_cost_value

    # Local cost: same aggregate-over-components rule as external cost.
    local_cost_value = first_nonnegative_cost(
        telemetry,
        shifted,
        workload,
        keys=("local_cost_usd", "self_hosted_cost_usd", "local_model_cost_usd"),
    )
    local_component_sum, local_components = sum_nonnegative_costs_from(
        telemetry,
        shifted,
        keys=("local_server_cost_usd", "local_energy_cost_usd", "storage_cost_usd"),
    )
    local_cost_from_aggregate = local_cost_value is not None
    if local_cost_value is None:
        local_cost = local_component_sum
    else:
        local_cost = local_cost_value

    # A caller-provided total overrides the computed one; both are reported.
    provided_total = first_nonnegative_cost(
        telemetry,
        shifted,
        workload,
        keys=("total_cost_with_shift_usd", "total_shifted_cost_usd"),
    )
    computed_total = primary_cost + external_cost + local_cost
    total = provided_total if provided_total is not None else computed_total
    external_tokens = safe_int(first_present_mapping_value(telemetry, shifted, workload, keys=("external_tokens", "subagent_tokens", "embedding_tokens")), 0)
    retry_count = safe_int(first_present_mapping_value(telemetry, workload, keys=("retry_count", "retries")), 0)
    subagent_count = safe_int(first_present_mapping_value(telemetry, workload, keys=("subagent_count", "subagents")), 0)
    tool_call_count = safe_int(first_present_mapping_value(telemetry, workload, keys=("tool_call_count", "tool_calls")), 0)
    external_cost_supplied = external_cost_from_aggregate or bool(external_components)
    local_cost_supplied = local_cost_from_aggregate or bool(local_components)
    provided_total_supplied = provided_total is not None
    # Warn when counters show shifted work happened but no shifted cost of any
    # kind (external, local, or provided total) was supplied.
    missing_shifted_cost = bool(
        (external_tokens or retry_count or subagent_count or tool_call_count)
        and not (external_cost_supplied or local_cost_supplied or provided_total_supplied)
    )
    return {
        "currency": "USD",
        "primary_cost_usd": round(primary_cost, 8),
        "primary_cost_source": primary_source,
        "external_cost_usd": round(external_cost, 8),
        "local_cost_usd": round(local_cost, 8),
        "external_cost_supplied": external_cost_supplied,
        "local_cost_supplied": local_cost_supplied,
        "external_component_breakdown_usd": round(external_component_sum, 8),
        "local_component_breakdown_usd": round(local_component_sum, 8),
        "computed_total_cost_with_shift_usd": round(computed_total, 8),
        "total_cost_with_shift_usd": round(total, 8),
        "total_cost_with_shift_krw": round(krw(total, exchange), 2),
        "provided_total_cost_with_shift_usd": round(provided_total, 8) if provided_total is not None else None,
        "pricing": {
            "profile": str(profile.get("name") or "custom"),
            "release_recheck_required": bool(profile.get("release_recheck_required", True)),
            "source_urls": profile.get("source_urls", [ANTHROPIC_DOCS_URL, ANTHROPIC_PRICING_URL]),
            "usd_to_krw": exchange,
        },
        "usage_cost_estimate": usage_cost,
        "components_observed": sorted(set(external_components + local_components)),
        "run_counters": {
            "external_tokens": external_tokens,
            "retry_count": retry_count,
            "subagent_count": subagent_count,
            "tool_call_count": tool_call_count,
        },
        "measurement_availability": {
            "provider_usage_tokens": usage_has_measured_tokens(usage),
            "primary_cost": primary_source != "unavailable",
            "external_cost": external_cost_supplied,
            "local_cost": local_cost_supplied,
            "shifted_cost": bool(external_cost_supplied or local_cost_supplied or provided_total_supplied),
        },
        "shifted_cost_accounting": {
            "required": True,
            "diagnostic_only": True,
            "includes_external_or_local_components": bool(external_cost_supplied or local_cost_supplied),
            "missing_shifted_cost_warning": missing_shifted_cost,
            "claim_boundary": "total-cost routing is advisory; hosted savings claims require matched successful tasks with non-inferior quality and measured shifted costs",
        },
    }
2367
+
2368
+
2369
def batchability_for_route(task: dict[str, Any], provider_features: dict[str, Any]) -> dict[str, Any]:
    """Judge whether a task is a candidate for a provider batch API.

    Any blocker (interactive latency, user interaction, external side effects,
    order sensitivity, high risk, failed quality gate, or batch support
    declared false) yields ``not_recommended``. With no blockers, the task is
    an eligible ``candidate`` only when batch support is declared true and the
    latency class is async/batch/offline or the deadline is at least one hour.
    Everything else is ``conditional``.
    """
    batch_supported = provider_features["features"]["batch_api"]["supported"]
    latency = str(task.get("latency_class") or "unknown")
    deadline = int(task.get("deadline_seconds") or 0)
    relaxed_latency = latency in {"async", "batch", "offline"}
    long_deadline = deadline >= 3600

    blockers: set[str] = set()
    reasons: set[str] = set()

    # Exactly one latency-related entry is recorded per call.
    if latency == "interactive":
        blockers.add("interactive_latency")
    elif relaxed_latency:
        reasons.add(f"latency_class_{latency}")
    elif long_deadline:
        reasons.add("deadline_allows_batch_window")
    else:
        reasons.add("latency_unknown")

    blocking_conditions = (
        (task.get("requires_interaction"), "requires_user_interaction"),
        (task.get("has_external_side_effects"), "external_side_effects_need_idempotency_review"),
        (task.get("order_sensitive"), "order_sensitive"),
        (task.get("risk") == "high", "high_risk_route"),
        (task.get("quality_gate") == "fail", "quality_gate_failed"),
        (batch_supported is False, "provider_batch_api_not_declared"),
    )
    blockers.update(label for triggered, label in blocking_conditions if triggered)

    if batch_supported is None:
        reasons.add("provider_batch_api_unknown_recheck_required")
    elif batch_supported is not False:
        reasons.add("provider_batch_api_declared")

    if blockers:
        level, eligible = "not_recommended", False
    elif batch_supported is True and (relaxed_latency or long_deadline):
        level, eligible = "candidate", True
    else:
        level, eligible = "conditional", False

    return {
        "eligible": eligible,
        "level": level,
        "latency_class": latency,
        "deadline_seconds": deadline,
        "reasons": sorted(reasons),
        "blockers": sorted(blockers),
        "requires_current_provider_docs_check": batch_supported is None,
    }
2418
+
2419
+
2420
def recommendation(
    rec_id: str,
    *,
    decision: str,
    priority: str,
    rationale: str,
    prerequisites: list[str],
) -> dict[str, Any]:
    """Build one routing recommendation record.

    The given fields are copied as-is and a fixed ``claim_boundary``
    disclaimer is appended.
    """
    record: dict[str, Any] = dict(
        id=rec_id,
        decision=decision,
        priority=priority,
        rationale=rationale,
        prerequisites=prerequisites,
    )
    record["claim_boundary"] = "candidate routing advice only; validate on matched successful tasks before claiming token or cost savings"
    return record
2436
+
2437
+
2438
+ def route_recommendations(
2439
+ *,
2440
+ task: dict[str, Any],
2441
+ provider_features: dict[str, Any],
2442
+ request_profile: dict[str, Any],
2443
+ batchability: dict[str, Any],
2444
+ total_cost: dict[str, Any],
2445
+ ) -> list[dict[str, Any]]:
2446
+ recs: list[dict[str, Any]] = [
2447
+ recommendation(
2448
+ "measure-before-claim",
2449
+ decision="required",
2450
+ priority="P0",
2451
+ rationale="Route changes can shift work into retries, subagents, batch queues, local servers, or provider cache writes; measure total cost with quality gates before claims.",
2452
+ prerequisites=["matched_successful_tasks", "non_inferior_quality", "shifted_cost_accounting"],
2453
+ )
2454
+ ]
2455
+ batch_decision = "candidate" if batchability.get("eligible") else str(batchability.get("level") or "conditional")
2456
+ recs.append(
2457
+ recommendation(
2458
+ "use-batch-api-for-noninteractive-work",
2459
+ decision=batch_decision,
2460
+ priority="P1" if batch_decision == "candidate" else "P2",
2461
+ rationale="Batch APIs can reduce cost for non-interactive work only when provider support, latency tolerance, idempotency, and quality gates are satisfied.",
2462
+ prerequisites=["provider_batch_support_current", "async_or_offline_latency", "idempotency_review", "matched_replay"],
2463
+ )
2464
+ )
2465
+
2466
+ prompt_cache_feature = provider_features["features"]["prompt_cache"]["supported"]
2467
+ cache_breakpoints = int(request_profile.get("cache_breakpoint_count") or 0)
2468
+ cacheable_tokens = int(request_profile.get("cacheable_tokens_estimated") or 0)
2469
+ if prompt_cache_feature is False:
2470
+ cache_decision = "not_recommended"
2471
+ elif cache_breakpoints or cacheable_tokens:
2472
+ cache_decision = "candidate" if prompt_cache_feature is True else "conditional"
2473
+ else:
2474
+ cache_decision = "needs_request_evidence"
2475
+ recs.append(
2476
+ recommendation(
2477
+ "preserve-prompt-cache-prefix",
2478
+ decision=cache_decision,
2479
+ priority="P1" if cache_decision == "candidate" else "P2",
2480
+ rationale="Stable-prefix prompt caching is useful only when current provider support and repeated cacheable request prefixes are verified.",
2481
+ prerequisites=["stable_prefix_first", "volatile_tail", "provider_usage_cache_telemetry"],
2482
+ )
2483
+ )
2484
+
2485
+ structured_feature = provider_features["features"]["structured_outputs"]["supported"]
2486
+ task_kind = str(task.get("task_kind") or "unknown")
2487
+ if structured_feature is False:
2488
+ structured_decision = "not_recommended"
2489
+ elif task_kind in ROUTE_STRUCTURED_TASK_KINDS:
2490
+ structured_decision = "candidate" if structured_feature is True else "conditional"
2491
+ else:
2492
+ structured_decision = "needs_task_fit"
2493
+ recs.append(
2494
+ recommendation(
2495
+ "use-structured-outputs-when-task-fits",
2496
+ decision=structured_decision,
2497
+ priority="P2",
2498
+ rationale="Structured outputs can reduce retries and parsing repairs for extraction/classification style work, but they are not a token-savings proof.",
2499
+ prerequisites=["schema_fit_review", "retry_rate_measurement", "quality_non_regression"],
2500
+ )
2501
+ )
2502
+
2503
+ lower_cost_feature = provider_features["features"]["lower_cost_models"]["supported"]
2504
+ risk = str(task.get("risk") or "unknown")
2505
+ quality_gate = str(task.get("quality_gate") or "unknown")
2506
+ if lower_cost_feature is False or risk == "high" or quality_gate == "fail":
2507
+ cheaper_decision = "not_recommended"
2508
+ elif risk == "low" and quality_gate in {"pass", "unknown"}:
2509
+ cheaper_decision = "candidate" if lower_cost_feature is True else "conditional"
2510
+ else:
2511
+ cheaper_decision = "conditional"
2512
+ recs.append(
2513
+ recommendation(
2514
+ "evaluate-cheaper-model-route",
2515
+ decision=cheaper_decision,
2516
+ priority="P2",
2517
+ rationale="Lower-cost model routing is acceptable only for low-risk or well-gated work and must include corrections, retries, and shifted cost.",
2518
+ prerequisites=["risk_tier_low_or_reviewed", "matched_replay", "corrections_guardrail", "retry_cost_accounting"],
2519
+ )
2520
+ )
2521
+
2522
+ if total_cost["shifted_cost_accounting"].get("missing_shifted_cost_warning"):
2523
+ recs.append(
2524
+ recommendation(
2525
+ "record-missing-shifted-costs",
2526
+ decision="required",
2527
+ priority="P1",
2528
+ rationale="Telemetry indicates external tokens, retries, or subagents but no shifted external/local cost component was supplied.",
2529
+ prerequisites=["external_cost_usd_or_local_cost_usd", "retry_or_subagent_cost_measurement"],
2530
+ )
2531
+ )
2532
+ return recs
2533
+
2534
+
2535
def route_advisor_command(args: argparse.Namespace) -> int:
    """Assemble the ``route_advisor`` report for one workload and emit it.

    The workload JSON is read from ``args.workload``; it may be the workload
    object itself or an object that nests it under a ``"workload"`` key.
    Provider and model labels are resolved in priority order: command-line
    override, then the workload's top-level field, then the field inside the
    workload's ``"request"`` object.

    Args:
        args: Parsed command-line namespace. Reads ``workload``, ``max_bytes``,
            ``pricing_profile``, ``usd_to_krw``, ``json`` and, when present,
            ``provider`` and ``model``; the namespace is also forwarded to the
            provider-feature and task-metadata helpers.

    Returns:
        Always ``0`` once the report has been emitted.
    """
    loaded, _truncated = load_json_input(args.workload, max_bytes=args.max_bytes)

    # Unwrap {"workload": {...}} envelopes; anything else is validated as-is.
    candidate = loaded
    if isinstance(loaded, dict):
        nested = loaded.get("workload")
        if isinstance(nested, dict):
            candidate = nested
    workload = require_json_object(candidate, "workload")

    profile = load_pricing_profile(args.pricing_profile, max_bytes=args.max_bytes)
    if args.usd_to_krw is not None:
        # Store the override in the profile so the lookup below picks it up.
        profile["usd_to_krw"] = usd_to_krw(profile, args.usd_to_krw)
    exchange = usd_to_krw(profile, None)

    request = workload.get("request")
    if not isinstance(request, dict):
        request = {}

    provider_raw = (
        getattr(args, "provider", None)
        or workload.get("provider")
        or request.get("provider")
    )
    provider = advisory_label(provider_raw)
    model_raw = (
        getattr(args, "model", None)
        or workload.get("model")
        or request.get("model")
    )
    model = route_model_label(model_raw)
    model_for_pricing = route_model_for_pricing(model_raw, model)

    provider_features = provider_features_for_workload(workload, args)
    task = route_task_metadata(workload, args)
    request_profile = request_profile_for_route(workload)
    total_cost = total_cost_accounting_for_route(
        workload,
        profile=profile,
        model=model_for_pricing,
        exchange=exchange,
    )
    batchability = batchability_for_route(task, provider_features)
    recommendations = route_recommendations(
        task=task,
        provider_features=provider_features,
        request_profile=request_profile,
        batchability=batchability,
        total_cost=total_cost,
    )

    # A "required" recommendation means measurement must come before review.
    has_required = any(rec["decision"] == "required" for rec in recommendations)
    decisions = [rec.get("decision") for rec in recommendations]
    routing_decision = {
        "best_current_action": "measure_before_claim" if has_required else "review_candidates",
        "candidate_count": decisions.count("candidate"),
        "conditional_count": decisions.count("conditional"),
        "not_recommended_count": decisions.count("not_recommended"),
    }

    # The flags in the three sections below are fixed values; nothing in this
    # function computes them from the workload.
    provider_section = {
        "name": provider,
        "model": model,
        "feature_matrix_authoritative": False,
        "feature_recheck_required": True,
    }
    claim_boundary = {
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
        "requires_matched_successful_tasks": True,
        "requires_non_inferior_quality": True,
        "requires_shifted_cost_accounting": True,
        "provider_features_are_caller_supplied_or_unknown": True,
    }
    privacy = {
        "raw_prompt_emitted": False,
        "raw_request_emitted": False,
        "raw_paths_emitted": False,
        "workload_stored": False,
        "provider_call_performed": False,
        "queue_started": False,
    }

    report = {
        "schema_version": SCHEMA_VERSION,
        "tool": TOOL_NAME,
        "mode": "route_advisor",
        "provider": provider_section,
        "provider_features": provider_features,
        "task": task,
        "request_profile": request_profile,
        "total_cost_accounting": total_cost,
        "batchability": batchability,
        "route_recommendations": recommendations,
        "routing_decision": routing_decision,
        "claim_boundary": claim_boundary,
        "privacy": privacy,
    }
    emit(report, json_mode=args.json)
    return 0
2600
+
2601
+
1848
2602
  def usage_int(data: dict[str, Any], key: str) -> int:
1849
2603
  value = data.get(key, 0)
1850
2604
  try:
@@ -2276,6 +3030,15 @@ def emit(data: dict[str, Any], *, json_mode: bool) -> None:
2276
3030
  elif mode == "compile":
2277
3031
  findings = data.get("findings", []) if isinstance(data.get("findings"), list) else []
2278
3032
  print(f"{TOOL_NAME}: compile findings={len(findings)}")
3033
+ elif mode == "route_advisor":
3034
+ batchability = data.get("batchability", {}) if isinstance(data.get("batchability"), dict) else {}
3035
+ routing = data.get("routing_decision", {}) if isinstance(data.get("routing_decision"), dict) else {}
3036
+ total = data.get("total_cost_accounting", {}) if isinstance(data.get("total_cost_accounting"), dict) else {}
3037
+ print(
3038
+ f"{TOOL_NAME}: route-advisor batch={batchability.get('level', 'unknown')} "
3039
+ f"candidates={routing.get('candidate_count', 0)} conditional={routing.get('conditional_count', 0)} "
3040
+ f"total_with_shift=${total.get('total_cost_with_shift_usd', 0)}"
3041
+ )
2279
3042
  else:
2280
3043
  summary = data.get("summary", {}) if isinstance(data.get("summary"), dict) else {}
2281
3044
  print(f"{TOOL_NAME}: ledger entries={summary.get('entries', 0)}")
@@ -2329,6 +3092,22 @@ def build_parser() -> argparse.ArgumentParser:
2329
3092
  compile_parser.add_argument("--json", action="store_true", help="emit machine-readable JSON")
2330
3093
  compile_parser.set_defaults(func=compile_command)
2331
3094
 
3095
+ route = sub.add_parser(
3096
+ "route-advisor",
3097
+ help="advise on batchability, provider features, total cost, and route candidates",
3098
+ description="advise on batchability, provider features, total cost, and route candidates without provider calls or queue runtime",
3099
+ )
3100
+ route.add_argument("--workload", default="-", help="workload JSON path, or '-' for stdin")
3101
+ route.add_argument("--provider", help="provider label override; advisory only")
3102
+ route.add_argument("--model", help="model label override for pricing lookup; advisory only")
3103
+ route.add_argument("--feature", action="append", default=[], help="provider feature override such as batch_api=true or structured_outputs=false")
3104
+ route.add_argument("--latency-class", choices=sorted(ROUTE_ALLOWED_LATENCY_CLASSES), help="latency class override")
3105
+ route.add_argument("--risk", choices=sorted(ROUTE_ALLOWED_RISK_LEVELS), help="risk tier override")
3106
+ route.add_argument("--quality-gate", choices=sorted(ROUTE_ALLOWED_QUALITY_GATES), help="quality gate override")
3107
+ route.add_argument("--task-kind", help="task kind label such as extract, summarize, code_edit, or unknown")
3108
+ add_common_cost_args(route)
3109
+ route.set_defaults(func=route_advisor_command)
3110
+
2332
3111
  return parser
2333
3112
 
2334
3113