@ictechgy/context-guard 0.4.9 → 0.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/README.ko.md +41 -24
- package/README.md +66 -26
- package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
- package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
- package/docs/distribution.md +10 -7
- package/docs/experimental-benchmark-fixtures.md +8 -1
- package/package.json +3 -6
- package/packaging/homebrew/context-guard.rb.template +1 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +9 -6
- package/plugins/context-guard/README.md +21 -13
- package/plugins/context-guard/bin/context-guard +113 -26
- package/plugins/context-guard/bin/context-guard-artifact +542 -46
- package/plugins/context-guard/bin/context-guard-cache-score +380 -0
- package/plugins/context-guard/bin/context-guard-compress +146 -1
- package/plugins/context-guard/bin/context-guard-cost +783 -4
- package/plugins/context-guard/bin/context-guard-experiments +99 -18
- package/plugins/context-guard/bin/context-guard-failed-nudge +3 -0
- package/plugins/context-guard/bin/context-guard-filter +163 -7
- package/plugins/context-guard/bin/context-guard-guard-read +3 -0
- package/plugins/context-guard/bin/context-guard-pack +602 -43
- package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
- package/plugins/context-guard/bin/context-guard-setup +165 -31
- package/plugins/context-guard/bin/context-guard-statusline +490 -283
- package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
- package/plugins/context-guard/bin/context-guard-tool-prune +241 -1
- package/plugins/context-guard/lib/context_guard_commands.py +206 -0
- package/plugins/context-guard/skills/setup/SKILL.md +1 -0
- package/context-guard-kit/README.md +0 -91
- package/context-guard-kit/benchmark_runner.py +0 -2401
- package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
- package/context-guard-kit/context_compress.py +0 -695
- package/context-guard-kit/context_escrow.py +0 -935
- package/context-guard-kit/context_filter.py +0 -637
- package/context-guard-kit/context_guard_cli.py +0 -325
- package/context-guard-kit/context_guard_diet.py +0 -1711
- package/context-guard-kit/context_pack.py +0 -2713
- package/context-guard-kit/cost_guard.py +0 -2349
- package/context-guard-kit/experimental_registry.py +0 -4348
- package/context-guard-kit/failed_attempt_nudge.py +0 -567
- package/context-guard-kit/guard_large_read.py +0 -690
- package/context-guard-kit/hook_secret_patterns.py +0 -43
- package/context-guard-kit/read_symbol.py +0 -483
- package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
- package/context-guard-kit/sanitize_output.py +0 -725
- package/context-guard-kit/settings.example.json +0 -67
- package/context-guard-kit/setup_wizard.py +0 -2515
- package/context-guard-kit/statusline.sh +0 -362
- package/context-guard-kit/statusline_merged.sh +0 -157
- package/context-guard-kit/tool_schema_pruner.py +0 -837
- package/context-guard-kit/trim_command_output.py +0 -1449
|
@@ -55,6 +55,42 @@ LEDGER_OPEN_RETRY_SECONDS = 0.01
|
|
|
55
55
|
TTL_SECONDS = {"5m": 5 * 60, "1h": 60 * 60}
|
|
56
56
|
ANTHROPIC_DOCS_URL = "https://docs.anthropic.com/en/build-with-claude/prompt-caching"
|
|
57
57
|
ANTHROPIC_PRICING_URL = "https://platform.claude.com/docs/en/about-claude/pricing"
|
|
58
|
+
# Canonical provider-feature identifiers, in reporting order.
ROUTE_FEATURE_KEYS = (
    "batch_api",
    "prompt_cache",
    "structured_outputs",
    "lower_cost_models",
    "tool_search",
)
# Accepted spellings for each canonical feature key (alias -> canonical).
ROUTE_FEATURE_ALIASES = {
    alias: canonical
    for canonical, spellings in (
        ("batch_api", ("batch", "batch-api", "batch_api", "batchapi")),
        ("prompt_cache", ("prompt-cache", "prompt_cache", "cache")),
        (
            "structured_outputs",
            (
                "structured-output",
                "structured-outputs",
                "structured_output",
                "structured_outputs",
                "json-schema",
                "json_schema",
            ),
        ),
        (
            "lower_cost_models",
            ("lower-cost-models", "lower_cost_models", "cheap-model", "cheap_models"),
        ),
        ("tool_search", ("tool-search", "tool_search")),
    )
    for alias in spellings
}
# Closed vocabularies for routing metadata values.
ROUTE_ALLOWED_LATENCY_CLASSES = {"unknown", "interactive", "async", "batch", "offline"}
ROUTE_ALLOWED_RISK_LEVELS = {"unknown", "low", "medium", "high"}
ROUTE_ALLOWED_QUALITY_GATES = {"unknown", "pass", "fail"}
# Task kinds listed as structured work.
ROUTE_STRUCTURED_TASK_KINDS = {
    "classify", "classification",
    "extract", "extraction",
    "transform",
    "summarize", "summary",
    "batch_eval", "eval",
}
|
|
58
94
|
ALLOWED_FIRST_COMPONENT_SYMLINKS = {
|
|
59
95
|
"tmp": Path("/private/tmp"),
|
|
60
96
|
"var": Path("/private/var"),
|
|
@@ -1122,8 +1158,12 @@ def load_or_create_hmac_key(store_dir: Path) -> bytes:
|
|
|
1122
1158
|
cleanup_key_lock(lock_dir, locked)
|
|
1123
1159
|
|
|
1124
1160
|
|
|
1161
|
+
def keyed_hmac_bytes(key: bytes, data: bytes) -> str:
    """Return the hex-encoded HMAC-SHA256 of ``data`` keyed with ``key``."""

    mac = hmac.new(key, data, digestmod=hashlib.sha256)
    return mac.hexdigest()
|
|
1163
|
+
|
|
1164
|
+
|
|
1125
1165
|
def keyed_hmac(key: bytes, text: str) -> str:
    """Return the keyed HMAC hex digest of ``text`` after UTF-8 encoding."""

    # errors="replace" keeps unencodable characters (e.g. lone surrogates)
    # from raising during encoding.
    encoded = text.encode("utf-8", errors="replace")
    return keyed_hmac_bytes(key, encoded)
|
|
1127
1167
|
|
|
1128
1168
|
|
|
1129
1169
|
def ledger_path(store_dir: Path) -> Path:
|
|
@@ -1503,10 +1543,12 @@ def build_fingerprints(breakpoints: list[CacheBreakpoint], key: bytes) -> tuple[
|
|
|
1503
1543
|
for bp in breakpoints:
|
|
1504
1544
|
canonical = json_bytes(bp.prefix)
|
|
1505
1545
|
section_canonical = json_bytes(bp.section)
|
|
1546
|
+
canonical_bytes = canonical.encode("utf-8", errors="replace")
|
|
1547
|
+
digest = keyed_hmac_bytes(key, canonical_bytes)
|
|
1506
1548
|
bp_redactions = secret_count_in_text(canonical)
|
|
1507
1549
|
redactions += bp_redactions
|
|
1508
1550
|
prefix_tokens = token_proxy_text(canonical)
|
|
1509
|
-
prefix_bytes =
|
|
1551
|
+
prefix_bytes = len(canonical_bytes)
|
|
1510
1552
|
prefix_delta_tokens = max(0, prefix_tokens - previous_prefix_tokens)
|
|
1511
1553
|
prefix_delta_bytes = max(0, prefix_bytes - previous_prefix_bytes)
|
|
1512
1554
|
previous_prefix_tokens = max(previous_prefix_tokens, prefix_tokens)
|
|
@@ -1516,8 +1558,8 @@ def build_fingerprints(breakpoints: list[CacheBreakpoint], key: bytes) -> tuple[
|
|
|
1516
1558
|
"breakpoint_id": bp.breakpoint_id,
|
|
1517
1559
|
"kind": bp.kind,
|
|
1518
1560
|
"ttl": bp.ttl,
|
|
1519
|
-
"hmac":
|
|
1520
|
-
"display_hmac": "hmac-sha256:" +
|
|
1561
|
+
"hmac": digest,
|
|
1562
|
+
"display_hmac": "hmac-sha256:" + digest[:16],
|
|
1521
1563
|
"prefix_bytes": prefix_bytes,
|
|
1522
1564
|
"prefix_delta_bytes": prefix_delta_bytes,
|
|
1523
1565
|
"section_bytes": byte_len_text(section_canonical),
|
|
@@ -1845,6 +1887,718 @@ def preflight_command(args: argparse.Namespace) -> int:
|
|
|
1845
1887
|
return 3 if block else 0
|
|
1846
1888
|
|
|
1847
1889
|
|
|
1890
|
+
def advisory_label(value: Any, *, default: str = "unknown", limit: int = 80) -> str:
    """Return a bounded identifier-like label without echoing secrets or paths.

    ``None``, blank input, and input that sanitizes to nothing yield
    ``default``. Secret-like text yields ``"redacted"`` and anything containing
    a slash or backslash yields ``"path-redacted"``.
    """

    candidate = "" if value is None else str(value).strip()
    if not candidate:
        return default
    if secret_count_in_text(candidate):
        return "redacted"
    if any(separator in candidate for separator in ("/", "\\")):
        return "path-redacted"
    # Collapse every run of characters outside the identifier alphabet to "-".
    slug = re.sub(r"[^A-Za-z0-9_.:-]+", "-", candidate).strip("-")
    return slug[:limit] if slug else default
|
|
1906
|
+
|
|
1907
|
+
|
|
1908
|
+
# Leading segments that mark an "a/b" model value as a local path.
ROUTE_MODEL_LOCAL_PATH_FIRST_SEGMENTS = {
    "checkpoint", "checkpoints", "ckpt",
    "data", "dataset", "datasets",
    "model", "models",
    "private", "tmp", "weights",
}
# File suffixes that mark an "a/b" model value as a local path.
ROUTE_MODEL_LOCAL_PATH_EXTENSIONS = {
    ".bin", ".ckpt", ".gguf", ".onnx", ".pt", ".pth", ".safetensors",
    ".json", ".yaml", ".yml",
}
|
|
1933
|
+
|
|
1934
|
+
|
|
1935
|
+
def route_model_path_like(text: str) -> bool:
    """Return True when ``text`` looks like a filesystem path rather than a model id."""

    # Absolute, home-relative, dot-relative, or backslash-rooted prefixes.
    if text.startswith(("/", "\\", "~", "./", "../")):
        return True
    # Any backslash, or a drive-letter prefix such as "C:\" / "C:/".
    if "\\" in text or re.match(r"^[A-Za-z]:[\\/]", text) is not None:
        return True
    lowered = text.lower()
    if any(marker in lowered for marker in ("/users/", "/home/", "/private/")):
        return True
    if "/" not in text:
        return False

    # Only a clean two-segment "a/b" value can still be a model identifier.
    parts = text.split("/")
    if len(parts) != 2:
        return True
    if any(part in {"", ".", ".."} for part in parts):
        return True
    head, tail = (part.strip().lower() for part in parts)
    if head in ROUTE_MODEL_LOCAL_PATH_FIRST_SEGMENTS:
        return True
    return tail.endswith(tuple(ROUTE_MODEL_LOCAL_PATH_EXTENSIONS))
|
|
1956
|
+
|
|
1957
|
+
|
|
1958
|
+
def route_model_label(value: Any, *, default: str = "unknown", limit: int = 120) -> str:
    """Return a model identifier label while redacting local-path-like values.

    ``None``, blank input, and input that sanitizes to nothing yield
    ``default``. Secret-like text yields ``"redacted"`` and path-like text
    yields ``"path-redacted"``. "/" is kept in the sanitized label.
    """

    candidate = "" if value is None else str(value).strip()
    if not candidate:
        return default
    if secret_count_in_text(candidate):
        return "redacted"
    if route_model_path_like(candidate):
        return "path-redacted"
    slug = re.sub(r"[^A-Za-z0-9_.:/-]+", "-", candidate).strip("-")
    return slug[:limit] if slug else default
|
|
1974
|
+
|
|
1975
|
+
|
|
1976
|
+
def route_model_for_pricing(value: Any, fallback: str) -> str:
    """Return the stripped model string, or ``fallback`` when it is missing, blank, or secret-like."""

    if value is None:
        return fallback
    candidate = str(value).strip()
    usable = bool(candidate) and not secret_count_in_text(candidate)
    return candidate if usable else fallback
|
|
1983
|
+
|
|
1984
|
+
|
|
1985
|
+
def finite_nonnegative_value(value: Any) -> float | None:
    """Coerce ``value`` to a finite float >= 0, or return ``None``.

    ``None`` and booleans are rejected outright, as is anything ``float()``
    cannot convert, along with NaN, infinities, and negative numbers.
    """

    if isinstance(value, bool) or value is None:
        return None
    try:
        parsed = float(value)
    except (TypeError, ValueError, OverflowError):
        return None
    if math.isfinite(parsed) and parsed >= 0:
        return parsed
    return None
|
|
1995
|
+
|
|
1996
|
+
|
|
1997
|
+
def route_bool(value: Any) -> bool | None:
    """Parse a tri-state flag: True, False, or ``None`` when unknown.

    Booleans pass through; the numbers 1 and 0 map to True and False; strings
    are matched case-insensitively against fixed yes/no vocabularies. Anything
    else yields ``None``.
    """

    if isinstance(value, bool):
        return value
    if isinstance(value, (int, float)):
        if value == 1:
            return True
        if value == 0:
            return False
        return None
    if not isinstance(value, str):
        return None

    token = value.strip().lower()
    truthy = {"1", "true", "yes", "y", "on", "supported", "available"}
    falsy = {"0", "false", "no", "n", "off", "unsupported", "unavailable"}
    if token in truthy:
        return True
    if token in falsy:
        return False
    # "", "unknown", "unset", "null", "none" and any unrecognized token.
    return None
|
|
2014
|
+
|
|
2015
|
+
|
|
2016
|
+
def route_choice(value: Any, allowed: set[str], *, default: str = "unknown") -> str:
    """Normalize ``value`` (trim, lowercase, "-" to "_") and return it if allowed, else ``default``."""

    if value is None:
        return default
    normalized = str(value).strip().lower().replace("-", "_")
    if normalized in allowed:
        return normalized
    return default
|
|
2021
|
+
|
|
2022
|
+
|
|
2023
|
+
def route_nested_dict(data: dict[str, Any], *keys: str) -> dict[str, Any]:
    """Return the first value under ``keys`` (in order) that is a dict, else an empty dict."""

    candidates = (data.get(key) for key in keys)
    return next((found for found in candidates if isinstance(found, dict)), {})
|
|
2029
|
+
|
|
2030
|
+
|
|
2031
|
+
def first_present_mapping_value(*containers: dict[str, Any], keys: tuple[str, ...]) -> Any:
    """Return the value of the first present key, scanning containers then keys in order.

    Presence is decided by ``key in container``, so an explicit ``None`` value
    counts as found and is returned as-is. Returns ``None`` when nothing matches.
    """

    hits = (
        container.get(key)
        for container in containers
        for key in keys
        if key in container
    )
    return next(hits, None)
|
|
2037
|
+
|
|
2038
|
+
|
|
2039
|
+
def first_nonnegative_cost(*containers: dict[str, Any], keys: tuple[str, ...]) -> float | None:
    """Return the first present value that parses as a finite non-negative number.

    Containers are scanned in order and, within each, keys in order. Present
    but unusable values are skipped rather than ending the search.
    """

    present_values = (
        container.get(key)
        for container in containers
        for key in keys
        if key in container
    )
    for raw in present_values:
        amount = finite_nonnegative_value(raw)
        if amount is not None:
            return amount
    return None
|
|
2048
|
+
|
|
2049
|
+
|
|
2050
|
+
def sum_nonnegative_costs(container: dict[str, Any], keys: tuple[str, ...]) -> tuple[float, list[str]]:
    """Sum the usable cost values under ``keys`` and list the keys that contributed."""

    parsed = ((key, finite_nonnegative_value(container.get(key))) for key in keys)
    running_total = 0.0
    contributing: list[str] = []
    # Plain left-to-right accumulation; deliberately not the built-in sum().
    for key, amount in parsed:
        if amount is not None:
            running_total += amount
            contributing.append(key)
    return running_total, contributing
|
|
2060
|
+
|
|
2061
|
+
|
|
2062
|
+
def sum_nonnegative_costs_from(*containers: dict[str, Any], keys: tuple[str, ...]) -> tuple[float, list[str]]:
    """Sum one usable cost per key, taking each from the first container that supplies it.

    Returns the total and the keys that contributed, in ``keys`` order.
    """

    running_total = 0.0
    contributing: list[str] = []
    # Plain left-to-right accumulation; deliberately not the built-in sum().
    for key in keys:
        amount = first_nonnegative_cost(*containers, keys=(key,))
        if amount is not None:
            running_total += amount
            contributing.append(key)
    return running_total, contributing
|
|
2072
|
+
|
|
2073
|
+
|
|
2074
|
+
def parse_feature_overrides(raw_features: list[str] | None) -> dict[str, bool]:
    """Parse ``name[=value]`` / ``name[:value]`` feature flags into canonical booleans.

    Each entry names a provider feature by any spelling listed in
    ``ROUTE_FEATURE_ALIASES``; a bare name means ``true``. Unknown names and
    values that do not parse as true/false are reported through ``fail``.
    When a feature is given more than once, the last entry wins.
    """

    out: dict[str, bool] = {}
    for raw in raw_features or []:
        if "=" in raw:
            key, raw_value = raw.split("=", 1)
        elif ":" in raw:
            key, raw_value = raw.split(":", 1)
        else:
            key, raw_value = raw, "true"
        lowered = key.strip().lower()
        # Try the spelling as typed first: some aliases (e.g. "cheap_models")
        # exist only in underscore form, so hyphenating up front would make
        # them unreachable. Fall back to the hyphenated spelling afterwards.
        normalized_key = ROUTE_FEATURE_ALIASES.get(lowered)
        if normalized_key is None:
            normalized_key = ROUTE_FEATURE_ALIASES.get(lowered.replace("_", "-"))
        # Never echo the raw key: it is sanitized before use in messages.
        display_key = advisory_label(key, default="redacted-route-feature")
        if normalized_key is None:
            fail(f"unknown route feature {display_key!r}; expected one of {', '.join(ROUTE_FEATURE_KEYS)}")
        parsed = route_bool(raw_value)
        if parsed is None:
            fail(f"route feature {display_key!r} must be true or false")
        out[normalized_key] = parsed
    return out
|
|
2092
|
+
|
|
2093
|
+
|
|
2094
|
+
def provider_features_for_workload(workload: dict[str, Any], args: argparse.Namespace) -> dict[str, Any]:
    """Resolve per-feature provider support from the workload and ``--feature`` flags.

    For every key in ``ROUTE_FEATURE_KEYS`` the workload's ``provider_features``
    mapping is consulted under each accepted spelling, in sorted order, and the
    first value that parses as a boolean is used. A flag override for the same
    feature then takes precedence. Every entry is marked as needing a recheck.
    """

    declared_in_workload = workload.get("provider_features")
    if not isinstance(declared_in_workload, dict):
        declared_in_workload = {}
    flag_overrides = parse_feature_overrides(getattr(args, "feature", None))

    features: dict[str, dict[str, Any]] = {}
    for key in ROUTE_FEATURE_KEYS:
        spellings = {key, key.replace("_", "-")} | {
            alias for alias, canonical in ROUTE_FEATURE_ALIASES.items() if canonical == key
        }
        supported: bool | None = None
        source = "unknown"
        for spelling in sorted(spellings):
            if spelling not in declared_in_workload:
                continue
            parsed = route_bool(declared_in_workload.get(spelling))
            if parsed is None:
                # Unparseable value: keep looking at the remaining spellings.
                continue
            supported, source = parsed, "workload"
            break
        if key in flag_overrides:
            supported, source = flag_overrides[key], "flag"
        features[key] = {
            "supported": supported,
            "source": source,
            "recheck_required": True,
            "reason": "provider_features are caller-supplied or unknown; recheck current provider documentation before operational routing",
        }

    declared = sum(1 for entry in features.values() if entry["supported"] is not None)
    return {
        "features": features,
        "declared_feature_count": declared,
        "unknown_feature_count": len(features) - declared,
        "caller_supplied": declared > 0,
        "authoritative_provider_matrix": False,
        "recheck_required": True,
    }
|
|
2129
|
+
|
|
2130
|
+
|
|
2131
|
+
def route_usage_object(workload: dict[str, Any]) -> dict[str, Any]:
    """Locate the provider usage mapping inside a workload, or return ``{}``.

    Lookup order: top-level ``usage`` / ``provider_usage``, then
    ``response.usage``, then ``telemetry.usage`` / ``telemetry.provider_usage``.
    A wrapper dict that itself holds a dict under ``usage`` is unwrapped once.
    """

    def unwrap(candidate: dict[str, Any]) -> dict[str, Any]:
        nested = candidate.get("usage")
        return nested if isinstance(nested, dict) else candidate

    top_level = workload.get("usage") or workload.get("provider_usage")
    if isinstance(top_level, dict):
        return unwrap(top_level)

    response = workload.get("response")
    if isinstance(response, dict):
        response_usage = response.get("usage")
        if isinstance(response_usage, dict):
            return response_usage

    telemetry = workload.get("telemetry")
    if isinstance(telemetry, dict):
        telemetry_usage = telemetry.get("usage") or telemetry.get("provider_usage")
        if isinstance(telemetry_usage, dict):
            return unwrap(telemetry_usage)
    return {}
|
|
2144
|
+
|
|
2145
|
+
|
|
2146
|
+
def usage_has_measured_tokens(usage: dict[str, Any]) -> bool:
    """Return True when any token counter is positive or ``cache_creation`` is truthy."""

    token_fields = (
        "input_tokens",
        "output_tokens",
        "cache_creation_input_tokens",
        "cache_creation_input_tokens_5m",
        "cache_creation_input_tokens_1h",
        "cache_read_input_tokens",
    )
    for field in token_fields:
        if usage_int(usage, field) > 0:
            return True
    return bool(usage.get("cache_creation"))
|
|
2158
|
+
|
|
2159
|
+
|
|
2160
|
+
def cost_from_usage(usage: dict[str, Any], *, profile: dict[str, Any], model: str, exchange: float) -> dict[str, Any]:
    """Price a usage mapping with the profile's rates for ``model``.

    Returns the USD and KRW cost, the rate key that was used, and the
    normalized token counts the cost was computed from.
    """

    input_rate, output_rate, model_rate_key = rates_for_model(profile, model)
    write_mult, read_mult = pricing_multipliers(profile)

    input_tokens = usage_int(usage, "input_tokens")
    output_tokens = usage_int(usage, "output_tokens")
    cache_creation_5m, cache_creation_1h = cache_creation_buckets(usage)
    cache_read = usage_int(usage, "cache_read_input_tokens")

    # Cache writes and reads are priced off the input rate with a multiplier.
    input_cost = money(input_tokens, input_rate)
    output_cost = money(output_tokens, output_rate)
    write_5m_cost = money(cache_creation_5m, input_rate, write_mult["5m"])
    write_1h_cost = money(cache_creation_1h, input_rate, write_mult["1h"])
    read_cost = money(cache_read, input_rate, read_mult)
    cost_usd = input_cost + output_cost + write_5m_cost + write_1h_cost + read_cost

    normalized_usage = {
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cache_creation_input_tokens_5m": cache_creation_5m,
        "cache_creation_input_tokens_1h": cache_creation_1h,
        "cache_read_input_tokens": cache_read,
    }
    return {
        "cost_usd": round(cost_usd, 8),
        "cost_krw": round(krw(cost_usd, exchange), 2),
        "model_rate_key": model_rate_key,
        "usage": normalized_usage,
    }
|
|
2186
|
+
|
|
2187
|
+
|
|
2188
|
+
def request_profile_for_route(workload: dict[str, Any]) -> dict[str, Any]:
    """Summarize the workload's ``request`` (size and cache breakpoints) without emitting it.

    When ``request`` is missing or not a dict, a placeholder profile with
    ``present: False`` is returned.
    """

    request = workload.get("request")
    if not isinstance(request, dict):
        return {
            "present": False,
            "token_proxy": "unavailable",
            "prompt_tokens_estimated": None,
            "cache_breakpoint_count": 0,
            "cacheable_tokens_estimated": 0,
            "raw_request_emitted": False,
        }

    breakpoints, parse_meta = extract_cache_breakpoints(request)
    # A fixed all-zero key is passed: only the token estimates and redaction
    # count from the fingerprints are read below.
    fingerprints, redactions = build_fingerprints(breakpoints, bytes(32))
    token_estimates = [int(fp.get("tokens_estimated") or 0) for fp in fingerprints]
    cacheable_tokens = max(token_estimates, default=0)

    profile: dict[str, Any] = {"present": True}
    profile["token_proxy"] = f"chars_div_{TOKEN_PROXY_CHARS_PER_TOKEN}"
    profile["prompt_tokens_estimated"] = token_proxy_obj(strip_known_cache_controls(request))
    profile["cache_breakpoint_count"] = len(breakpoints)
    profile["cacheable_tokens_estimated"] = cacheable_tokens
    profile["cache_control_markers"] = int(parse_meta.get("cache_control_markers") or 0)
    profile["unsupported_cache_controls"] = int(parse_meta.get("unsupported_cache_controls") or 0)
    profile["secret_like_values_detected"] = redactions
    profile["raw_request_emitted"] = False
    return profile
|
|
2213
|
+
|
|
2214
|
+
|
|
2215
|
+
def route_task_metadata(workload: dict[str, Any], args: argparse.Namespace) -> dict[str, Any]:
    """Collect normalized routing metadata for a task.

    Truthy command-line values take precedence; otherwise values are read from
    the task section (``task`` / ``task_metadata`` / ``routing``) and then the
    workload itself. ``quality_gate`` additionally falls back to ``telemetry``.
    """

    task = route_nested_dict(workload, "task", "task_metadata", "routing")
    telemetry = route_nested_dict(workload, "telemetry")

    def declared(flag_name: str, *containers: dict[str, Any], keys: tuple[str, ...]) -> Any:
        # A truthy CLI value wins; otherwise use the first present mapping key.
        return getattr(args, flag_name, None) or first_present_mapping_value(*containers, keys=keys)

    def task_flag(*keys: str) -> bool:
        # An unknown (None) tri-state collapses to False.
        return bool(route_bool(first_present_mapping_value(task, workload, keys=keys)))

    latency = route_choice(
        declared("latency_class", task, workload, keys=("latency_class", "latency", "mode")),
        ROUTE_ALLOWED_LATENCY_CLASSES,
    )
    risk = route_choice(
        declared("risk", task, workload, keys=("risk", "risk_level")),
        ROUTE_ALLOWED_RISK_LEVELS,
    )
    quality_gate = route_choice(
        declared("quality_gate", task, workload, telemetry, keys=("quality_gate", "quality")),
        ROUTE_ALLOWED_QUALITY_GATES,
    )
    task_kind = advisory_label(
        declared("task_kind", task, workload, keys=("task_kind", "kind", "type")),
        default="unknown",
        limit=48,
    ).lower()
    deadline_seconds = safe_int(
        first_present_mapping_value(task, workload, keys=("deadline_seconds", "max_latency_seconds")),
        0,
    )
    return {
        "latency_class": latency,
        "risk": risk,
        "quality_gate": quality_gate,
        "task_kind": task_kind,
        "deadline_seconds": deadline_seconds,
        "requires_interaction": task_flag("requires_interaction", "interactive_required", "user_blocking"),
        "has_external_side_effects": task_flag("has_external_side_effects", "side_effects"),
        "order_sensitive": task_flag("order_sensitive", "requires_order"),
    }
|
|
2250
|
+
|
|
2251
|
+
|
|
2252
|
+
def total_cost_accounting_for_route(
    workload: dict[str, Any],
    *,
    profile: dict[str, Any],
    model: str,
    exchange: float,
) -> dict[str, Any]:
    """Build the advisory total-cost report (primary + external + local) for a workload.

    The primary cost is an explicit telemetry/workload figure when one is
    present, otherwise an estimate priced from usage tokens, otherwise 0.0.
    External and local costs use a supplied aggregate when present and fall
    back to the sum of their component fields. A supplied total overrides the
    computed one in the reported total.
    """

    telemetry = route_nested_dict(workload, "telemetry")
    shifted = route_nested_dict(workload, "shifted_costs", "shifted_cost", "auxiliary_costs")
    usage = route_usage_object(workload)
    usage_measured = usage_has_measured_tokens(usage)
    usage_cost = (
        cost_from_usage(usage, profile=profile, model=model, exchange=exchange)
        if usage_measured
        else None
    )

    # Primary cost: explicit figure > estimate from usage tokens > zero.
    explicit_primary = first_nonnegative_cost(
        telemetry,
        workload,
        keys=("primary_cost_usd", "provider_cost_usd", "observed_cost_usd", "cost_usd"),
    )
    if explicit_primary is not None:
        primary_cost, primary_source = explicit_primary, "explicit_telemetry"
    elif usage_cost is not None:
        primary_cost = float(usage_cost["cost_usd"])
        primary_source = "estimated_from_provider_usage_fields"
    else:
        primary_cost, primary_source = 0.0, "unavailable"

    # External cost: the aggregate field wins over the component breakdown.
    external_aggregate = first_nonnegative_cost(telemetry, shifted, workload, keys=("external_cost_usd",))
    external_component_sum, external_components = sum_nonnegative_costs_from(
        telemetry,
        shifted,
        keys=(
            "subagent_cost_usd",
            "embedding_cost_usd",
            "reranker_cost_usd",
            "tool_call_cost_usd",
            "retry_cost_usd",
            "auxiliary_provider_cost_usd",
        ),
    )
    external_cost = external_component_sum if external_aggregate is None else external_aggregate

    # Local cost: same aggregate-over-components rule.
    local_aggregate = first_nonnegative_cost(
        telemetry,
        shifted,
        workload,
        keys=("local_cost_usd", "self_hosted_cost_usd", "local_model_cost_usd"),
    )
    local_component_sum, local_components = sum_nonnegative_costs_from(
        telemetry,
        shifted,
        keys=("local_server_cost_usd", "local_energy_cost_usd", "storage_cost_usd"),
    )
    local_cost = local_component_sum if local_aggregate is None else local_aggregate

    provided_total = first_nonnegative_cost(
        telemetry,
        shifted,
        workload,
        keys=("total_cost_with_shift_usd", "total_shifted_cost_usd"),
    )
    computed_total = primary_cost + external_cost + local_cost
    total = computed_total if provided_total is None else provided_total

    def counter(*containers: dict[str, Any], keys: tuple[str, ...]) -> Any:
        return safe_int(first_present_mapping_value(*containers, keys=keys), 0)

    external_tokens = counter(telemetry, shifted, workload, keys=("external_tokens", "subagent_tokens", "embedding_tokens"))
    retry_count = counter(telemetry, workload, keys=("retry_count", "retries"))
    subagent_count = counter(telemetry, workload, keys=("subagent_count", "subagents"))
    tool_call_count = counter(telemetry, workload, keys=("tool_call_count", "tool_calls"))

    external_cost_supplied = external_aggregate is not None or bool(external_components)
    local_cost_supplied = local_aggregate is not None or bool(local_components)
    provided_total_supplied = provided_total is not None
    any_shifted_cost = external_cost_supplied or local_cost_supplied or provided_total_supplied
    # Warn when shifted activity was counted but none of it was costed.
    missing_shifted_cost = bool(
        (external_tokens or retry_count or subagent_count or tool_call_count)
        and not any_shifted_cost
    )

    pricing = {
        "profile": str(profile.get("name") or "custom"),
        "release_recheck_required": bool(profile.get("release_recheck_required", True)),
        "source_urls": profile.get("source_urls", [ANTHROPIC_DOCS_URL, ANTHROPIC_PRICING_URL]),
        "usd_to_krw": exchange,
    }
    run_counters = {
        "external_tokens": external_tokens,
        "retry_count": retry_count,
        "subagent_count": subagent_count,
        "tool_call_count": tool_call_count,
    }
    measurement_availability = {
        "provider_usage_tokens": usage_measured,
        "primary_cost": primary_source != "unavailable",
        "external_cost": external_cost_supplied,
        "local_cost": local_cost_supplied,
        "shifted_cost": bool(any_shifted_cost),
    }
    shifted_cost_accounting = {
        "required": True,
        "diagnostic_only": True,
        "includes_external_or_local_components": bool(external_cost_supplied or local_cost_supplied),
        "missing_shifted_cost_warning": missing_shifted_cost,
        "claim_boundary": "total-cost routing is advisory; hosted savings claims require matched successful tasks with non-inferior quality and measured shifted costs",
    }
    return {
        "currency": "USD",
        "primary_cost_usd": round(primary_cost, 8),
        "primary_cost_source": primary_source,
        "external_cost_usd": round(external_cost, 8),
        "local_cost_usd": round(local_cost, 8),
        "external_cost_supplied": external_cost_supplied,
        "local_cost_supplied": local_cost_supplied,
        "external_component_breakdown_usd": round(external_component_sum, 8),
        "local_component_breakdown_usd": round(local_component_sum, 8),
        "computed_total_cost_with_shift_usd": round(computed_total, 8),
        "total_cost_with_shift_usd": round(total, 8),
        "total_cost_with_shift_krw": round(krw(total, exchange), 2),
        "provided_total_cost_with_shift_usd": round(provided_total, 8) if provided_total is not None else None,
        "pricing": pricing,
        "usage_cost_estimate": usage_cost,
        "components_observed": sorted(set(external_components + local_components)),
        "run_counters": run_counters,
        "measurement_availability": measurement_availability,
        "shifted_cost_accounting": shifted_cost_accounting,
    }
|
|
2367
|
+
|
|
2368
|
+
|
|
2369
|
+
def batchability_for_route(task: dict[str, Any], provider_features: dict[str, Any]) -> dict[str, Any]:
    """Assess whether a task is a candidate for batch-API routing.

    Any blocker makes the route ``not_recommended``. With no blockers, declared
    batch support plus either a deferred latency class or a deadline of at
    least one hour makes it a ``candidate``; everything else is ``conditional``.
    """

    batch_supported = provider_features["features"]["batch_api"]["supported"]
    latency = str(task.get("latency_class") or "unknown")
    deadline = int(task.get("deadline_seconds") or 0)
    deferred_latency = latency in {"async", "batch", "offline"}
    long_deadline = deadline >= 3600

    # Sets suffice: both collections are de-duplicated and sorted on output.
    blockers: set[str] = set()
    reasons: set[str] = set()

    if latency == "interactive":
        blockers.add("interactive_latency")
    elif deferred_latency:
        reasons.add(f"latency_class_{latency}")
    elif long_deadline:
        reasons.add("deadline_allows_batch_window")
    else:
        reasons.add("latency_unknown")

    truthy_field_blockers = (
        ("requires_interaction", "requires_user_interaction"),
        ("has_external_side_effects", "external_side_effects_need_idempotency_review"),
        ("order_sensitive", "order_sensitive"),
    )
    blockers.update(code for field, code in truthy_field_blockers if task.get(field))
    if task.get("risk") == "high":
        blockers.add("high_risk_route")
    if task.get("quality_gate") == "fail":
        blockers.add("quality_gate_failed")

    if batch_supported is False:
        blockers.add("provider_batch_api_not_declared")
    elif batch_supported is None:
        reasons.add("provider_batch_api_unknown_recheck_required")
    else:
        reasons.add("provider_batch_api_declared")

    if blockers:
        level, eligible = "not_recommended", False
    elif batch_supported is True and (deferred_latency or long_deadline):
        level, eligible = "candidate", True
    else:
        level, eligible = "conditional", False

    return {
        "eligible": eligible,
        "level": level,
        "latency_class": latency,
        "deadline_seconds": deadline,
        "reasons": sorted(reasons),
        "blockers": sorted(blockers),
        "requires_current_provider_docs_check": batch_supported is None,
    }
|
|
2418
|
+
|
|
2419
|
+
|
|
2420
|
+
def recommendation(
    rec_id: str,
    *,
    decision: str,
    priority: str,
    rationale: str,
    prerequisites: list[str],
) -> dict[str, Any]:
    """Assemble a single route-recommendation record.

    Args:
        rec_id: Identifier stored under the ``"id"`` key.
        decision: Decision label stored under ``"decision"``.
        priority: Priority label stored under ``"priority"``.
        rationale: Human-readable justification stored under ``"rationale"``.
        prerequisites: List stored as-is (not copied) under ``"prerequisites"``.

    Returns:
        A dict with the keys ``id``, ``decision``, ``priority``, ``rationale``,
        ``prerequisites`` and ``claim_boundary``, in that order.
    """
    record: dict[str, Any] = dict(
        id=rec_id,
        decision=decision,
        priority=priority,
        rationale=rationale,
        prerequisites=prerequisites,
    )
    # The same fixed disclaimer is attached to every record, whatever the decision.
    record["claim_boundary"] = "candidate routing advice only; validate on matched successful tasks before claiming token or cost savings"
    return record
|
|
2436
|
+
|
|
2437
|
+
|
|
2438
|
+
def route_recommendations(
    *,
    task: dict[str, Any],
    provider_features: dict[str, Any],
    request_profile: dict[str, Any],
    batchability: dict[str, Any],
    total_cost: dict[str, Any],
) -> list[dict[str, Any]]:
    """Build the ordered list of route recommendations for one workload.

    The list always contains, in this order: the required
    ``measure-before-claim`` record, the batch-API record, the prompt-cache
    record, the structured-outputs record and the cheaper-model record. A
    final ``record-missing-shifted-costs`` record is appended only when
    ``total_cost["shifted_cost_accounting"]`` has a truthy
    ``missing_shifted_cost_warning`` value.

    Args:
        task: Task metadata; ``task_kind``, ``risk`` and ``quality_gate`` are read.
        provider_features: Must contain ``["features"][<name>]["supported"]``
            for ``prompt_cache``, ``structured_outputs`` and
            ``lower_cost_models``. Each value is treated as tri-state: ``True``,
            ``False`` or anything else (unknown).
        request_profile: ``cache_breakpoint_count`` and
            ``cacheable_tokens_estimated`` are read and coerced with ``int``.
        batchability: ``eligible`` and ``level`` are read.
        total_cost: Must contain a ``shifted_cost_accounting`` mapping.

    Returns:
        A list of records produced by ``recommendation``.
    """

    def _graded(supported: Any) -> str:
        # Only an explicit True earns "candidate"; unknown support stays "conditional".
        return "candidate" if supported is True else "conditional"

    def _priority_for(decision: str) -> str:
        return "P1" if decision == "candidate" else "P2"

    recs: list[dict[str, Any]] = []

    # --- Always-required measurement step -------------------------------
    recs.append(
        recommendation(
            "measure-before-claim",
            decision="required",
            priority="P0",
            rationale="Route changes can shift work into retries, subagents, batch queues, local servers, or provider cache writes; measure total cost with quality gates before claims.",
            prerequisites=["matched_successful_tasks", "non_inferior_quality", "shifted_cost_accounting"],
        )
    )

    # --- Batch API --------------------------------------------------------
    if batchability.get("eligible"):
        batch_decision = "candidate"
    else:
        # Fall back to the batchability level, or "conditional" when it is missing/empty.
        batch_decision = str(batchability.get("level") or "conditional")
    recs.append(
        recommendation(
            "use-batch-api-for-noninteractive-work",
            decision=batch_decision,
            priority=_priority_for(batch_decision),
            rationale="Batch APIs can reduce cost for non-interactive work only when provider support, latency tolerance, idempotency, and quality gates are satisfied.",
            prerequisites=["provider_batch_support_current", "async_or_offline_latency", "idempotency_review", "matched_replay"],
        )
    )

    # --- Prompt cache -----------------------------------------------------
    prompt_cache_support = provider_features["features"]["prompt_cache"]["supported"]
    breakpoint_count = int(request_profile.get("cache_breakpoint_count") or 0)
    cacheable_token_estimate = int(request_profile.get("cacheable_tokens_estimated") or 0)
    if prompt_cache_support is False:
        cache_decision = "not_recommended"
    elif not (breakpoint_count or cacheable_token_estimate):
        # No breakpoints and no cacheable tokens reported by the request profile.
        cache_decision = "needs_request_evidence"
    else:
        cache_decision = _graded(prompt_cache_support)
    recs.append(
        recommendation(
            "preserve-prompt-cache-prefix",
            decision=cache_decision,
            priority=_priority_for(cache_decision),
            rationale="Stable-prefix prompt caching is useful only when current provider support and repeated cacheable request prefixes are verified.",
            prerequisites=["stable_prefix_first", "volatile_tail", "provider_usage_cache_telemetry"],
        )
    )

    # --- Structured outputs -----------------------------------------------
    structured_support = provider_features["features"]["structured_outputs"]["supported"]
    kind_label = str(task.get("task_kind") or "unknown")
    if structured_support is False:
        structured_decision = "not_recommended"
    elif kind_label not in ROUTE_STRUCTURED_TASK_KINDS:
        structured_decision = "needs_task_fit"
    else:
        structured_decision = _graded(structured_support)
    recs.append(
        recommendation(
            "use-structured-outputs-when-task-fits",
            decision=structured_decision,
            priority="P2",
            rationale="Structured outputs can reduce retries and parsing repairs for extraction/classification style work, but they are not a token-savings proof.",
            prerequisites=["schema_fit_review", "retry_rate_measurement", "quality_non_regression"],
        )
    )

    # --- Cheaper model route ----------------------------------------------
    lower_cost_support = provider_features["features"]["lower_cost_models"]["supported"]
    risk_tier = str(task.get("risk") or "unknown")
    gate_state = str(task.get("quality_gate") or "unknown")
    if lower_cost_support is False or risk_tier == "high" or gate_state == "fail":
        cheaper_decision = "not_recommended"
    elif lower_cost_support is True and risk_tier == "low" and gate_state in {"pass", "unknown"}:
        cheaper_decision = "candidate"
    else:
        # Unknown support, or a risk tier other than "low"/"high".
        cheaper_decision = "conditional"
    recs.append(
        recommendation(
            "evaluate-cheaper-model-route",
            decision=cheaper_decision,
            priority="P2",
            rationale="Lower-cost model routing is acceptable only for low-risk or well-gated work and must include corrections, retries, and shifted cost.",
            prerequisites=["risk_tier_low_or_reviewed", "matched_replay", "corrections_guardrail", "retry_cost_accounting"],
        )
    )

    # --- Optional: missing shifted-cost accounting --------------------------
    shifted = total_cost["shifted_cost_accounting"]
    if shifted.get("missing_shifted_cost_warning"):
        recs.append(
            recommendation(
                "record-missing-shifted-costs",
                decision="required",
                priority="P1",
                rationale="Telemetry indicates external tokens, retries, or subagents but no shifted external/local cost component was supplied.",
                prerequisites=["external_cost_usd_or_local_cost_usd", "retry_or_subagent_cost_measurement"],
            )
        )
    return recs
|
|
2533
|
+
|
|
2534
|
+
|
|
2535
|
+
def route_advisor_command(args: argparse.Namespace) -> int:
    """Handle the ``route-advisor`` subcommand: build and emit the advisory report.

    Loads the workload JSON and the pricing profile, derives provider/model
    labels, task metadata, request profile, total-cost accounting and
    batchability through the module's helpers, then passes the assembled
    report to ``emit``.

    Args:
        args: Parsed CLI namespace. ``workload``, ``max_bytes``,
            ``pricing_profile``, ``usd_to_krw`` and ``json`` are read directly;
            ``provider`` and ``model`` are read via ``getattr`` with a ``None``
            default. The namespace is also forwarded to helper functions.

    Returns:
        Always ``0``.
    """
    loaded, _was_truncated = load_json_input(args.workload, max_bytes=args.max_bytes)

    # Accept either a bare workload object or one nested under a "workload" key.
    workload_candidate = loaded
    if isinstance(loaded, dict):
        nested = loaded.get("workload")
        if isinstance(nested, dict):
            workload_candidate = nested
    workload = require_json_object(workload_candidate, "workload")

    profile = load_pricing_profile(args.pricing_profile, max_bytes=args.max_bytes)
    if args.usd_to_krw is not None:
        # A CLI-supplied exchange rate overrides the one stored in the profile.
        profile["usd_to_krw"] = usd_to_krw(profile, args.usd_to_krw)
    exchange = usd_to_krw(profile, None)

    request = workload.get("request")
    if not isinstance(request, dict):
        request = {}

    # Precedence for both labels: CLI override, then workload field, then request field.
    provider = advisory_label(
        getattr(args, "provider", None) or workload.get("provider") or request.get("provider")
    )
    model_raw = getattr(args, "model", None) or workload.get("model") or request.get("model")
    model = route_model_label(model_raw)
    model_for_pricing = route_model_for_pricing(model_raw, model)

    provider_features = provider_features_for_workload(workload, args)
    task = route_task_metadata(workload, args)
    request_profile = request_profile_for_route(workload)
    total_cost = total_cost_accounting_for_route(
        workload,
        profile=profile,
        model=model_for_pricing,
        exchange=exchange,
    )
    batchability = batchability_for_route(task, provider_features)
    recommendations = route_recommendations(
        task=task,
        provider_features=provider_features,
        request_profile=request_profile,
        batchability=batchability,
        total_cost=total_cost,
    )

    def _tally(label: str) -> int:
        # Number of recommendations whose decision equals ``label``.
        return sum(1 for rec in recommendations if rec.get("decision") == label)

    if any(rec["decision"] == "required" for rec in recommendations):
        best_action = "measure_before_claim"
    else:
        best_action = "review_candidates"

    provider_section = {
        "name": provider,
        "model": model,
        "feature_matrix_authoritative": False,
        "feature_recheck_required": True,
    }
    routing_section = {
        "best_current_action": best_action,
        "candidate_count": _tally("candidate"),
        "conditional_count": _tally("conditional"),
        "not_recommended_count": _tally("not_recommended"),
    }
    # The two sections below are fixed constants emitted with every report.
    claim_section = {
        "hosted_api_token_savings_claim_allowed": False,
        "hosted_api_cost_savings_claim_allowed": False,
        "requires_matched_successful_tasks": True,
        "requires_non_inferior_quality": True,
        "requires_shifted_cost_accounting": True,
        "provider_features_are_caller_supplied_or_unknown": True,
    }
    privacy_section = {
        "raw_prompt_emitted": False,
        "raw_request_emitted": False,
        "raw_paths_emitted": False,
        "workload_stored": False,
        "provider_call_performed": False,
        "queue_started": False,
    }

    report = {
        "schema_version": SCHEMA_VERSION,
        "tool": TOOL_NAME,
        "mode": "route_advisor",
        "provider": provider_section,
        "provider_features": provider_features,
        "task": task,
        "request_profile": request_profile,
        "total_cost_accounting": total_cost,
        "batchability": batchability,
        "route_recommendations": recommendations,
        "routing_decision": routing_section,
        "claim_boundary": claim_section,
        "privacy": privacy_section,
    }
    emit(report, json_mode=args.json)
    return 0
|
|
2600
|
+
|
|
2601
|
+
|
|
1848
2602
|
def usage_int(data: dict[str, Any], key: str) -> int:
|
|
1849
2603
|
value = data.get(key, 0)
|
|
1850
2604
|
try:
|
|
@@ -2276,6 +3030,15 @@ def emit(data: dict[str, Any], *, json_mode: bool) -> None:
|
|
|
2276
3030
|
elif mode == "compile":
|
|
2277
3031
|
findings = data.get("findings", []) if isinstance(data.get("findings"), list) else []
|
|
2278
3032
|
print(f"{TOOL_NAME}: compile findings={len(findings)}")
|
|
3033
|
+
elif mode == "route_advisor":
|
|
3034
|
+
batchability = data.get("batchability", {}) if isinstance(data.get("batchability"), dict) else {}
|
|
3035
|
+
routing = data.get("routing_decision", {}) if isinstance(data.get("routing_decision"), dict) else {}
|
|
3036
|
+
total = data.get("total_cost_accounting", {}) if isinstance(data.get("total_cost_accounting"), dict) else {}
|
|
3037
|
+
print(
|
|
3038
|
+
f"{TOOL_NAME}: route-advisor batch={batchability.get('level', 'unknown')} "
|
|
3039
|
+
f"candidates={routing.get('candidate_count', 0)} conditional={routing.get('conditional_count', 0)} "
|
|
3040
|
+
f"total_with_shift=${total.get('total_cost_with_shift_usd', 0)}"
|
|
3041
|
+
)
|
|
2279
3042
|
else:
|
|
2280
3043
|
summary = data.get("summary", {}) if isinstance(data.get("summary"), dict) else {}
|
|
2281
3044
|
print(f"{TOOL_NAME}: ledger entries={summary.get('entries', 0)}")
|
|
@@ -2329,6 +3092,22 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
2329
3092
|
compile_parser.add_argument("--json", action="store_true", help="emit machine-readable JSON")
|
|
2330
3093
|
compile_parser.set_defaults(func=compile_command)
|
|
2331
3094
|
|
|
3095
|
+
route = sub.add_parser(
|
|
3096
|
+
"route-advisor",
|
|
3097
|
+
help="advise on batchability, provider features, total cost, and route candidates",
|
|
3098
|
+
description="advise on batchability, provider features, total cost, and route candidates without provider calls or queue runtime",
|
|
3099
|
+
)
|
|
3100
|
+
route.add_argument("--workload", default="-", help="workload JSON path, or '-' for stdin")
|
|
3101
|
+
route.add_argument("--provider", help="provider label override; advisory only")
|
|
3102
|
+
route.add_argument("--model", help="model label override for pricing lookup; advisory only")
|
|
3103
|
+
route.add_argument("--feature", action="append", default=[], help="provider feature override such as batch_api=true or structured_outputs=false")
|
|
3104
|
+
route.add_argument("--latency-class", choices=sorted(ROUTE_ALLOWED_LATENCY_CLASSES), help="latency class override")
|
|
3105
|
+
route.add_argument("--risk", choices=sorted(ROUTE_ALLOWED_RISK_LEVELS), help="risk tier override")
|
|
3106
|
+
route.add_argument("--quality-gate", choices=sorted(ROUTE_ALLOWED_QUALITY_GATES), help="quality gate override")
|
|
3107
|
+
route.add_argument("--task-kind", help="task kind label such as extract, summarize, code_edit, or unknown")
|
|
3108
|
+
add_common_cost_args(route)
|
|
3109
|
+
route.set_defaults(func=route_advisor_command)
|
|
3110
|
+
|
|
2332
3111
|
return parser
|
|
2333
3112
|
|
|
2334
3113
|
|