costwright 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- costwright/__init__.py +1 -0
- costwright/caps.py +157 -0
- costwright/cli.py +265 -0
- costwright/extract.py +214 -0
- costwright/fusion.py +676 -0
- costwright/mapper.py +101 -0
- costwright/pack.py +69 -0
- costwright/report.py +115 -0
- costwright-0.1.0.dist-info/METADATA +164 -0
- costwright-0.1.0.dist-info/RECORD +14 -0
- costwright-0.1.0.dist-info/WHEEL +5 -0
- costwright-0.1.0.dist-info/entry_points.txt +2 -0
- costwright-0.1.0.dist-info/licenses/LICENSE +202 -0
- costwright-0.1.0.dist-info/top_level.txt +1 -0
costwright/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
costwright/caps.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""costwright caps — detección de constructores LLM sin cap de tokens + sugerencia por provider.
|
|
2
|
+
|
|
3
|
+
La tabla provider→parámetro proviene de §3.2 del paper (verificada contra docs primarias jun-2026):
|
|
4
|
+
el cap correcto es PARAMETER-specific, no provider-specific. NUNCA edita archivos: emite hallazgos
|
|
5
|
+
y, con --patch, un unified diff aplicable con `git apply` (decisión del council 002: P0-2).
|
|
6
|
+
"""
|
|
7
|
+
import ast
|
|
8
|
+
import difflib
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
# Constructor name -> (provider, recommended cap kwarg, degradation note or None).
# Source: paper §3.2, primary provider documentation accessed Jun-2026.
PROVIDER_CAPS = {
    # OpenAI / Azure (langchain + SDKs): Chat Completions uses max_tokens
    # (non-reasoning) or max_completion_tokens (reasoning); the Responses API
    # uses max_output_tokens.
    "ChatOpenAI": (
        "openai",
        "max_tokens",
        "reasoning models: usar max_completion_tokens (Chat) / max_output_tokens (Responses)",
    ),
    "AzureChatOpenAI": (
        "azure",
        "max_tokens",
        "reasoning models: max_completion_tokens — reasoning_tokens ⊆ completion_tokens (cap REAL)",
    ),
    "OpenAI": (
        "openai",
        "max_output_tokens",
        "Responses API: bounds reasoning+output",
    ),
    # Anthropic
    "ChatAnthropic": (
        "anthropic",
        "max_tokens",
        "standard: budget_tokens < max_tokens ⟹ techo real. interleaved/adaptive thinking: el budget puede EXCEDER max_tokens (cap degrada)",
    ),
    "Anthropic": ("anthropic", "max_tokens", "ídem ChatAnthropic"),
    # Google
    "ChatGoogleGenerativeAI": (
        "gemini",
        "max_output_tokens",
        "thinking on: fijar TAMBIÉN thinking_budget — maxOutputTokens NO acota thinking (se factura aparte)",
    ),
    "ChatVertexAI": ("gemini", "max_output_tokens", "ídem Gemini"),
    # Others (langchain)
    "ChatBedrock": (
        "bedrock",
        "max_tokens",
        "replica la semántica Anthropic en modelos Claude",
    ),
    "ChatGroq": ("groq", "max_tokens", None),
    "ChatMistralAI": ("mistral", "max_tokens", None),
    "ChatOllama": ("ollama", "num_predict", "Ollama usa num_predict, no max_tokens"),
    "init_chat_model": (
        "generic",
        "max_tokens",
        "el kwarg efectivo depende del provider resuelto en runtime — verificar",
    ),
    "LLM": ("crewai", "max_tokens", "CrewAI LLM wrapper"),
}

# Any of these keyword arguments on a constructor counts as "some cap is present".
CAP_KWARGS = {
    "max_tokens",
    "max_output_tokens",
    "max_completion_tokens",
    "budget_tokens",
    "max_tokens_to_sample",
    "maxOutputTokens",
    "num_predict",
    "thinking_budget",
}

# Directory names that are never scanned.
EXCLUDE_DIRS = {
    ".venv",
    "venv",
    "node_modules",
    "site-packages",
    ".git",
    "__pycache__",
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def call_name(n: ast.Call) -> str:
    """Return the rightmost identifier of the callee of ``n``.

    ``foo(...)`` yields ``"foo"`` and ``pkg.mod.foo(...)`` yields ``"foo"``.
    Any other callee shape (subscript, call result, lambda, ...) yields ``""``.
    """
    callee = n.func
    for node_type, field in ((ast.Name, "id"), (ast.Attribute, "attr")):
        if isinstance(callee, node_type):
            return getattr(callee, field)
    return ""
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def scan_file(path: Path):
    """Scan one Python file for LLM constructors lacking an effective token cap.

    Returns ``(findings, src)``. ``src`` is the decoded file text, or ``None``
    when the file could not be read or parsed (in which case ``findings`` is
    empty). Each finding is a dict with ``kind`` set to:

    * ``"missing"``  -- the constructor carries none of ``CAP_KWARGS``; the
      dict has ``suggest_kwarg`` and ``note``.
    * ``"degraded"`` -- a cap kwarg is present but is known not to bound the
      spend for that provider/model; the dict has ``have``, ``suggest_kwarg``
      (may be ``None``) and ``why``.

    The file is never modified.
    """
    try:
        src = path.read_text(encoding="utf-8", errors="ignore")
        tree = ast.parse(src)
    except (SyntaxError, ValueError, OSError):
        # ValueError: before Python 3.12 ``ast.parse`` rejects NUL bytes with
        # ValueError instead of SyntaxError; one such file must not abort the scan.
        return [], None

    findings = []
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        name = call_name(node)
        if name not in PROVIDER_CAPS:
            continue
        kwargs_present = {k.arg for k in node.keywords if k.arg}
        provider, kwarg, note = PROVIDER_CAPS[name]

        # Best-effort reasoning-model detection from a literal model name.
        # ``model_name`` is accepted too: LangChain exposes it as an alias of ``model``.
        model_val = next(
            (k.value.value for k in node.keywords
             if k.arg in ("model", "model_name")
             and isinstance(k.value, ast.Constant)
             and isinstance(k.value.value, str)),
            "",
        )
        reasoning = model_val.startswith(("o1", "o3", "o4", "gpt-5"))
        # Only the Chat-API constructors switch kwarg; ``OpenAI`` keeps
        # max_output_tokens whether or not the model is a reasoning model.
        chat_reasoning = reasoning and name in ("ChatOpenAI", "AzureChatOpenAI")
        if chat_reasoning:
            kwarg = "max_completion_tokens"
            note = "reasoning model en Chat API: max_tokens es IGNORADO; usar max_completion_tokens"

        have = sorted(kwargs_present & CAP_KWARGS)
        if not have:
            findings.append({
                "kind": "missing", "constructor": name, "provider": provider,
                "line": node.lineno, "suggest_kwarg": kwarg, "note": note,
            })
            continue

        # Some cap is present: check the known degradations (paper §3.2).
        degraded = None  # (suggest_kwarg, why)
        if provider == "gemini" and "thinking_budget" not in kwargs_present:
            degraded = (
                "thinking_budget",
                "Gemini: maxOutputTokens NO acota thinking tokens (se facturan como output); fijar thinking_budget",
            )
        elif provider in ("anthropic", "bedrock"):
            degraded = (
                None,
                "Anthropic: con interleaved/adaptive thinking el budget puede EXCEDER max_tokens — el techo solo vale en modo standard (budget_tokens < max_tokens)",
            )
        elif chat_reasoning and "max_completion_tokens" not in kwargs_present:
            degraded = (
                "max_completion_tokens",
                "reasoning model: max_tokens es ignorado en Chat API; el techo real es max_completion_tokens",
            )
        if degraded is not None:
            findings.append({
                "kind": "degraded", "constructor": name, "provider": provider,
                "line": node.lineno, "have": have,
                "suggest_kwarg": degraded[0],
                "why": degraded[1],
            })
    return findings, src
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _source_lines(src: str):
    """Split ``src`` into lines (endings kept) on LF, CRLF and CR only.

    ``str.splitlines`` also breaks on form feed, vertical tab and several
    Unicode separators, which ``ast`` does not count as new lines; using it
    would shift the indices relative to ``node.lineno``.
    """
    import re

    return re.findall(r"[^\r\n]*(?:\r\n|\r|\n)|[^\r\n]+", src)


def make_patch(path: Path, src: str, findings, cap_value: int) -> str:
    """Build a unified diff adding ``kwarg=cap_value`` to each uncapped constructor.

    Only findings with ``kind == "missing"`` are patched. The edit is minimal
    and textual: the kwarg is inserted right after the opening parenthesis of
    the call on the finding's line. Lines where the constructor does not occur
    exactly once as a whole identifier are left alone (the finding is still
    reported by the caller). The file itself is never written.

    Args:
        path: Path used in the ``a/``/``b/`` diff headers.
        src: Source text the findings' line numbers refer to.
        findings: Finding dicts as produced by ``scan_file``.
        cap_value: Integer written as the cap value.

    Returns:
        The diff text, or ``""`` when nothing could be patched.
    """
    import re  # local import: the module's import block does not provide it

    lines = _source_lines(src)
    new_lines = list(lines)
    # Bottom-up so earlier insertions never shift later line numbers.
    missing = sorted((f for f in findings if f["kind"] == "missing"),
                     key=lambda f: -f["line"])
    for f in missing:
        i = f["line"] - 1
        if not 0 <= i < len(new_lines):
            continue  # out of range (also rejects line <= 0, which would index from the end)
        line = new_lines[i]
        ctor = f["constructor"]
        # Whole-identifier match: "Anthropic(" must not hit "ChatAnthropic(".
        hits = list(re.finditer(r"(?<!\w)" + re.escape(ctor) + r"\(", line))
        if len(hits) != 1:
            # Zero hits: the call text is not on this line. Several hits: a
            # textual insertion cannot tell which call the finding refers to.
            continue
        insert_at = hits[0].end()
        rest = line[insert_at:]
        sep = "" if rest.lstrip().startswith(")") else ", "
        new_lines[i] = f"{line[:insert_at]}{f['suggest_kwarg']}={cap_value}{sep}{rest}"
    if new_lines == lines:
        return ""
    rel = Path(path).as_posix()  # diff headers use forward slashes on every OS
    out = []
    for dl in difflib.unified_diff(lines, new_lines,
                                   fromfile=f"a/{rel}", tofile=f"b/{rel}"):
        if dl.endswith(("\n", "\r")):
            out.append(dl)
        else:
            # Last line of a file without a trailing newline: difflib emits it
            # unterminated, which fuses diff lines together. Terminate it and
            # add the marker patch tools expect.
            out.append(dl + "\n\\ No newline at end of file\n")
    return "".join(out)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def scan_path(root: Path, max_files: int = 5000):
    """Scan every ``*.py`` file under ``root`` with ``scan_file``.

    Args:
        root: Directory to walk.
        max_files: Upper bound on the number of files scanned.

    Returns:
        ``(per_file, n_scanned)`` where ``per_file`` maps each path that has
        findings to ``(findings, src)``.
    """
    out = {}
    n = 0
    for py in sorted(root.rglob("*.py")):
        # Match excluded directory names only BELOW root: testing the absolute
        # path would skip everything when root itself lives under e.g. "venv/".
        if any(part in EXCLUDE_DIRS for part in py.relative_to(root).parts):
            continue
        # Never follow symlinks: a hostile repository could point outside the
        # scanned tree (path traversal by the scanner).
        if py.is_symlink() or any(p.is_symlink() for p in py.parents
                                  if root in p.parents or p == root):
            continue
        n += 1
        if n > max_files:
            break
        findings, src = scan_file(py)
        if findings:
            out[py] = (findings, src)
    return out, min(n, max_files)
|
costwright/cli.py
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
"""costwright CLI — `costwright check` y `costwright caps`.
|
|
2
|
+
|
|
3
|
+
Exit codes (council 002 P0-1):
|
|
4
|
+
0 = el tool corrió (hallazgos = warnings, salvo política)
|
|
5
|
+
1 = la política --fail-on se violó
|
|
6
|
+
2 = error de infraestructura (path inválido, crash) — nunca severidad de hallazgo
|
|
7
|
+
"""
|
|
8
|
+
import argparse
|
|
9
|
+
import json
|
|
10
|
+
import sys
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from costwright import __version__
|
|
14
|
+
from costwright import caps as caps_mod
|
|
15
|
+
from costwright import report as report_mod
|
|
16
|
+
from costwright.extract import extract_unit
|
|
17
|
+
from costwright.mapper import map_unit
|
|
18
|
+
import ast as _ast
|
|
19
|
+
|
|
20
|
+
EXCLUDE_DIRS = {".venv", "venv", "node_modules", "site-packages", ".git", "__pycache__"}


def _unit_kind(func):
    """Classify a call's callee as a graph-unit constructor, or return ``None``.

    ``StateGraph`` / ``Crew`` are recognised as a bare name or as the last
    attribute of any dotted access (``StateGraph(...)``, ``graph.StateGraph(...)``,
    ``a.b.Crew(...)``). ``run`` / ``run_sync`` / ``run_streamed`` are recognised
    when accessed on something named ``Runner`` (``Runner.run(...)``,
    ``agents.Runner.run(...)``).
    """
    constructors = {"StateGraph": "langgraph", "Crew": "crewai"}
    if isinstance(func, _ast.Name):
        return constructors.get(func.id)
    if not isinstance(func, _ast.Attribute):
        return None
    if func.attr in constructors:
        return constructors[func.attr]
    owner = func.value
    if isinstance(owner, _ast.Name):
        owner_name = owner.id
    elif isinstance(owner, _ast.Attribute):
        owner_name = owner.attr
    else:
        owner_name = None
    if owner_name == "Runner" and func.attr in ("run", "run_sync", "run_streamed"):
        return "agents_sdk"
    return None


def _find_units(root: Path, max_files: int):
    """Find graph units (LangGraph / CrewAI / Agents SDK call sites) under ``root``.

    Args:
        root: Directory to walk.
        max_files: Upper bound on the number of candidate files read.

    Returns:
        A list of dicts ``{"file", "kind", "line"}``. A file that passes the
        textual pre-check but cannot be parsed yields one entry with
        ``kind == "unknown"``, ``line == 0`` and ``syntax_error == True``.
    """
    units = []
    n = 0
    for py in sorted(root.rglob("*.py")):
        # Match excluded directory names only BELOW root: testing the absolute
        # path would skip everything when root itself lives under e.g. "venv/".
        if any(part in EXCLUDE_DIRS for part in py.relative_to(root).parts):
            continue
        # Never follow symlinks: a hostile repository could point outside the
        # scanned tree (path traversal by the scanner).
        if py.is_symlink() or any(p.is_symlink() for p in py.parents
                                  if root in p.parents or p == root):
            continue
        n += 1
        if n > max_files:
            break
        try:
            src = py.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            continue
        # Deliberately loose textual pre-check ("Crew (" with a space must pass).
        if not any(k in src for k in ("StateGraph", "Crew", "Runner.run")):
            continue
        try:
            tree = _ast.parse(src)
        except (SyntaxError, ValueError):
            # ValueError: before Python 3.12 NUL bytes raise ValueError, not SyntaxError.
            units.append({"file": py, "kind": "unknown", "line": 0, "syntax_error": True})
            continue
        for node in _ast.walk(tree):
            if not isinstance(node, _ast.Call):
                continue
            kind = _unit_kind(node.func)
            if kind:
                units.append({"file": py, "kind": kind, "line": node.lineno})
    return units
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def cmd_check(args) -> int:
    """Run ``costwright check``: map every graph unit under ``args.path`` and report.

    Returns 0 when the tool ran, 1 when the ``--fail-on`` policy is violated,
    and 2 on an invalid path or any internal error.
    """
    root = Path(args.path).resolve()
    if not root.exists():
        print(f"costwright: path not found: {root}", file=sys.stderr)
        return 2
    try:
        mapped = []
        for unit in _find_units(root, args.max_files):
            rel = str(unit["file"].relative_to(root))
            if unit.get("syntax_error"):
                # Unparseable file: record the failure, do not try to extract.
                mapped.append({"category": "extractor-failure", "reason": "syntax",
                               "kind": unit["kind"], "rel_path": rel, "line": 0})
                continue
            meta = {"unit_id": rel, "file": unit["file"].name, "kind": unit["kind"]}
            extracted = extract_unit(unit["file"].parent, meta)
            result = map_unit(extracted, meta)
            result["rel_path"] = rel
            result["line"] = unit["line"]
            mapped.append(result)

        rep = report_mod.to_v1(mapped)
        if args.json:
            print(report_mod.dumps(rep))
        elif not rep["units"]:
            print("costwright: no graph units found")
            return 0
        else:
            print(report_mod.pretty(rep, verbose=args.verbose))

        # Opt-in policy: each threshold also covers every worse severity.
        s = rep["summary"]
        runaway = s["runaway"] > 0
        default_dep = runaway or s["default_dependent"] > 0
        non_cert = default_dep or s["non_certifiable"] > 0
        viol = {"reject": runaway,
                "default-dependent": default_dep,
                "non-certifiable": non_cert}
        if args.fail_on and viol.get(args.fail_on, False):
            print(f"costwright: policy --fail-on {args.fail_on} violated", file=sys.stderr)
            return 1
        return 0
    except Exception as e:  # noqa: BLE001
        print(f"costwright: internal error: {type(e).__name__}: {e}", file=sys.stderr)
        return 2
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def cmd_caps(args) -> int:
    """Run ``costwright caps``: report LLM constructors without an effective cap.

    With ``--json`` the findings are printed as a ``costwright.caps.v1``
    document, otherwise as a human-readable list. With ``--patch`` a unified
    diff is written to the given file (``-`` means stdout); source files are
    never edited.

    Returns 0 when the tool ran and 2 on an invalid path or any internal error.
    """
    root = Path(args.path).resolve()
    if not root.exists():
        print(f"costwright: path not found: {root}", file=sys.stderr)
        return 2
    try:
        per_file, scanned = caps_mod.scan_path(root, args.max_files)
        if args.json:
            out = {"schema": "costwright.caps.v1", "files_scanned": scanned, "findings": [
                {**f, "file": str(p.relative_to(root))}
                for p, (fs, _) in sorted(per_file.items()) for f in fs]}
            print(json.dumps(out, indent=1, ensure_ascii=False, sort_keys=True))
        else:
            total = sum(len(fs) for fs, _ in per_file.values())
            if not total:
                print(f"costwright caps: all LLM constructors capped ({scanned} files scanned)")
                return 0
            for p, (fs, _) in sorted(per_file.items()):
                rel = p.relative_to(root)
                for f in fs:
                    if f["kind"] == "missing":
                        print(f" ✗ {rel}:{f['line']} {f['constructor']}(...) sin cap "
                              f"→ agregar {f['suggest_kwarg']}=<N>"
                              + (f" [{f['note']}]" if f.get("note") else ""))
                    else:
                        print(f" ▲ {rel}:{f['line']} {f['constructor']}: {f['why']}")
            print(f"\n {total} finding(s) in {len(per_file)} file(s) "
                  f"({scanned} scanned). Use --patch to emit a unified diff.")
        if args.patch:
            chunks = []
            for p, (fs, src) in sorted(per_file.items()):
                d = caps_mod.make_patch(p.relative_to(root), src, fs, args.cap)
                if d:
                    chunks.append(d)
            patch = "".join(chunks)
            if args.patch == "-":
                sys.stdout.write(patch)
            else:
                # The sources were decoded as UTF-8, so the diff must be written
                # as UTF-8 too; the locale default could fail on non-ASCII text
                # or produce bytes that no longer match the files.
                Path(args.patch).write_text(patch, encoding="utf-8")
                print(f" patch written to {args.patch} (apply with: git apply {args.patch})")
        return 0
    except Exception as e:  # noqa: BLE001
        print(f"costwright: internal error: {type(e).__name__}: {e}", file=sys.stderr)
        return 2
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _load_json(path: str):
    """Read the file at ``path`` as UTF-8 text and return the decoded JSON value."""
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _workflow_digest(path: Path) -> str:
    """Digest the analyzed workflow so a bundle is bound to that artifact.

    The EXACT file bytes are hashed (not decoded text, where
    ``errors='ignore'`` could make two byte-distinct files collide).

    * A single file: the digest of its bytes.
    * A directory: the digest of a manifest ``{relative_posix_path: digest}``
      over the ``*.py`` files below it, skipping ``EXCLUDE_DIRS``.

    Raises:
        OSError: if a file cannot be read.
    """
    from costwright import fusion

    if not path.is_dir():
        return fusion.digest_bytes(path.read_bytes())

    manifest = {}
    for py in sorted(path.rglob("*.py")):
        rel = py.relative_to(path)
        # Exclude by directory names BELOW ``path`` only; testing the absolute
        # path would yield an empty manifest for a workflow stored under e.g.
        # "venv/", making the binding vacuous.
        if any(part in EXCLUDE_DIRS for part in rel.parts):
            continue
        if not py.is_file():
            continue  # a directory that merely happens to be named "*.py"
        # Forward-slash keys keep the manifest identical across operating systems.
        manifest[rel.as_posix()] = fusion.digest_bytes(py.read_bytes())
    return fusion.digest(manifest)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def cmd_fuse(args) -> int:
    """Run ``costwright fuse``: bundle a cost certificate and a risk certificate.

    Reads the two JSON inputs, optionally digests the claim text and the
    workflow, and prints the fused bundle (JSON with ``--json``, otherwise the
    pretty form).

    Returns 0 on success and 2 on unreadable or invalid input, or on any
    internal error. It never exits with 1, which is reserved for a violated
    ``--fail-on`` policy.
    """
    from costwright import __version__, fusion
    try:
        cost = _load_json(args.cost)
        risk = _load_json(args.risk)
    except (OSError, ValueError) as e:  # ValueError covers json.JSONDecodeError
        print(f"costwright: cannot read input JSON: {type(e).__name__}: {e}", file=sys.stderr)
        return 2
    claim = None
    if args.claim_file:
        try:
            claim = Path(args.claim_file).read_text(encoding="utf-8")
        except OSError as e:
            print(f"costwright: cannot read --claim-file: {e}", file=sys.stderr)
            return 2
    wf_digest = None
    if args.workflow:
        try:
            wf_digest = _workflow_digest(Path(args.workflow))
        except OSError as e:
            print(f"costwright: cannot read --workflow: {e}", file=sys.stderr)
            return 2
    try:
        try:
            bundle = fusion.fuse(cost, risk, run_id=args.run_id,
                                 costwright_version=args.costwright_version or __version__,
                                 verify_version=args.verify_version,
                                 created_unix=args.created_unix,
                                 workflow_digest=wf_digest,
                                 calibrator_digest=args.calibrator_digest, claim=claim)
        except ValueError as e:
            print(f"costwright: invalid certificate input: {e}", file=sys.stderr)
            return 2
        print(fusion.dumps(bundle) if args.json else fusion.pretty(bundle))
        return 0
    except Exception as e:  # noqa: BLE001
        # Same boundary as cmd_check / cmd_caps: an unexpected failure must map
        # to exit status 2, not to an uncaught traceback (interpreter status 1).
        print(f"costwright: internal error: {type(e).__name__}: {e}", file=sys.stderr)
        return 2
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def main(argv=None) -> int:
    """Parse ``argv`` (default: ``sys.argv[1:]``) and run the selected sub-command.

    Returns the sub-command's exit status.
    """
    parser = argparse.ArgumentParser(
        prog="costwright",
        description="Static budget certificates for LLM-agent workflows "
                    "(LangGraph / CrewAI / OpenAI Agents SDK). Never executes your code.")
    parser.add_argument("--version", action="version", version=f"costwright {__version__}")
    commands = parser.add_subparsers(dest="cmd", required=True)

    # check
    check_p = commands.add_parser(
        "check", help="map workflows to the typed-budget calculus and report bounds")
    check_p.add_argument("path", nargs="?", default=".")
    check_p.add_argument("--json", action="store_true", help="emit costwright.v1 JSON")
    check_p.add_argument("--verbose", action="store_true", help="also list certifiable units")
    check_p.add_argument(
        "--fail-on", choices=["reject", "default-dependent", "non-certifiable"],
        help="severity threshold: exit 1 on findings of this severity OR WORSE "
             "(reject ⊂ default-dependent ⊂ non-certifiable). Default: never fail")
    check_p.add_argument("--max-files", type=int, default=5000)
    check_p.set_defaults(fn=cmd_check)

    # caps
    caps_p = commands.add_parser(
        "caps",
        help="find LLM constructors without a token cap; suggest the right kwarg per provider")
    caps_p.add_argument("path", nargs="?", default=".")
    caps_p.add_argument("--json", action="store_true")
    caps_p.add_argument(
        "--patch", metavar="FILE",
        help="write a unified diff adding caps ('-' = stdout); NEVER edits files")
    caps_p.add_argument("--cap", type=int, default=1024,
                        help="cap value used in --patch (default 1024)")
    caps_p.add_argument("--max-files", type=int, default=5000)
    caps_p.set_defaults(fn=cmd_caps)

    # pack
    from costwright.pack import cmd_pack
    pack_p = commands.add_parser(
        "pack", help="build a deterministic .py-only tarball for server-side certification")
    pack_p.add_argument("path", nargs="?", default=".")
    pack_p.add_argument("-o", "--output", default="costwright-artifact.tgz")
    pack_p.set_defaults(fn=cmd_pack)

    # fuse
    fuse_p = commands.add_parser(
        "fuse",
        help="bundle a costwright.v1 cost cert + an eleata-verify risk cert into a "
             "costwright.fusion.v1 audit record (the cartesian product — NOT a joint guarantee)")
    fuse_p.add_argument("--cost", required=True, metavar="FILE",
                        help="costwright.v1 JSON (from `costwright check --json`)")
    fuse_p.add_argument("--risk", required=True, metavar="FILE",
                        help="eleata-verify VerifyResult.to_dict() JSON")
    fuse_p.add_argument("--run-id", required=True,
                        help="binds both certificates to the same run")
    fuse_p.add_argument("--costwright-version", default=None,
                        help="costwright that produced --cost (default: this costwright)")
    fuse_p.add_argument("--verify-version", default="unknown",
                        help="pinned eleata-verify version that produced --risk")
    fuse_p.add_argument("--workflow", metavar="PATH",
                        help="file/dir of the analyzed workflow → workflow_digest (binding)")
    fuse_p.add_argument("--claim-file", metavar="FILE",
                        help="the verified claim text → claim_digest (binding)")
    fuse_p.add_argument("--calibrator-digest", default=None,
                        help="digest/id of the calibrator used (binding)")
    fuse_p.add_argument("--created-unix", type=int, default=None,
                        help="caller-stamped run timestamp (optional)")
    fuse_p.add_argument("--json", action="store_true", help="emit costwright.fusion.v1 JSON")
    fuse_p.set_defaults(fn=cmd_fuse)

    parsed = parser.parse_args(argv)
    return parsed.fn(parsed)
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
if __name__ == "__main__":
    # Use the sub-command's return value as the process exit status.
    raise SystemExit(main())
|
costwright/extract.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""F2a — extract: por graph unit, AST → ExtractionResult.
|
|
2
|
+
|
|
3
|
+
Emite: nodos, edges (static/conditional-literal/conditional-fn/dynamic-goto/send), ciclos,
|
|
4
|
+
bounds con fuente (D2/D8), caps de tokens, features no soportadas. 100% estático (D3).
|
|
5
|
+
"""
|
|
6
|
+
import ast, json
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
# D8 — table verified 2026-06-12 (sources in spec.md)
DEFAULTS = {
    "langgraph_recursion_limit_modern": 1000,  # >=1.0.6
    "langgraph_recursion_limit_legacy": 25,    # <1.0.6
    "crewai_max_iter": 20,
    "agents_sdk_max_turns": 10,
}

# Keyword arguments recorded as token caps on any call. Kept in step with the
# cap kwargs that the ``caps`` scanner recognises, so that e.g.
# ``ChatOllama(num_predict=...)`` is seen as capped by both analyses.
CAP_KWARGS = {
    "max_tokens",
    "max_output_tokens",
    "max_completion_tokens",
    "budget_tokens",
    "max_tokens_to_sample",
    "maxOutputTokens",
    "num_predict",
    "thinking_budget",
}
|
|
18
|
+
|
|
19
|
+
def call_name(n: ast.Call) -> str:
    """Return the dotted name of the callee of ``n``.

    ``f(...)`` yields ``"f"`` and ``a.b.c(...)`` yields ``"a.b.c"``. When an
    attribute chain does not start at a plain name (e.g. ``g().h.i(...)``)
    only the attribute part is returned (``"h.i"``). Any other callee shape
    yields ``""``.
    """
    callee = n.func
    if isinstance(callee, ast.Name):
        return callee.id
    if not isinstance(callee, ast.Attribute):
        return ""
    segments = []
    node = callee
    # Walk from the outermost attribute down to the base of the chain.
    while isinstance(node, ast.Attribute):
        segments.insert(0, node.attr)
        node = node.value
    if isinstance(node, ast.Name):
        segments.insert(0, node.id)
    return ".".join(segments)
|
|
30
|
+
|
|
31
|
+
def const_of(node):
    """Return the literal value of ``node``, or ``None`` if it is not a literal.

    Handles plain constants and a unary minus applied to a numeric constant
    (``-5``, ``-1.5``). A minus applied to a non-numeric constant (``-"x"``)
    is not a literal value and yields ``None`` rather than raising.

    Note that a literal ``None`` also yields ``None``; callers that must tell
    the two apart have to inspect the node themselves.
    """
    if isinstance(node, ast.Constant):
        return node.value
    if (isinstance(node, ast.UnaryOp)
            and isinstance(node.op, ast.USub)
            and isinstance(node.operand, ast.Constant)
            and isinstance(node.operand.value, (int, float, complex))):
        return -node.operand.value
    return None
|
|
36
|
+
|
|
37
|
+
class Extractor(ast.NodeVisitor):
    """AST visitor collecting the static facts of one source file.

    After ``visit(tree)`` the instance holds:

    * ``nodes``  -- ``(name or None, lineno)`` per ``add_node`` call; ``None``
      means the node name is not statically known.
    * ``edges``  -- dicts describing ``add_edge`` / ``add_conditional_edges`` /
      literal ``Command(goto=...)`` calls.
    * ``bounds`` -- explicit ``max_iter`` / ``max_turns`` / ``recursion_limit``
      settings with their literal value (``None`` if not a literal).
    * ``caps``   -- token-cap keyword arguments found on any call.
    * ``features`` -- unsupported constructs and other signals.
    * ``llm_calls`` -- count of known LLM constructor calls (heuristic).
    * ``while_true_invokes`` -- line numbers of invoke-like calls inside a
      non-interactive infinite ``while`` loop.
    """

    # Method names treated as "run the graph / crew / agent".
    _INVOKE_METHODS = ("invoke", "stream", "ainvoke", "astream", "batch", "abatch",
                       "kickoff", "run", "run_sync", "run_streamed")
    # Constructor names counted by the LLM-call heuristic.
    _LLM_CTORS = ("ChatOpenAI", "ChatAnthropic", "ChatGoogleGenerativeAI", "ChatBedrock",
                  "AzureChatOpenAI", "ChatVertexAI", "OpenAI", "Anthropic", "LLM",
                  "init_chat_model", "ChatGroq", "ChatMistralAI", "ChatOllama")

    def __init__(self, src):
        self.src = src
        self.nodes = []      # (name|None, lineno)
        self.edges = []      # dicts {kind, src, dst|dsts, line}
        self.bounds = []     # {param, value|None, source, line}
        self.caps = []       # {kwarg, value|None, line}
        self.features = []   # {feature, line}
        self.llm_calls = 0
        self.while_true_invokes = []
        self._in_while_true = 0
        # ids of inline ``config={...}`` dicts already handled at the call
        # site, so visit_Dict does not record their recursion_limit again.
        self._inline_config_dicts = set()

    def visit_While(self, n):
        # Any truthy constant test is an infinite loop (``while True``, ``while 1``).
        is_true = isinstance(n.test, ast.Constant) and bool(n.test.value)
        # Interactive REPL: an infinite loop that references ``input`` is paced
        # by a human, so it is not treated as an autonomous runaway driver.
        if is_true and any(isinstance(x, ast.Name) and x.id == "input"
                           for x in ast.walk(n)):
            self.features.append({"feature": "interactive-repl", "line": n.lineno})
            self.generic_visit(n)
            return
        if is_true:
            self._in_while_true += 1
        self.generic_visit(n)
        if is_true:
            self._in_while_true -= 1

    def visit_Call(self, n):
        name = call_name(n)
        last = name.split(".")[-1]

        if last == "add_node":
            arg0 = n.args[0] if n.args else None
            nname = const_of(arg0) if arg0 is not None else None
            if not isinstance(nname, str) and len(n.args) == 1:
                # add_node(fn) with ONE argument: the node name is taken from
                # the callable, so it is statically known. With two arguments
                # a non-literal first argument is a runtime name and stays None.
                if isinstance(arg0, ast.Name):
                    nname = arg0.id
                elif isinstance(arg0, ast.Attribute):
                    nname = arg0.attr
            self.nodes.append((nname if isinstance(nname, str) else None, n.lineno))
            # add_node(name, X.compile()): the handler is another graph, so
            # the node does not cost a single call; record it as a feature.
            for a in list(n.args[1:]) + [k.value for k in n.keywords]:
                if isinstance(a, ast.Call) and call_name(a).split(".")[-1] == "compile":
                    self.features.append({"feature": "subgraph-node", "line": n.lineno})
        elif last == "add_edge":
            a = const_or_endref(n.args[0]) if len(n.args) > 0 else None
            b = const_or_endref(n.args[1]) if len(n.args) > 1 else None
            self.edges.append({"kind": "static", "src": a, "dst": b, "line": n.lineno})
        elif last == "add_conditional_edges":
            # Destinations are enumerable when a dict literal is passed.
            mapping = None
            for x in list(n.args) + [k.value for k in n.keywords]:
                if isinstance(x, ast.Dict):
                    mapping = x
            if mapping is not None:
                dsts = [const_or_endref(v) for v in mapping.values]
                self.edges.append({"kind": "conditional-literal", "src": None,
                                   "dsts": dsts, "line": n.lineno})
            else:
                self.edges.append({"kind": "conditional-fn", "src": None,
                                   "dsts": None, "line": n.lineno})
        elif last == "Send":
            self.features.append({"feature": "send-fanout", "line": n.lineno})
        elif last == "Command":
            goto = next((k.value for k in n.keywords if k.arg == "goto"), None)
            if goto is not None and const_of(goto) is None and not isinstance(goto, ast.List):
                self.features.append({"feature": "dynamic-goto", "line": n.lineno})
            elif goto is not None:
                self.edges.append({"kind": "static", "src": None,
                                   "dst": const_of(goto), "line": n.lineno})
        elif (last == "interrupt" or last == "NodeInterrupt"
              or name.endswith("interrupt_before") or name.endswith("interrupt_after")):
            self.features.append({"feature": "interrupt-human-in-loop", "line": n.lineno})
        elif last in self._INVOKE_METHODS:
            self._scan_invoke(n)
            if self._in_while_true:
                self.while_true_invokes.append(n.lineno)
        elif last == "compile":
            for k in n.keywords:
                if k.arg in ("interrupt_before", "interrupt_after"):
                    self.features.append({"feature": "interrupt-human-in-loop",
                                          "line": n.lineno})
        elif last == "Agent":
            mi = next((k for k in n.keywords if k.arg == "max_iter"), None)
            if mi is not None:
                self.bounds.append({"param": "max_iter", "value": const_of(mi.value),
                                    "source": "explicit", "line": n.lineno})
            # An Agent without max_iter adds no bound here.
        elif last == "Crew":
            proc = next((k for k in n.keywords if k.arg == "process"), None)
            if proc is not None and "hierarchical" in ast.dump(proc.value):
                self.features.append({"feature": "hierarchical-manager", "line": n.lineno})

        # Token caps on any call (model constructors, invocations).
        for k in n.keywords:
            if k.arg in CAP_KWARGS:
                self.caps.append({"kwarg": k.arg, "value": const_of(k.value),
                                  "line": n.lineno})
        # LLM-constructor heuristic.
        if last in self._LLM_CTORS:
            self.llm_calls += 1
        self.generic_visit(n)

    def _scan_invoke(self, n):
        """Record ``max_turns`` / ``recursion_limit`` given at the call site."""
        for k in n.keywords:
            if k.arg == "max_turns":
                # Tell a LITERAL None (deliberate deactivation) apart from a
                # non-constant expression (bound not recoverable statically).
                none_lit = isinstance(k.value, ast.Constant) and k.value.value is None
                self.bounds.append({"param": "max_turns", "value": const_of(k.value),
                                    "none_literal": none_lit,
                                    "source": "explicit", "line": n.lineno})
            if k.arg == "config" and isinstance(k.value, ast.Dict):
                # Handled here with the call's line; visit_Dict must not
                # record the same dict a second time.
                self._inline_config_dicts.add(id(k.value))
                for kk, vv in zip(k.value.keys, k.value.values):
                    if const_of(kk) == "recursion_limit":
                        self.bounds.append({"param": "recursion_limit",
                                            "value": const_of(vv),
                                            "source": "explicit", "line": n.lineno})

    def visit_Dict(self, n):
        # Config dicts built separately: {"recursion_limit": N, ...}
        if id(n) not in self._inline_config_dicts:
            for kk, vv in zip(n.keys, n.values):
                if const_of(kk) == "recursion_limit":
                    self.bounds.append({"param": "recursion_limit", "value": const_of(vv),
                                        "source": "explicit", "line": n.lineno})
        self.generic_visit(n)
|
|
160
|
+
|
|
161
|
+
def const_or_endref(node):
    """Return the literal value of ``node``, or ``"START"`` / ``"END"`` for a reference.

    A bare name or the final attribute spelled ``START`` or ``END`` is
    returned as that string; anything else non-literal yields ``None``.
    """
    value = const_of(node)
    if value is not None:
        return value
    if isinstance(node, ast.Name):
        ident = node.id
    elif isinstance(node, ast.Attribute):
        ident = node.attr
    else:
        return None
    return ident if ident in ("START", "END") else None
|
|
167
|
+
|
|
168
|
+
def find_cycles(nodes, edges):
    """Return ``True`` if the resolved edges contain a directed cycle.

    Only edges whose source and destination are both known take part:
    ``static`` edges with ``src`` and ``dst``, and ``conditional-literal``
    edges that carry a ``src``. Edges into ``"END"`` are ignored. Unresolved
    edges never create a cycle here (conservative).

    Args:
        nodes: Unused; kept for interface compatibility.
        edges: Edge dicts as collected by ``Extractor``.

    The search is an iterative depth-first traversal, so a long chain of
    edges cannot exhaust the interpreter's recursion limit.
    """
    graph = {}
    for e in edges:
        kind = e["kind"]
        if kind == "static":
            src, dst = e.get("src"), e.get("dst")
            if src and dst and dst != "END":
                graph.setdefault(src, set()).add(dst)
        elif kind == "conditional-literal" and e.get("dsts") and e.get("src"):
            # Without a source the edge cannot close a cycle, so it is skipped.
            for d in e["dsts"]:
                if d and d != "END":
                    graph.setdefault(e["src"], set()).add(d)

    WHITE, GRAY, BLACK = 0, 1, 2  # unvisited / on the current path / finished
    color = {}
    for start in graph:
        if color.get(start, WHITE) != WHITE:
            continue
        color[start] = GRAY
        stack = [(start, iter(graph.get(start, ())))]
        while stack:
            u, successors = stack[-1]
            for v in successors:
                state = color.get(v, WHITE)
                if state == GRAY:
                    return True  # back edge onto the current path
                if state == WHITE:
                    color[v] = GRAY
                    stack.append((v, iter(graph.get(v, ()))))
                    break
            else:
                # Every successor of u is finished.
                color[u] = BLACK
                stack.pop()
    return False
|
|
193
|
+
|
|
194
|
+
def extract_unit(unit_dir: Path, meta: dict) -> dict:
    """Extract the static facts of one graph unit.

    Args:
        unit_dir: Directory containing the unit's file.
        meta: Dict with at least ``file`` (file name inside ``unit_dir``),
            ``unit_id`` and ``kind``.

    Returns:
        On success a dict with ``status == "ok"`` plus node counts, edges,
        cycle flags, bounds, caps, features and the LLM-constructor count.
        If the file cannot be parsed, ``{"unit_id", "status":
        "extractor-failure", "reason": "syntax"}``.

    Raises:
        OSError: if the file cannot be read.
    """
    f = unit_dir / meta["file"]
    src = f.read_text(encoding="utf-8", errors="ignore")
    try:
        tree = ast.parse(src)
    except (SyntaxError, ValueError):
        # ValueError: before Python 3.12 NUL bytes raise ValueError, not SyntaxError.
        return {"unit_id": meta["unit_id"], "status": "extractor-failure", "reason": "syntax"}
    ex = Extractor(src)
    ex.visit(tree)
    has_cycle = find_cycles(ex.nodes, ex.edges)
    # "Implicit" cycle typical of LangGraph: conditional edges may lead back
    # to an earlier node. Any conditional-literal edge with at least one
    # destination other than END is treated as a possible cycle.
    cond_back = any(
        e["kind"] == "conditional-literal" and e.get("dsts")
        and any(d for d in e["dsts"] if d and d != "END")
        for e in ex.edges
    )
    return {
        "unit_id": meta["unit_id"], "kind": meta["kind"], "status": "ok",
        "n_nodes": len(ex.nodes),
        "n_nodes_named": sum(1 for n, _ in ex.nodes if n),
        "n_nodes_dynamic": sum(1 for n, _ in ex.nodes if n is None),
        "edges": ex.edges, "has_static_cycle": has_cycle, "cond_may_cycle": cond_back,
        "bounds": ex.bounds, "caps": ex.caps, "features": ex.features,
        "llm_constructors": ex.llm_calls, "while_true_invokes": ex.while_true_invokes,
    }
|