costwright 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
costwright/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
costwright/caps.py ADDED
@@ -0,0 +1,157 @@
1
+ """costwright caps — detección de constructores LLM sin cap de tokens + sugerencia por provider.
2
+
3
+ La tabla provider→parámetro proviene de §3.2 del paper (verificada contra docs primarias jun-2026):
4
+ el cap correcto es PARAMETER-specific, no provider-specific. NUNCA edita archivos: emite hallazgos
5
+ y, con --patch, un unified diff aplicable con `git apply` (decisión del council 002: P0-2).
6
+ """
7
+ import ast
8
+ import difflib
9
+ from pathlib import Path
10
+
11
# constructor name -> (provider, correct cap kwarg, degradation note or None)
# Source: paper §3.2, primary docs accessed Jun-2026.
PROVIDER_CAPS = {
    # OpenAI / Azure (langchain + SDKs): chat completions uses max_tokens (non-reasoning) or
    # max_completion_tokens (reasoning); the Responses API uses max_output_tokens.
    "ChatOpenAI": ("openai", "max_tokens", "reasoning models: usar max_completion_tokens (Chat) / max_output_tokens (Responses)"),
    "AzureChatOpenAI": ("azure", "max_tokens", "reasoning models: max_completion_tokens — reasoning_tokens ⊆ completion_tokens (cap REAL)"),
    "OpenAI": ("openai", "max_output_tokens", "Responses API: bounds reasoning+output"),
    # Anthropic
    "ChatAnthropic": ("anthropic", "max_tokens", "standard: budget_tokens < max_tokens ⟹ techo real. interleaved/adaptive thinking: el budget puede EXCEDER max_tokens (cap degrada)"),
    "Anthropic": ("anthropic", "max_tokens", "ídem ChatAnthropic"),
    # Google
    "ChatGoogleGenerativeAI": ("gemini", "max_output_tokens", "thinking on: fijar TAMBIÉN thinking_budget — maxOutputTokens NO acota thinking (se factura aparte)"),
    "ChatVertexAI": ("gemini", "max_output_tokens", "ídem Gemini"),
    # others (langchain)
    "ChatBedrock": ("bedrock", "max_tokens", "replica la semántica Anthropic en modelos Claude"),
    "ChatGroq": ("groq", "max_tokens", None),
    "ChatMistralAI": ("mistral", "max_tokens", None),
    "ChatOllama": ("ollama", "num_predict", "Ollama usa num_predict, no max_tokens"),
    "init_chat_model": ("generic", "max_tokens", "el kwarg efectivo depende del provider resuelto en runtime — verificar"),
    "LLM": ("crewai", "max_tokens", "CrewAI LLM wrapper"),
}
# Keyword names that count as "some cap is present" on a constructor call.
CAP_KWARGS = {"max_tokens", "max_output_tokens", "max_completion_tokens", "budget_tokens",
              "max_tokens_to_sample", "maxOutputTokens", "num_predict", "thinking_budget"}
# Path components that cause a file to be skipped by the tree scan.
EXCLUDE_DIRS = {".venv", "venv", "node_modules", "site-packages", ".git", "__pycache__"}
36
+
37
+
38
def call_name(n: ast.Call) -> str:
    """Return the rightmost identifier of the callee of *n*.

    ``foo()`` gives ``"foo"``, ``a.b.foo()`` gives ``"foo"``; any other callee
    shape (subscript, nested call, ...) gives the empty string.
    """
    callee = n.func
    if isinstance(callee, ast.Attribute):
        return callee.attr
    return callee.id if isinstance(callee, ast.Name) else ""
45
+
46
+
47
def scan_file(path: Path):
    """Scan one Python file for LLM constructors with a missing or degraded token cap.

    Args:
        path: file to read (decoded as UTF-8, undecodable bytes dropped).

    Returns:
        ``(findings, src)``. ``findings`` is a list of dicts with ``kind`` equal to
        ``"missing"`` (no cap kwarg at all) or ``"degraded"`` (a cap is present but is
        known not to bound the real spend). ``src`` is the decoded source text.
        Returns ``([], None)`` when the file cannot be read or parsed.
    """
    try:
        src = path.read_text(encoding="utf-8", errors="ignore")
        tree = ast.parse(src)
    except (SyntaxError, ValueError, RecursionError, OSError):
        # ValueError: source containing NUL bytes (Python < 3.12); RecursionError:
        # pathologically nested code. One unparsable file must not abort the scan.
        return [], None
    findings = []
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        name = call_name(node)
        if name not in PROVIDER_CAPS:
            continue
        kwargs_present = {k.arg for k in node.keywords if k.arg}
        provider, kwarg, note = PROVIDER_CAPS[name]
        # Best-effort reasoning-model detection from a literal `model` / `model_name`
        # kwarg: in the Chat API the o-series / GPT-5 models ignore max_tokens and the
        # real cap is max_completion_tokens.
        model_val = next((k.value.value for k in node.keywords
                          if k.arg in ("model", "model_name")
                          and isinstance(k.value, ast.Constant)
                          and isinstance(k.value.value, str)), "")
        reasoning = model_val.startswith(("o1", "o3", "o4", "gpt-5"))
        # ONLY Chat-API constructors: the `OpenAI` constructor is treated as Responses
        # API, whose correct cap stays max_output_tokens whether reasoning or not.
        chat_api = name in ("ChatOpenAI", "AzureChatOpenAI")
        if chat_api and reasoning:
            kwarg = "max_completion_tokens"
            note = "reasoning model en Chat API: max_tokens es IGNORADO; usar max_completion_tokens"
        caps_present = kwargs_present & CAP_KWARGS
        if caps_present:
            # Some cap is present: check the known degradations (§3.2).
            have = sorted(caps_present)
            if provider == "gemini" and "thinking_budget" not in kwargs_present:
                findings.append({
                    "kind": "degraded", "constructor": name, "provider": provider,
                    "line": node.lineno, "have": have,
                    "suggest_kwarg": "thinking_budget",
                    "why": "Gemini: maxOutputTokens NO acota thinking tokens (se facturan como output); fijar thinking_budget",
                })
            elif provider in ("anthropic", "bedrock"):
                # Anthropic with a cap still degrades under interleaved/adaptive thinking.
                findings.append({
                    "kind": "degraded", "constructor": name, "provider": provider,
                    "line": node.lineno, "have": have,
                    "suggest_kwarg": None,
                    "why": "Anthropic: con interleaved/adaptive thinking el budget puede EXCEDER max_tokens — el techo solo vale en modo standard (budget_tokens < max_tokens)",
                })
            elif chat_api and reasoning and "max_completion_tokens" not in kwargs_present:
                findings.append({
                    "kind": "degraded", "constructor": name, "provider": provider,
                    "line": node.lineno, "have": have,
                    "suggest_kwarg": "max_completion_tokens",
                    "why": "reasoning model: max_tokens es ignorado en Chat API; el techo real es max_completion_tokens",
                })
            continue
        findings.append({
            "kind": "missing", "constructor": name, "provider": provider,
            "line": node.lineno, "suggest_kwarg": kwarg, "note": note,
        })
    return findings, src
105
+
106
+
107
def _try_parse(text):
    """Return the AST of *text*, or None when it is not parseable Python."""
    try:
        return ast.parse(text)
    except (SyntaxError, ValueError, RecursionError):
        return None


def _append_kwarg(line, tree, lineno, ctor, kwarg_text):
    """Return *line* with *kwarg_text* added as the LAST argument of the single-line
    call to *ctor* that starts on *lineno*, or None when no such call is found."""
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        if node.lineno != lineno or node.end_lineno != lineno:
            continue
        func = node.func
        name = func.id if isinstance(func, ast.Name) else getattr(func, "attr", "")
        if name != ctor:
            continue
        try:
            raw = line.encode("utf-8")  # AST column offsets are UTF-8 byte offsets
        except UnicodeError:
            return None
        close = node.end_col_offset - 1
        if close < 0 or raw[close:close + 1] != b")":
            return None
        head = raw[:close].decode("utf-8").rstrip()
        tail = raw[close:].decode("utf-8")
        if head.endswith("("):
            sep = ""
        elif head.endswith(","):
            sep = " "
        else:
            sep = ", "
        return head + sep + kwarg_text + tail
    return None


def make_patch(path: Path, src: str, findings, cap_value: int) -> str:
    """Build a unified diff that adds ``kwarg=cap_value`` to every uncapped constructor.

    Only findings with ``kind == "missing"`` are patched. The edit is a minimal textual
    insertion right after the call's opening parenthesis. When that would produce
    invalid Python (the call has positional arguments) the kwarg is appended as the
    last argument of a single-line call instead. An edit that still does not parse is
    dropped; the finding is reported elsewhere regardless. NEVER writes the file,
    only returns the diff.

    Args:
        path: path used for the ``a/`` and ``b/`` diff headers.
        src: the source text the findings refer to.
        findings: finding dicts carrying ``kind``, ``line``, ``constructor`` and
            ``suggest_kwarg``.
        cap_value: integer written as the cap.

    Returns:
        The unified diff text, or ``""`` when nothing was changed.
    """
    lines = src.splitlines(keepends=True)
    new_lines = list(lines)
    # With an unparsable *src* the edits cannot be validated; fall back to the plain
    # textual insertion in that case.
    tree = _try_parse(src)
    # Bottom-up so earlier edits never disturb later line lookups.
    missing = sorted((f for f in findings if f["kind"] == "missing"),
                     key=lambda x: -x["line"])
    for f in missing:
        i = f["line"] - 1
        if not 0 <= i < len(new_lines):
            continue
        line = new_lines[i]
        ctor = f["constructor"]
        needle = ctor + "("
        # If the constructor text occurs 0 or >1 times on the line, do NOT patch: the
        # textual insertion cannot tell the occurrences apart (conservative).
        if line.count(needle) != 1:
            continue
        insert_at = line.find(needle) + len(needle)
        rest = line[insert_at:]
        kwarg_text = f"{f['suggest_kwarg']}={cap_value}"
        if rest.lstrip().startswith(")"):
            sep = ""
        elif not rest.strip():
            sep = ","  # call continues on the next line: avoid trailing whitespace
        else:
            sep = ", "
        front = line[:insert_at] + kwarg_text + sep + rest
        if tree is None:
            new_lines[i] = front
            continue
        candidates = [front]
        appended = _append_kwarg(line, tree, f["line"], ctor, kwarg_text)
        if appended is not None:
            candidates.append(appended)
        for cand in candidates:
            parsed = _try_parse("".join(new_lines[:i] + [cand] + new_lines[i + 1:]))
            if parsed is not None:
                new_lines[i] = cand
                tree = parsed  # keep offsets fresh for further edits on this line
                break
    if new_lines == lines:
        return ""
    rel = str(path)
    out = []
    for dl in difflib.unified_diff(lines, new_lines,
                                   fromfile=f"a/{rel}", tofile=f"b/{rel}"):
        if dl and dl.splitlines()[0] == dl:
            # Line without any terminator (last line of a file lacking a final
            # newline): emit the marker `git apply` expects so chunks do not fuse.
            dl += "\n\\ No newline at end of file\n"
        out.append(dl)
    return "".join(out)
137
+
138
+
139
def scan_path(root: Path, max_files: int = 5000):
    """Scan every ``*.py`` file under *root* with ``scan_file``.

    Args:
        root: directory to walk recursively.
        max_files: stop after this many files have been scanned.

    Returns:
        ``(per_file, n_scanned)`` where ``per_file`` maps each path that produced
        findings to ``(findings, src)``.
    """
    out = {}
    n = 0
    for py in sorted(root.rglob("*.py")):
        # Match the excluded names only against the path BELOW root: testing the
        # absolute parts would drop the whole tree whenever root itself lives under
        # a directory such as "venv" or "site-packages".
        if any(part in EXCLUDE_DIRS for part in py.relative_to(root).parts):
            continue
        # Do NOT follow symlinks: a hostile repo could point outside the scanned tree
        # (path traversal of the scanner).
        if py.is_symlink() or any(p.is_symlink() for p in py.parents
                                  if root in p.parents or p == root):
            continue
        n += 1
        if n > max_files:
            break
        findings, src = scan_file(py)
        if findings:
            out[py] = (findings, src)
    return out, min(n, max_files)
costwright/cli.py ADDED
@@ -0,0 +1,265 @@
1
+ """costwright CLI — `costwright check` y `costwright caps`.
2
+
3
+ Exit codes (council 002 P0-1):
4
+ 0 = el tool corrió (hallazgos = warnings, salvo política)
5
+ 1 = la política --fail-on se violó
6
+ 2 = error de infraestructura (path inválido, crash) — nunca severidad de hallazgo
7
+ """
8
+ import argparse
9
+ import json
10
+ import sys
11
+ from pathlib import Path
12
+
13
+ from costwright import __version__
14
+ from costwright import caps as caps_mod
15
+ from costwright import report as report_mod
16
+ from costwright.extract import extract_unit
17
+ from costwright.mapper import map_unit
18
+ import ast as _ast
19
+
20
+ EXCLUDE_DIRS = {".venv", "venv", "node_modules", "site-packages", ".git", "__pycache__"}
21
+
22
+
23
+ def _find_units(root: Path, max_files: int):
24
+ """Detecta graph units (constructores LangGraph/CrewAI/AgentsSDK) en el árbol."""
25
+ units = []
26
+ n = 0
27
+ for py in sorted(root.rglob("*.py")):
28
+ if any(part in EXCLUDE_DIRS for part in py.parts):
29
+ continue
30
+ # audit-3 (deepseek P0): NO seguir symlinks — un repo hostil podría apuntar
31
+ # fuera del árbol escaneado (path traversal del scanner)
32
+ if py.is_symlink() or any(p.is_symlink() for p in py.parents
33
+ if root in p.parents or p == root):
34
+ continue
35
+ n += 1
36
+ if n > max_files:
37
+ break
38
+ try:
39
+ src = py.read_text(encoding="utf-8", errors="ignore")
40
+ except OSError:
41
+ continue
42
+ # precheck laxo (audit-3: "Crew (" con espacio se perdía con "Crew(")
43
+ if not any(k in src for k in ("StateGraph", "Crew", "Runner.run")):
44
+ continue
45
+ try:
46
+ tree = _ast.parse(src)
47
+ except SyntaxError:
48
+ units.append({"file": py, "kind": "unknown", "line": 0, "syntax_error": True})
49
+ continue
50
+ for node in _ast.walk(tree):
51
+ if not isinstance(node, _ast.Call):
52
+ continue
53
+ f = node.func
54
+ nm = f.id if isinstance(f, _ast.Name) else (
55
+ f"{f.value.id}.{f.attr}" if isinstance(f, _ast.Attribute)
56
+ and isinstance(f.value, _ast.Name) else
57
+ (f.attr if isinstance(f, _ast.Attribute) else ""))
58
+ kind = None
59
+ if nm == "StateGraph":
60
+ kind = "langgraph"
61
+ elif nm == "Crew":
62
+ kind = "crewai"
63
+ elif nm in ("Runner.run", "Runner.run_sync", "Runner.run_streamed"):
64
+ kind = "agents_sdk"
65
+ if kind:
66
+ units.append({"file": py, "kind": kind, "line": node.lineno})
67
+ return units
68
+
69
+
70
def cmd_check(args) -> int:
    """Run ``costwright check``: map every detected graph unit and print the report.

    Returns 0 when the tool ran, 1 when the opt-in ``--fail-on`` policy is violated,
    and 2 on an infrastructure error (missing path or any internal exception).
    """
    root = Path(args.path).resolve()
    if not root.exists():
        print(f"costwright: path not found: {root}", file=sys.stderr)
        return 2
    try:
        mapped = []
        for unit in _find_units(root, args.max_files):
            rel = str(unit["file"].relative_to(root))
            if unit.get("syntax_error"):
                mapped.append({"category": "extractor-failure", "reason": "syntax",
                               "kind": unit["kind"], "rel_path": rel, "line": 0})
                continue
            meta = {"unit_id": rel, "file": unit["file"].name, "kind": unit["kind"]}
            extracted = extract_unit(unit["file"].parent, meta)
            result = map_unit(extracted, meta)
            result["rel_path"] = rel
            result["line"] = unit["line"]
            mapped.append(result)

        rep = report_mod.to_v1(mapped)
        if args.json:
            print(report_mod.dumps(rep))
        elif not rep["units"]:
            print("costwright: no graph units found")
            return 0
        else:
            print(report_mod.pretty(rep, verbose=args.verbose))

        # Opt-in policy: each threshold also fails on every more severe category.
        s = rep["summary"]
        runaway = s["runaway"] > 0
        default_dependent = runaway or s["default_dependent"] > 0
        non_certifiable = default_dependent or s["non_certifiable"] > 0
        viol = {"reject": runaway,
                "default-dependent": default_dependent,
                "non-certifiable": non_certifiable}
        if args.fail_on and viol.get(args.fail_on, False):
            print(f"costwright: policy --fail-on {args.fail_on} violated", file=sys.stderr)
            return 1
        return 0
    except Exception as e:  # noqa: BLE001
        print(f"costwright: internal error: {type(e).__name__}: {e}", file=sys.stderr)
        return 2
111
+
112
+
113
def cmd_caps(args) -> int:
    """Run ``costwright caps``: report LLM constructors with a missing or degraded cap.

    With ``--patch`` a unified diff adding the missing caps is written to the given
    file, or to stdout when the value is ``-``; scanned files are never edited.

    Returns 0 when the tool ran and 2 on an infrastructure error (missing path or any
    internal exception).
    """
    root = Path(args.path).resolve()
    if not root.exists():
        print(f"costwright: path not found: {root}", file=sys.stderr)
        return 2
    try:
        per_file, scanned = caps_mod.scan_path(root, args.max_files)
        if args.json:
            out = {"schema": "costwright.caps.v1", "files_scanned": scanned, "findings": [
                {**f, "file": str(p.relative_to(root))}
                for p, (fs, _) in sorted(per_file.items()) for f in fs]}
            print(json.dumps(out, indent=1, ensure_ascii=False, sort_keys=True))
        else:
            total = sum(len(fs) for fs, _ in per_file.values())
            if not total:
                print(f"costwright caps: all LLM constructors capped ({scanned} files scanned)")
                return 0
            for p, (fs, _) in sorted(per_file.items()):
                rel = p.relative_to(root)
                for f in fs:
                    if f["kind"] == "missing":
                        print(f" ✗ {rel}:{f['line']} {f['constructor']}(...) sin cap "
                              f"→ agregar {f['suggest_kwarg']}=<N>"
                              + (f" [{f['note']}]" if f.get("note") else ""))
                    else:
                        print(f" ▲ {rel}:{f['line']} {f['constructor']}: {f['why']}")
            print(f"\n {total} finding(s) in {len(per_file)} file(s) "
                  f"({scanned} scanned). Use --patch to emit a unified diff.")
        if args.patch:
            chunks = []
            for p, (fs, src) in sorted(per_file.items()):
                d = caps_mod.make_patch(p.relative_to(root), src, fs, args.cap)
                if d:
                    chunks.append(d)
            patch = "".join(chunks)
            if args.patch == "-":
                sys.stdout.write(patch)
            else:
                # Sources are read as UTF-8, so the diff is written as UTF-8 bytes:
                # the locale encoding / newline translation of write_text() could
                # fail on non-ASCII context lines or alter line endings.
                Path(args.patch).write_bytes(patch.encode("utf-8"))
                print(f" patch written to {args.patch} (apply with: git apply {args.patch})")
        return 0
    except Exception as e:  # noqa: BLE001
        print(f"costwright: internal error: {type(e).__name__}: {e}", file=sys.stderr)
        return 2
157
+
158
+
159
+ def _load_json(path: str):
160
+ return json.loads(Path(path).read_text(encoding="utf-8"))
161
+
162
+
163
def _workflow_digest(path: Path) -> str:
    """Bind the bundle to the analyzed artifact (anti-substitution).

    Hashes the EXACT file bytes, not decoded text: ``errors='ignore'`` could let two
    byte-distinct files collide. A single file gives the digest of its bytes. A
    directory gives the digest of a manifest ``{rel_path: digest(bytes)}`` over its
    ``*.py`` files.

    Raises:
        OSError: when a file cannot be read.
    """
    from costwright import fusion
    if path.is_dir():
        manifest = {}
        for py in sorted(path.rglob("*.py")):
            rel = py.relative_to(path)
            # Match the excluded names only below *path*; the caller-supplied prefix
            # (e.g. "venv/project") must not exclude the whole workflow.
            if any(part in EXCLUDE_DIRS for part in rel.parts):
                continue
            # POSIX-style keys keep the manifest identical across platforms.
            manifest[rel.as_posix()] = fusion.digest_bytes(py.read_bytes())
        return fusion.digest(manifest)
    return fusion.digest_bytes(path.read_bytes())
176
+
177
+
178
def cmd_fuse(args) -> int:
    """Run ``costwright fuse``: bundle a cost certificate and a risk certificate.

    Returns 0 after printing the bundle, and 2 when an input cannot be read or the
    certificate input is rejected.
    """
    from costwright import __version__, fusion

    def _abort(message: str) -> int:
        print(message, file=sys.stderr)
        return 2

    try:
        cost = _load_json(args.cost)
        risk = _load_json(args.risk)
    except (OSError, ValueError) as e:  # ValueError covers json.JSONDecodeError
        return _abort(f"costwright: cannot read input JSON: {type(e).__name__}: {e}")

    claim = None
    if args.claim_file:
        try:
            claim = Path(args.claim_file).read_text(encoding="utf-8")
        except OSError as e:
            return _abort(f"costwright: cannot read --claim-file: {e}")

    wf_digest = None
    if args.workflow:
        try:
            wf_digest = _workflow_digest(Path(args.workflow))
        except OSError as e:
            return _abort(f"costwright: cannot read --workflow: {e}")

    try:
        bundle = fusion.fuse(
            cost,
            risk,
            run_id=args.run_id,
            costwright_version=args.costwright_version or __version__,
            verify_version=args.verify_version,
            created_unix=args.created_unix,
            workflow_digest=wf_digest,
            calibrator_digest=args.calibrator_digest,
            claim=claim,
        )
    except ValueError as e:
        return _abort(f"costwright: invalid certificate input: {e}")

    rendered = fusion.dumps(bundle) if args.json else fusion.pretty(bundle)
    print(rendered)
    return 0
212
+
213
+
214
def main(argv=None) -> int:
    """Parse *argv* (defaults to the process arguments) and run the chosen subcommand.

    Returns the exit code produced by the subcommand handler.
    """
    parser = argparse.ArgumentParser(
        prog="costwright",
        description="Static budget certificates for LLM-agent workflows "
                    "(LangGraph / CrewAI / OpenAI Agents SDK). Never executes your code.")
    parser.add_argument("--version", action="version", version=f"costwright {__version__}")
    commands = parser.add_subparsers(dest="cmd", required=True)

    # check
    check_p = commands.add_parser(
        "check", help="map workflows to the typed-budget calculus and report bounds")
    check_p.add_argument("path", nargs="?", default=".")
    check_p.add_argument("--json", action="store_true", help="emit costwright.v1 JSON")
    check_p.add_argument("--verbose", action="store_true", help="also list certifiable units")
    check_p.add_argument(
        "--fail-on", choices=["reject", "default-dependent", "non-certifiable"],
        help="severity threshold: exit 1 on findings of this severity OR WORSE "
             "(reject ⊂ default-dependent ⊂ non-certifiable). Default: never fail")
    check_p.add_argument("--max-files", type=int, default=5000)
    check_p.set_defaults(fn=cmd_check)

    # caps
    caps_p = commands.add_parser(
        "caps",
        help="find LLM constructors without a token cap; suggest the right kwarg per provider")
    caps_p.add_argument("path", nargs="?", default=".")
    caps_p.add_argument("--json", action="store_true")
    caps_p.add_argument(
        "--patch", metavar="FILE",
        help="write a unified diff adding caps ('-' = stdout); NEVER edits files")
    caps_p.add_argument(
        "--cap", type=int, default=1024, help="cap value used in --patch (default 1024)")
    caps_p.add_argument("--max-files", type=int, default=5000)
    caps_p.set_defaults(fn=cmd_caps)

    # pack (handler imported here, as in the original layout)
    from costwright.pack import cmd_pack
    pack_p = commands.add_parser(
        "pack", help="build a deterministic .py-only tarball for server-side certification")
    pack_p.add_argument("path", nargs="?", default=".")
    pack_p.add_argument("-o", "--output", default="costwright-artifact.tgz")
    pack_p.set_defaults(fn=cmd_pack)

    # fuse
    fuse_p = commands.add_parser(
        "fuse",
        help="bundle a costwright.v1 cost cert + an eleata-verify risk cert into a "
             "costwright.fusion.v1 audit record (the cartesian product — NOT a joint guarantee)")
    fuse_p.add_argument("--cost", required=True, metavar="FILE",
                        help="costwright.v1 JSON (from `costwright check --json`)")
    fuse_p.add_argument("--risk", required=True, metavar="FILE",
                        help="eleata-verify VerifyResult.to_dict() JSON")
    fuse_p.add_argument("--run-id", required=True,
                        help="binds both certificates to the same run")
    fuse_p.add_argument("--costwright-version", default=None,
                        help="costwright that produced --cost (default: this costwright)")
    fuse_p.add_argument("--verify-version", default="unknown",
                        help="pinned eleata-verify version that produced --risk")
    fuse_p.add_argument("--workflow", metavar="PATH",
                        help="file/dir of the analyzed workflow → workflow_digest (binding)")
    fuse_p.add_argument("--claim-file", metavar="FILE",
                        help="the verified claim text → claim_digest (binding)")
    fuse_p.add_argument("--calibrator-digest", default=None,
                        help="digest/id of the calibrator used (binding)")
    fuse_p.add_argument("--created-unix", type=int, default=None,
                        help="caller-stamped run timestamp (optional)")
    fuse_p.add_argument("--json", action="store_true", help="emit costwright.fusion.v1 JSON")
    fuse_p.set_defaults(fn=cmd_fuse)

    parsed = parser.parse_args(argv)
    return parsed.fn(parsed)


if __name__ == "__main__":
    sys.exit(main())
costwright/extract.py ADDED
@@ -0,0 +1,214 @@
1
+ """F2a — extract: por graph unit, AST → ExtractionResult.
2
+
3
+ Emite: nodos, edges (static/conditional-literal/conditional-fn/dynamic-goto/send), ciclos,
4
+ bounds con fuente (D2/D8), caps de tokens, features no soportadas. 100% estático (D3).
5
+ """
6
+ import ast, json
7
+ from pathlib import Path
8
+
9
# D8 — table verified 2026-06-12 (sources in spec.md)
DEFAULTS = {
    "langgraph_recursion_limit_modern": 1000,  # >=1.0.6
    "langgraph_recursion_limit_legacy": 25,    # <1.0.6
    "crewai_max_iter": 20,
    "agents_sdk_max_turns": 10,
}
# Keyword names recorded as token caps on any call. "num_predict" is included because
# ChatOllama is counted as an LLM constructor below and num_predict is its cap kwarg
# (the caps module already lists it); without it an explicitly capped Ollama model
# would show up with no cap at all.
CAP_KWARGS = {"max_tokens", "max_output_tokens", "max_completion_tokens", "budget_tokens",
              "max_tokens_to_sample", "maxOutputTokens", "num_predict"}
18
+
19
def call_name(n: ast.Call) -> str:
    """Return the dotted name of the callee of *n*.

    ``f()`` gives ``"f"`` and ``a.b.c()`` gives ``"a.b.c"``. When the attribute chain
    is rooted in something other than a plain name (e.g. ``g().h()``) only the
    attribute parts are kept. Any other callee shape gives ``""``.
    """
    target = n.func
    if isinstance(target, ast.Name):
        return target.id
    if not isinstance(target, ast.Attribute):
        return ""
    chain = []
    while isinstance(target, ast.Attribute):
        chain.insert(0, target.attr)
        target = target.value
    if isinstance(target, ast.Name):
        chain.insert(0, target.id)
    return ".".join(chain)
30
+
31
def const_of(node):
    """Return the literal value of *node*, or None when it is not a plain literal.

    Handles ``ast.Constant`` and a unary minus applied to a numeric constant. A minus
    applied to a non-numeric constant (``-"x"``, ``-None``) is valid syntax but cannot
    be evaluated, so it yields None rather than raising ``TypeError``.
    """
    if isinstance(node, ast.Constant):
        return node.value
    if (isinstance(node, ast.UnaryOp) and isinstance(node.op, ast.USub)
            and isinstance(node.operand, ast.Constant)
            and isinstance(node.operand.value, (int, float, complex))):
        return -node.operand.value
    return None
36
+
37
class Extractor(ast.NodeVisitor):
    """AST visitor that collects graph structure, bounds, caps and feature signals.

    After ``visit(tree)`` the instance exposes:
      nodes               list of ``(name | None, lineno)`` from ``add_node`` calls
      edges               list of edge dicts ``{kind, src, dst | dsts, line}``
      bounds              list of ``{param, value | None, source, line}``
      caps                list of ``{kwarg, value | None, line}``
      features            list of ``{feature, line}`` signals
      llm_calls           count of known LLM constructor calls (heuristic)
      while_true_invokes  line numbers of invoke-like calls inside an infinite loop
    """

    def __init__(self, src):
        self.src = src
        self.nodes = []
        self.edges = []
        self.bounds = []
        self.caps = []
        self.features = []
        self.llm_calls = 0
        self.while_true_invokes = []
        self._in_while_true = 0  # nesting depth of non-interactive infinite loops

    def visit_While(self, n):
        """Track infinite ``while`` loops so invoke-like calls inside them are recorded."""
        # Any truthy constant test (`while True`, `while 1`) never terminates by itself.
        is_infinite = isinstance(n.test, ast.Constant) and bool(n.test.value)
        if not is_infinite:
            self.generic_visit(n)
            return
        # Interactive REPL: an infinite loop that references input(); the human is the
        # loop, so it is NOT an autonomous runaway driver. Looking at Name nodes (not
        # dumped text) keeps string literals from triggering this.
        if any(isinstance(x, ast.Name) and x.id == "input" for x in ast.walk(n)):
            self.features.append({"feature": "interactive-repl", "line": n.lineno})
            self.generic_visit(n)
            return
        self._in_while_true += 1
        try:
            self.generic_visit(n)
        finally:
            self._in_while_true -= 1

    def visit_Call(self, n):
        """Dispatch on the callee's last name component and record what it implies."""
        name = call_name(n)
        last = name.split(".")[-1]

        if last == "add_node":
            arg0 = n.args[0] if n.args else None
            nname = const_of(arg0) if arg0 is not None else None
            if not isinstance(nname, str) and len(n.args) == 1:
                # add_node(fn) with ONE positional arg: the node is named after the
                # function, so it is statically named. With two args a non-literal
                # arg0 is a runtime name, and the node stays None (dynamic).
                if isinstance(arg0, ast.Name):
                    nname = arg0.id
                elif isinstance(arg0, ast.Attribute):
                    nname = arg0.attr
            self.nodes.append((nname if isinstance(nname, str) else None, n.lineno))
            # Subgraph as node: add_node(name, X.compile()). The handler is ANOTHER
            # graph, so the node's cost is not a single call; flagged as a feature.
            for a in list(n.args[1:]) + [k.value for k in n.keywords]:
                if isinstance(a, ast.Call) and call_name(a).split(".")[-1] == "compile":
                    self.features.append({"feature": "subgraph-node", "line": n.lineno})
        elif last == "add_edge":
            a = const_or_endref(n.args[0]) if len(n.args) > 0 else None
            b = const_or_endref(n.args[1]) if len(n.args) > 1 else None
            self.edges.append({"kind": "static", "src": a, "dst": b, "line": n.lineno})
        elif last == "add_conditional_edges":
            # Destinations are enumerable when a dict literal appears among the
            # arguments; the last such dict wins.
            mapping = None
            for x in list(n.args) + [k.value for k in n.keywords]:
                if isinstance(x, ast.Dict):
                    mapping = x
            if mapping is not None:
                dsts = [const_or_endref(v) for v in mapping.values]
                self.edges.append({"kind": "conditional-literal", "src": None,
                                   "dsts": dsts, "line": n.lineno})
            else:
                self.edges.append({"kind": "conditional-fn", "src": None,
                                   "dsts": None, "line": n.lineno})
        elif last == "Send":
            self.features.append({"feature": "send-fanout", "line": n.lineno})
        elif last == "Command":
            goto = next((k.value for k in n.keywords if k.arg == "goto"), None)
            if goto is not None:
                # START/END references are static targets, not dynamic gotos.
                dst = const_or_endref(goto)
                if dst is None and not isinstance(goto, ast.List):
                    self.features.append({"feature": "dynamic-goto", "line": n.lineno})
                else:
                    self.edges.append({"kind": "static", "src": None, "dst": dst,
                                       "line": n.lineno})
        elif (last == "interrupt" or last == "NodeInterrupt"
              or name.endswith("interrupt_before") or name.endswith("interrupt_after")):
            self.features.append({"feature": "interrupt-human-in-loop", "line": n.lineno})
        elif last in ("invoke", "stream", "ainvoke", "astream", "batch", "abatch", "kickoff",
                      "run", "run_sync", "run_streamed"):
            self._scan_invoke(n)
            if self._in_while_true:
                self.while_true_invokes.append(n.lineno)
        elif last == "compile":
            for k in n.keywords:
                if k.arg in ("interrupt_before", "interrupt_after"):
                    self.features.append({"feature": "interrupt-human-in-loop",
                                          "line": n.lineno})
        elif last in ("Agent",):
            mi = next((k for k in n.keywords if k.arg == "max_iter"), None)
            if mi is not None:
                self.bounds.append({"param": "max_iter", "value": const_of(mi.value),
                                    "source": "explicit", "line": n.lineno})
            # No max_iter here means no bound is recorded at this call site.
        elif last == "Crew":
            proc = next((k for k in n.keywords if k.arg == "process"), None)
            if proc is not None and "hierarchical" in ast.dump(proc.value):
                self.features.append({"feature": "hierarchical-manager", "line": n.lineno})

        # Token caps on any call (model constructors, invocations).
        for k in n.keywords:
            if k.arg in CAP_KWARGS:
                self.caps.append({"kwarg": k.arg, "value": const_of(k.value),
                                  "line": n.lineno})
        # Heuristic count of LLM constructor calls.
        if last in ("ChatOpenAI", "ChatAnthropic", "ChatGoogleGenerativeAI", "ChatBedrock",
                    "AzureChatOpenAI", "ChatVertexAI", "OpenAI", "Anthropic", "LLM",
                    "init_chat_model", "ChatGroq", "ChatMistralAI", "ChatOllama"):
            self.llm_calls += 1
        self.generic_visit(n)

    def _scan_invoke(self, n):
        """Record recursion_limit / max_turns given at an invoke-like call site."""
        for k in n.keywords:
            if k.arg == "max_turns":
                # Tell a LITERAL None (deliberate deactivation) apart from a
                # non-constant expression (a real bound that cannot be recovered
                # statically).
                none_lit = isinstance(k.value, ast.Constant) and k.value.value is None
                self.bounds.append({"param": "max_turns", "value": const_of(k.value),
                                    "none_literal": none_lit,
                                    "source": "explicit", "line": n.lineno})
            if k.arg == "config" and isinstance(k.value, ast.Dict):
                for kk, vv in zip(k.value.keys, k.value.values):
                    if const_of(kk) == "recursion_limit":
                        self.bounds.append({"param": "recursion_limit",
                                            "value": const_of(vv),
                                            "source": "explicit", "line": n.lineno})

    def visit_Dict(self, n):
        """Record ``recursion_limit`` entries of any dict literal (configs built apart)."""
        for kk, vv in zip(n.keys, n.values):
            if const_of(kk) == "recursion_limit":
                self.bounds.append({"param": "recursion_limit", "value": const_of(vv),
                                    "source": "explicit", "line": n.lineno})
        self.generic_visit(n)
160
+
161
def const_or_endref(node):
    """Resolve *node* to a literal value or to the ``"START"`` / ``"END"`` marker.

    A literal wins; otherwise a bare name or attribute access spelled START or END is
    returned as that string. Anything else resolves to None.
    """
    literal = const_of(node)
    if literal is not None:
        return literal
    markers = ("START", "END")
    if isinstance(node, ast.Name):
        return node.id if node.id in markers else None
    if isinstance(node, ast.Attribute):
        return node.attr if node.attr in markers else None
    return None
167
+
168
def find_cycles(nodes, edges):
    """Return True when the resolved edges contain a directed cycle.

    Conservative: only edges with a known source and destination take part. Static
    edges and conditional-literal edges contribute, and a destination of ``"END"``
    never does. The search is an iterative depth-first traversal, so long edge chains
    cannot exhaust the interpreter's recursion limit.

    Args:
        nodes: accepted for interface compatibility; not used by the search.
        edges: edge dicts with ``kind`` plus ``src``/``dst`` or ``src``/``dsts``.
    """
    graph = {}
    for e in edges:
        if e["kind"] == "static" and e.get("src") and e.get("dst") and e["dst"] != "END":
            graph.setdefault(e["src"], set()).add(e["dst"])
        elif e["kind"] == "conditional-literal" and e.get("dsts"):
            # Without a source the edge cannot close a cycle, so it is skipped.
            if e.get("src"):
                for d in e["dsts"]:
                    if d and d != "END":
                        graph.setdefault(e["src"], set()).add(d)

    WHITE, GRAY, BLACK = 0, 1, 2
    color = {u: WHITE for u in graph}
    for start in list(graph):
        if color[start] != WHITE:
            continue
        color[start] = GRAY
        stack = [(start, iter(graph.get(start, ())))]
        while stack:
            current, successors = stack[-1]
            for nxt in successors:
                state = color.get(nxt, WHITE)
                if state == GRAY:
                    return True  # back edge onto the active path
                if state == WHITE:
                    color[nxt] = GRAY
                    stack.append((nxt, iter(graph.get(nxt, ()))))
                    break
            else:
                color[current] = BLACK
                stack.pop()
    return False
193
+
194
def extract_unit(unit_dir: Path, meta: dict) -> dict:
    """Extract the static facts of one graph unit from its source file.

    Args:
        unit_dir: directory containing the unit's file.
        meta: dict providing ``file`` (file name), ``unit_id`` and ``kind``.

    Returns:
        On success, a dict with ``status == "ok"`` carrying node counts, edges, cycle
        flags, bounds, caps, features, the LLM constructor count and the invoke-like
        calls found inside infinite loops. When the file cannot be parsed,
        ``{"unit_id", "status": "extractor-failure", "reason": "syntax"}``.

    Raises:
        OSError: when the file cannot be read.
    """
    f = unit_dir / meta["file"]
    src = f.read_text(encoding="utf-8", errors="ignore")
    try:
        tree = ast.parse(src)
    except (SyntaxError, ValueError, RecursionError):
        # ValueError: NUL bytes in the source (Python < 3.12); RecursionError:
        # pathologically nested code. Both are reported as a parse failure.
        return {"unit_id": meta["unit_id"], "status": "extractor-failure", "reason": "syntax"}
    ex = Extractor(src)
    ex.visit(tree)
    has_cycle = find_cycles(ex.nodes, ex.edges)
    # "Implicit" cycle typical of LangGraph: a conditional edge that routes back to an
    # earlier node. Any conditional-literal edge with at least one destination other
    # than END is treated as a possible cycle.
    cond_back = any(e["kind"] == "conditional-literal" and e.get("dsts") and
                    any(d for d in e["dsts"] if d and d != "END") for e in ex.edges)
    return {
        "unit_id": meta["unit_id"], "kind": meta["kind"], "status": "ok",
        "n_nodes": len(ex.nodes), "n_nodes_named": sum(1 for n, _ in ex.nodes if n),
        "n_nodes_dynamic": sum(1 for n, _ in ex.nodes if n is None),
        "edges": ex.edges, "has_static_cycle": has_cycle, "cond_may_cycle": cond_back,
        "bounds": ex.bounds, "caps": ex.caps, "features": ex.features,
        "llm_constructors": ex.llm_calls, "while_true_invokes": ex.while_true_invokes,
    }