costwright 0.2.2__tar.gz → 0.2.4__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: costwright
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Static budget certificates for LLM-agent workflows (LangGraph / CrewAI / OpenAI Agents SDK). Backed by a machine-checked (Lean 4) cost-soundness theorem.
5
5
  Author: Hernán Inverso
6
6
  License: Apache-2.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "costwright"
7
- version = "0.2.2"
7
+ version = "0.2.4"
8
8
  description = "Static budget certificates for LLM-agent workflows (LangGraph / CrewAI / OpenAI Agents SDK). Backed by a machine-checked (Lean 4) cost-soundness theorem."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -0,0 +1 @@
1
+ __version__ = "0.2.4"
@@ -62,25 +62,36 @@ def scan_file(path: Path):
62
62
  "why": "el archivo no parsea (SyntaxError) y menciona un constructor LLM — los token-caps "
63
63
  "NO se pudieron verificar; no asumir que están acotados"}], src
64
64
  return [], None
65
+ # `from langchain_openai import ChatOpenAI as LLM2` → an aliased constructor escapes the by-name lookup and
66
+ # its missing cap is silently NOT reported (codex/Cursor r81). Resolve `from X import Ctor as local` so the
67
+ # aliased call matches PROVIDER_CAPS.
68
+ alias = {a.asname: a.name for nd in ast.walk(tree) if isinstance(nd, ast.ImportFrom)
69
+ for a in nd.names if a.asname}
65
70
  findings = []
66
71
  for node in ast.walk(tree):
67
72
  if not isinstance(node, ast.Call):
68
73
  continue
69
- name = call_name(node)
70
- if name not in PROVIDER_CAPS:
74
+ name = call_name(node) # source name (what make_patch matches + what we display)
75
+ resolved = alias.get(name, name) # the real constructor, for the provider/kwarg lookup
76
+ if resolved not in PROVIDER_CAPS:
71
77
  continue
72
78
  kwargs_present = {k.arg for k in node.keywords if k.arg}
73
- provider, kwarg, note = PROVIDER_CAPS[name]
79
+ provider, kwarg, note = PROVIDER_CAPS[resolved]
74
80
  # detección best-effort de reasoning model por el kwarg `model` (audit-3 gpt-5.5 P0):
75
81
  # en Chat API los o-series/GPT-5 ignoran max_tokens; el cap real es max_completion_tokens
76
82
  model_val = next((k.value.value for k in node.keywords
77
83
  if k.arg == "model" and isinstance(k.value, ast.Constant)
78
84
  and isinstance(k.value.value, str)), "")
85
+ # the model can also be the FIRST POSITIONAL arg — `ChatOpenAI("gpt-5")` (codex/Cursor r76); otherwise
86
+ # a reasoning model passed positionally would escape the reasoning detection below.
87
+ if not model_val and node.args and isinstance(node.args[0], ast.Constant) \
88
+ and isinstance(node.args[0].value, str):
89
+ model_val = node.args[0].value
79
90
  reasoning = any(model_val.startswith(p) for p in
80
91
  ("o1", "o3", "o4", "gpt-5")) if model_val else False
81
92
  # SOLO Chat-API constructors (audit-3 R2 gpt-5.5): el constructor `OpenAI` es
82
93
  # Responses API y su cap correcto sigue siendo max_output_tokens, reasoning o no
83
- if name in ("ChatOpenAI", "AzureChatOpenAI") and reasoning:
94
+ if resolved in ("ChatOpenAI", "AzureChatOpenAI") and reasoning:
84
95
  kwarg = "max_completion_tokens"
85
96
  note = "reasoning model en Chat API: max_tokens es IGNORADO; usar max_completion_tokens"
86
97
  # an EFFECTIVE cap = the CONSTRUCTOR'S correct kwarg (post reasoning-adjustment) present as a positive
@@ -122,7 +133,7 @@ def scan_file(path: Path):
122
133
  "suggest_kwarg": None,
123
134
  "why": "Anthropic: con interleaved/adaptive thinking el budget puede EXCEDER max_tokens — el techo solo vale en modo standard (budget_tokens < max_tokens)",
124
135
  })
125
- elif name in ("ChatOpenAI", "AzureChatOpenAI") and reasoning and "max_completion_tokens" not in kwargs_present:
136
+ elif resolved in ("ChatOpenAI", "AzureChatOpenAI") and reasoning and "max_completion_tokens" not in kwargs_present:
126
137
  findings.append({
127
138
  "kind": "degraded", "constructor": name, "provider": provider,
128
139
  "line": node.lineno, "have": sorted(kwargs_present & CAP_KWARGS),
@@ -134,30 +145,38 @@ def scan_file(path: Path):
134
145
 
135
146
 
136
147
  def make_patch(path: Path, src: str, findings, cap_value: int) -> str:
137
- """Unified diff que agrega `kwarg=cap_value` a cada constructor sin cap.
138
- Edición textual mínima: insertar el kwarg tras el paréntesis de apertura del call.
139
- NUNCA escribe el archivo solo el diff (council 002 P0-2)."""
148
+ """Unified diff que agrega `kwarg=cap_value` como ÚLTIMO argumento de cada constructor sin cap.
149
+ Inserción basada en AST (robusta a args POSICIONALES, strings con paréntesis, y kwargs previos): el kwarg
150
+ va antes del `)` de cierre del call, NUNCA tras el `(` (eso produciría `Ctor(kwarg=…, "positional")` =
151
+ SyntaxError — codex/Cursor r76). NUNCA escribe el archivo — solo el diff (council 002 P0-2)."""
152
+ try:
153
+ tree = ast.parse(src)
154
+ except SyntaxError:
155
+ return ""
156
+ # map (lineno, constructor) → list of Call nodes, to insert at the exact end of the right call
157
+ by_key = {}
158
+ for node in ast.walk(tree):
159
+ if isinstance(node, ast.Call):
160
+ by_key.setdefault((node.lineno, call_name(node)), []).append(node)
140
161
  lines = src.splitlines(keepends=True)
141
162
  new_lines = list(lines)
142
- # de abajo hacia arriba para no correr line numbers
143
- for f in sorted((f for f in findings if f["kind"] == "missing"),
144
- key=lambda x: -x["line"]):
145
- i = f["line"] - 1
146
- if i >= len(new_lines):
147
- continue
163
+ edits = [] # (line_index, col, text) applied right-to-left so columns don't shift
164
+ for f in (f for f in findings if f["kind"] == "missing"):
165
+ cands = by_key.get((f["line"], f["constructor"]), [])
166
+ if len(cands) != 1:
167
+ continue # 0 or >1 matching calls on the line → ambiguous, skip (the finding is still reported)
168
+ call = cands[0]
169
+ if call.end_lineno != call.lineno or call.end_col_offset is None:
170
+ continue # multi-line call → skip (conservative)
171
+ i = call.lineno - 1
172
+ close = call.end_col_offset - 1 # column of the closing ')'
173
+ had_args = bool(call.args) or bool(call.keywords)
174
+ sep = ", " if had_args else ""
175
+ edits.append((i, close, f"{sep}{f['suggest_kwarg']}={cap_value}"))
176
+ # apply right-to-left (highest column on a line first) so earlier insertions don't shift later columns
177
+ for i, col, text in sorted(edits, key=lambda e: (e[0], -e[1])):
148
178
  line = new_lines[i]
149
- ctor = f["constructor"]
150
- # audit-3 (gemini P0): si hay >1 ocurrencia del constructor en la línea, NO parchear
151
- # (la inserción textual no sabe cuál es cuál) — conservador, el hallazgo igual se reporta
152
- if line.count(ctor + "(") != 1:
153
- continue
154
- idx = line.find(ctor + "(")
155
- if idx < 0:
156
- continue # constructor multilínea: skip (conservador)
157
- insert_at = idx + len(ctor) + 1
158
- rest = line[insert_at:]
159
- sep = "" if rest.lstrip().startswith(")") else ", "
160
- new_lines[i] = line[:insert_at] + f"{f['suggest_kwarg']}={cap_value}{sep}" + rest
179
+ new_lines[i] = line[:col] + text + line[col:]
161
180
  if new_lines == lines:
162
181
  return ""
163
182
  rel = str(path)
@@ -47,6 +47,10 @@ def _find_units(root: Path, max_files: int):
47
47
  except SyntaxError:
48
48
  units.append({"file": py, "kind": "unknown", "line": 0, "syntax_error": True})
49
49
  continue
50
+ # resolve `from langgraph.graph import StateGraph as SG` so an aliased framework constructor is still
51
+ # discovered as a unit (codex/Cursor r81) — otherwise the graph is silently dropped from the report.
52
+ alias = {a.asname: a.name for nd in _ast.walk(tree) if isinstance(nd, _ast.ImportFrom)
53
+ for a in nd.names if a.asname}
50
54
  for node in _ast.walk(tree):
51
55
  if not isinstance(node, _ast.Call):
52
56
  continue
@@ -55,6 +59,7 @@ def _find_units(root: Path, max_files: int):
55
59
  f"{f.value.id}.{f.attr}" if isinstance(f, _ast.Attribute)
56
60
  and isinstance(f.value, _ast.Name) else
57
61
  (f.attr if isinstance(f, _ast.Attribute) else ""))
62
+ nm = alias.get(nm, nm)
58
63
  kind = None
59
64
  if nm == "StateGraph":
60
65
  kind = "langgraph"
@@ -143,6 +148,11 @@ def cmd_caps(args) -> int:
143
148
  print(f"\n {total} finding(s) in {len(per_file)} file(s) "
144
149
  f"({scanned} scanned). Use --patch to emit a unified diff.")
145
150
  if args.patch:
151
+ if args.cap < 1:
152
+ # a cap of 0/negative is not an effective token bound — the patch would insert an inert kwarg
153
+ # that costwright itself flags `ineffective` (codex r75). Refuse instead of suggesting it.
154
+ print(f"costwright: --cap must be a positive integer (got {args.cap})", file=sys.stderr)
155
+ return 2
146
156
  chunks = []
147
157
  for p, (fs, src) in sorted(per_file.items()):
148
158
  d = caps_mod.make_patch(p.relative_to(root), src, fs, args.cap)
@@ -224,10 +224,24 @@ class Extractor(ast.NodeVisitor):
224
224
  if mi is not None:
225
225
  s.bounds.append({"param": "max_iter", "value": const_of(mi.value),
226
226
  "source": "explicit", "line": n.lineno})
227
+ elif any(k.arg is None for k in n.keywords):
228
+ # a **kwargs spread on the Agent could carry max_iter (huge or disabling) → unrecoverable; do NOT
229
+ # fall back to the framework default 20 (that would understate) → fail closed (codex/Cursor r80).
230
+ s.bounds.append({"param": "max_iter", "value": None, "source": "explicit", "line": n.lineno})
227
231
  # CrewAI Agent sin max_iter → default 20 (lo decide el mapper por-kind)
228
232
  elif last == "Crew":
233
+ # a hierarchical Crew runs a MANAGER that re-delegates (an unbounded loop) → fail closed. A
234
+ # `manager_agent=` or `manager_llm=` kwarg implies hierarchical coordination (codex/Cursor r80), as
235
+ # does any `process=` that is NOT a confirmed-sequential LITERAL (`Process.sequential` / "sequential")
236
+ # — a hierarchical literal, a VARIABLE (`mode = Process.hierarchical; process=mode` — codex r75), or
237
+ # any computed expression could be the manager loop.
238
+ has_manager = any(k.arg in ("manager_agent", "manager_llm") for k in n.keywords)
239
+ spread = any(k.arg is None for k in n.keywords) # **cfg could hide process=hierarchical / a manager
229
240
  proc = next((k for k in n.keywords if k.arg == "process"), None)
230
- if proc is not None and "hierarchical" in ast.dump(proc.value):
241
+ confirmed_sequential = (proc is not None and isinstance(proc.value, (ast.Attribute, ast.Constant))
242
+ and "sequential" in ast.dump(proc.value)
243
+ and "hierarchical" not in ast.dump(proc.value))
244
+ if has_manager or spread or (proc is not None and not confirmed_sequential):
231
245
  s.features.append({"feature": "hierarchical-manager", "line": n.lineno})
232
246
 
233
247
  # caps de tokens en cualquier call (constructores de modelos, llamadas)
@@ -243,6 +257,12 @@ class Extractor(ast.NodeVisitor):
243
257
 
244
258
  def _scan_invoke(s, n):
245
259
  """Busca recursion_limit / max_turns en el config del call-site (D2)."""
260
+ # a **kwargs spread on an invoke/run call is OPAQUE — it could carry a max_turns / recursion_limit that
261
+ # DISABLES the cap (e.g. `Runner.run(a, **{"max_turns": None})`, `app.invoke({}, **opts)`) and the bound
262
+ # would be unrecoverable → record an UNRESOLVED bound so the mapper fails closed (codex/Cursor r79).
263
+ if any(k.arg is None for k in n.keywords):
264
+ s.bounds.append({"param": "invoke-kwargs-spread", "value": None,
265
+ "source": "explicit", "line": n.lineno})
246
266
  for k in n.keywords:
247
267
  if k.arg == "max_turns":
248
268
  # distinguir None LITERAL (desactivación deliberada) de expresión no-constante
@@ -80,6 +80,8 @@ _SLA_MODES = {"strict", "balanced"}
80
80
  # it is a reported analysis the signed bundle BINDS (tamper-evidence) and whose ARITHMETIC fusion
81
81
  # re-checks in pure stdlib — but whose operational ASSUMPTIONS fusion canNOT verify (self-asserted).
82
82
  _INTERF_KIND = "tv-coupling-bound"
83
+ _CHANNEL_COVERED = "budget-cap-distribution-shift (channel 1 of N; N unknown)"
84
+ _SOURCE_ESTIMATOR = "eleata-verify.epsilon.interference_risk_bound"
83
85
  _ASSURANCE_LEVELS = {"self_asserted", "evidence_attached", "independently_reviewed"}
84
86
  _ASSUMPTIONS = {"A", "C", "D"} # the operational assumptions the (ii) bound needs
85
87
  # status is NEVER "bounded" (council P0-1: no word that reads as a guarantee). Derived by fusion.
@@ -423,8 +425,8 @@ def conditional_analysis_from_epsilon(epsilon_bound: dict, *, assumptions_attest
423
425
  _bound_auth = _inflate_alpha(float(_ab), _eps_auth, float(_cu))
424
426
  block = {
425
427
  "kind": _INTERF_KIND,
426
- "channel_covered": "budget-cap-distribution-shift (channel 1 of N; N unknown)",
427
- "source_estimator": "eleata-verify.epsilon.interference_risk_bound",
428
+ "channel_covered": _CHANNEL_COVERED,
429
+ "source_estimator": _SOURCE_ESTIMATOR,
428
430
  "verify_version": str(verify_version),
429
431
  "note": INTERF_NOTE,
430
432
  "channel1_conditional_risk_upper": _bound_auth, # RECOMPUTED, not the caller's alpha_effective
@@ -562,6 +564,16 @@ def _validate_conditional_analyses(ca: dict, risk_block: dict) -> dict:
562
564
  out["assumptions_complete"] = assumptions_complete
563
565
  out["bound_verification"] = bound_verification
564
566
  out["open_channels_non_exhaustive"] = True # forced — the list is non-exhaustive by construction
567
+ # FORCE the honesty/provenance fields to costwright's own constants — the caller cannot inject a
568
+ # `disclaimer="GUARANTEED SAFE"`, a reassuring `note`, a shrunk `open_channels=["none"]`, or a misleading
569
+ # `channel_covered`/`source_estimator` into the signed bundle (codex r78). Only measured PRIMITIVES
570
+ # (k, m, δ_eps, α, c, attestations) come from the caller; every honesty string is costwright's.
571
+ out["kind"] = _INTERF_KIND
572
+ out["channel_covered"] = _CHANNEL_COVERED
573
+ out["source_estimator"] = _SOURCE_ESTIMATOR
574
+ out["note"] = INTERF_NOTE
575
+ out["open_channels"] = list(_OPEN_CHANNELS)
576
+ out["disclaimer"] = NON_INTERFERENCE
565
577
  return {"channel1_budget_cap_risk": out}
566
578
 
567
579
 
@@ -616,6 +628,16 @@ def fuse(costwright_v1_report: dict, verify_result_dict: dict, *, run_id: str,
616
628
 
617
629
  # --- human output -----------------------------------------------------------------------------------
618
630
  _RISK_BADGE = {"answered": "✓", "abstained": "↻", "uncertified": "∅"}
631
+ # the ✓ for an ANSWERED risk certificate must depend on the VERDICT — a Refuted/Conflicting claim answered
632
+ # within the SLA is NOT a green check (it would read as "safe" when the verifier says the claim is false /
633
+ # the evidence conflicts — codex r81). Verdict-aware glyph; abstained/uncertified keep their status glyph.
634
+ _VERDICT_BADGE = {"Supported": "✓", "Refuted": "✗", "Conflicting": "⚠", "Not Enough Evidence": "▲"}
635
+
636
+
637
+ def _risk_glyph(status: str, verdict: str) -> str:
638
+ if status == "answered":
639
+ return _VERDICT_BADGE.get(verdict, "?")
640
+ return _RISK_BADGE.get(status, "?")
619
641
  _COST_BADGE = {"certifiable": "✓", "default_dependent": "▲", "non_certifiable": "✗",
620
642
  "runaway": "‼", "parse_error": "·", "no_graph_units": "·"}
621
643
  # NEUTRAL glyphs only — NEVER green ✓ for a conditional analysis (council P0-4: must not read as approval).
@@ -675,7 +697,7 @@ def pretty(bundle: dict) -> str:
675
697
  + (f" [{vac} vacuous default bound(s)]" if vac else ""),
676
698
  f" scope: {c['scope']}",
677
699
  f" backing: {c['theorem']['mechanized']}",
678
- f" {_RISK_BADGE.get(r['status'], '?')} RISK ({r['source']}, v{r['verify_version']}) "
700
+ f" {_risk_glyph(r['status'], r['verdict'])} RISK ({r['source']}, v{r['verify_version']}) "
679
701
  f"status={r['status']} verdict={r['verdict']} conf={r['calibrated_confidence']} "
680
702
  f"SLA≤{r['sla_alpha']} ({r['sla_mode']}, certified={r['sla_certified']})"
681
703
  + (" ⚠score-outlier" if r["score_outlier_warning"] else ""),
@@ -653,12 +653,21 @@ def compose(ex_flat: dict) -> dict | None:
653
653
  return {**base, "category": "no-mapeable:subgraph-node",
654
654
  "reason": f"no unique outer graph (candidates {sorted(outers)})"}
655
655
 
656
- outer = outers[0]
657
- res = _resolve(outer, A, seen=frozenset(), depth=0)
658
- if res["category"] == "non_certifiable":
659
- return {**base, "category": "no-mapeable:subgraph-node", "reason": res["prov"]}
660
- if res["category"] == "runaway":
661
- return {**base, "category": "rechaza-con-razon", "reason": res["prov"]}
656
+ # TOP-LEVEL RUNS (codex r82): the unique outer, PLUS any inner subgraph that is ALSO invoked STANDALONE
657
+ # (`mid.compile().invoke(recursion_limit=9000)` while mid is also a node of outer). A standalone invoke is a
658
+ # SEPARATE top-level run whose per-run bound the outer composition does NOT cover (mid runs at its own,
659
+ # possibly-LARGER, limit — not the inherited one). The reported ceiling is the MAX over all top-level runs;
660
+ # reporting only the outer would hide a bigger standalone subgraph run.
661
+ toplevel = [outers[0]] + [iv for iv in sorted(inner_vars) if iv in A["invoke_limit"]]
662
+ results = []
663
+ for v in toplevel:
664
+ r = _resolve(v, A, seen=frozenset(), depth=0)
665
+ if r["category"] == "non_certifiable":
666
+ return {**base, "category": "no-mapeable:subgraph-node", "reason": r["prov"]}
667
+ if r["category"] == "runaway":
668
+ return {**base, "category": "rechaza-con-razon", "reason": r["prov"]}
669
+ results.append(r)
670
+ res = max(results, key=lambda r: r["bound_factor"])
662
671
  # the EFFECTIVE outer steps that _resolve actually used (e.g. the default 1000 when a no-config invoke
663
672
  # dominates an explicit 50 — Cursor r32) drive `supersteps`, kept consistent with the bound and the
664
673
  # composition string. The composed total is the node-executions ceiling (aggregation=sum renders it as
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: costwright
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Static budget certificates for LLM-agent workflows (LangGraph / CrewAI / OpenAI Agents SDK). Backed by a machine-checked (Lean 4) cost-soundness theorem.
5
5
  Author: Hernán Inverso
6
6
  License: Apache-2.0
@@ -1 +0,0 @@
1
- __version__ = "0.2.2"
File without changes
File without changes
File without changes