PyPI - costwright - Versions diffs - 0.2.2__tar.gz → 0.2.4__tar.gz - Mend

costwright 0.2.2 (tar.gz) → 0.2.4 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

{costwright-0.2.2 → costwright-0.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: costwright
-Version: 0.2.2
+Version: 0.2.4
 Summary: Static budget certificates for LLM-agent workflows (LangGraph / CrewAI / OpenAI Agents SDK). Backed by a machine-checked (Lean 4) cost-soundness theorem.
 Author: Hernán Inverso
 License: Apache-2.0

{costwright-0.2.2 → costwright-0.2.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "costwright"
-version = "0.2.2"
+version = "0.2.4"
 description = "Static budget certificates for LLM-agent workflows (LangGraph / CrewAI / OpenAI Agents SDK). Backed by a machine-checked (Lean 4) cost-soundness theorem."
 readme = "README.md"
 requires-python = ">=3.10"

costwright-0.2.4/src/costwright/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "0.2.4"

{costwright-0.2.2 → costwright-0.2.4}/src/costwright/caps.py RENAMED Viewed

@@ -62,25 +62,36 @@ def scan_file(path: Path):
                      "why": "el archivo no parsea (SyntaxError) y menciona un constructor LLM — los token-caps "
                             "NO se pudieron verificar; no asumir que están acotados"}], src
         return [], None
+    # `from langchain_openai import ChatOpenAI as LLM2` → an aliased constructor escapes the by-name lookup and
+    # its missing cap is silently NOT reported (codex/Cursor r81). Resolve `from X import Ctor as local` so the
+    # aliased call matches PROVIDER_CAPS.
+    alias = {a.asname: a.name for nd in ast.walk(tree) if isinstance(nd, ast.ImportFrom)
+             for a in nd.names if a.asname}
     findings = []
     for node in ast.walk(tree):
         if not isinstance(node, ast.Call):
             continue
-        name = call_name(node)
-        if name not in PROVIDER_CAPS:
+        name = call_name(node)                 # source name (what make_patch matches + what we display)
+        resolved = alias.get(name, name)       # the real constructor, for the provider/kwarg lookup
+        if resolved not in PROVIDER_CAPS:
             continue
         kwargs_present = {k.arg for k in node.keywords if k.arg}
-        provider, kwarg, note = PROVIDER_CAPS[name]
+        provider, kwarg, note = PROVIDER_CAPS[resolved]
         # detección best-effort de reasoning model por el kwarg `model` (audit-3 gpt-5.5 P0):
         # en Chat API los o-series/GPT-5 ignoran max_tokens; el cap real es max_completion_tokens
         model_val = next((k.value.value for k in node.keywords
                           if k.arg == "model" and isinstance(k.value, ast.Constant)
                           and isinstance(k.value.value, str)), "")
+        # the model can also be the FIRST POSITIONAL arg — `ChatOpenAI("gpt-5")` (codex/Cursor r76); otherwise
+        # a reasoning model passed positionally would escape the reasoning detection below.
+        if not model_val and node.args and isinstance(node.args[0], ast.Constant) \
+                and isinstance(node.args[0].value, str):
+            model_val = node.args[0].value
         reasoning = any(model_val.startswith(p) for p in
                         ("o1", "o3", "o4", "gpt-5")) if model_val else False
         # SOLO Chat-API constructors (audit-3 R2 gpt-5.5): el constructor `OpenAI` es
         # Responses API y su cap correcto sigue siendo max_output_tokens, reasoning o no
-        if name in ("ChatOpenAI", "AzureChatOpenAI") and reasoning:
+        if resolved in ("ChatOpenAI", "AzureChatOpenAI") and reasoning:
             kwarg = "max_completion_tokens"
             note = "reasoning model en Chat API: max_tokens es IGNORADO; usar max_completion_tokens"
         # an EFFECTIVE cap = the CONSTRUCTOR'S correct kwarg (post reasoning-adjustment) present as a positive
@@ -122,7 +133,7 @@ def scan_file(path: Path):
                     "suggest_kwarg": None,
                     "why": "Anthropic: con interleaved/adaptive thinking el budget puede EXCEDER max_tokens — el techo solo vale en modo standard (budget_tokens < max_tokens)",
                 })
-            elif name in ("ChatOpenAI", "AzureChatOpenAI") and reasoning and "max_completion_tokens" not in kwargs_present:
+            elif resolved in ("ChatOpenAI", "AzureChatOpenAI") and reasoning and "max_completion_tokens" not in kwargs_present:
                 findings.append({
                     "kind": "degraded", "constructor": name, "provider": provider,
                     "line": node.lineno, "have": sorted(kwargs_present & CAP_KWARGS),
@@ -134,30 +145,38 @@ def scan_file(path: Path):
 def make_patch(path: Path, src: str, findings, cap_value: int) -> str:
-    """Unified diff que agrega `kwarg=cap_value` a cada constructor sin cap.
-    Edición textual mínima: insertar el kwarg tras el paréntesis de apertura del call.
-    NUNCA escribe el archivo — solo el diff (council 002 P0-2)."""
+    """Unified diff que agrega `kwarg=cap_value` como ÚLTIMO argumento de cada constructor sin cap.
+    Inserción basada en AST (robusta a args POSICIONALES, strings con paréntesis, y kwargs previos): el kwarg
+    va antes del `)` de cierre del call, NUNCA tras el `(` (eso produciría `Ctor(kwarg=…, "positional")` =
+    SyntaxError — codex/Cursor r76). NUNCA escribe el archivo — solo el diff (council 002 P0-2)."""
+    try:
+        tree = ast.parse(src)
+    except SyntaxError:
+        return ""
+    # map (lineno, constructor) → list of Call nodes, to insert at the exact end of the right call
+    by_key = {}
+    for node in ast.walk(tree):
+        if isinstance(node, ast.Call):
+            by_key.setdefault((node.lineno, call_name(node)), []).append(node)
     lines = src.splitlines(keepends=True)
     new_lines = list(lines)
-    # de abajo hacia arriba para no correr line numbers
-    for f in sorted((f for f in findings if f["kind"] == "missing"),
-                    key=lambda x: -x["line"]):
-        i = f["line"] - 1
-        if i >= len(new_lines):
-            continue
+    edits = []   # (line_index, col, text) — applied right-to-left so columns don't shift
+    for f in (f for f in findings if f["kind"] == "missing"):
+        cands = by_key.get((f["line"], f["constructor"]), [])
+        if len(cands) != 1:
+            continue   # 0 or >1 matching calls on the line → ambiguous, skip (the finding is still reported)
+        call = cands[0]
+        if call.end_lineno != call.lineno or call.end_col_offset is None:
+            continue   # multi-line call → skip (conservative)
+        i = call.lineno - 1
+        close = call.end_col_offset - 1   # column of the closing ')'
+        had_args = bool(call.args) or bool(call.keywords)
+        sep = ", " if had_args else ""
+        edits.append((i, close, f"{sep}{f['suggest_kwarg']}={cap_value}"))
+    # apply right-to-left (highest column on a line first) so earlier insertions don't shift later columns
+    for i, col, text in sorted(edits, key=lambda e: (e[0], -e[1])):
         line = new_lines[i]
-        ctor = f["constructor"]
-        # audit-3 (gemini P0): si hay >1 ocurrencia del constructor en la línea, NO parchear
-        # (la inserción textual no sabe cuál es cuál) — conservador, el hallazgo igual se reporta
-        if line.count(ctor + "(") != 1:
-            continue
-        idx = line.find(ctor + "(")
-        if idx < 0:
-            continue  # constructor multilínea: skip (conservador)
-        insert_at = idx + len(ctor) + 1
-        rest = line[insert_at:]
-        sep = "" if rest.lstrip().startswith(")") else ", "
-        new_lines[i] = line[:insert_at] + f"{f['suggest_kwarg']}={cap_value}{sep}" + rest
+        new_lines[i] = line[:col] + text + line[col:]
     if new_lines == lines:
         return ""
     rel = str(path)

{costwright-0.2.2 → costwright-0.2.4}/src/costwright/cli.py RENAMED Viewed

@@ -47,6 +47,10 @@ def _find_units(root: Path, max_files: int):
         except SyntaxError:
             units.append({"file": py, "kind": "unknown", "line": 0, "syntax_error": True})
             continue
+        # resolve `from langgraph.graph import StateGraph as SG` so an aliased framework constructor is still
+        # discovered as a unit (codex/Cursor r81) — otherwise the graph is silently dropped from the report.
+        alias = {a.asname: a.name for nd in _ast.walk(tree) if isinstance(nd, _ast.ImportFrom)
+                 for a in nd.names if a.asname}
         for node in _ast.walk(tree):
             if not isinstance(node, _ast.Call):
                 continue
@@ -55,6 +59,7 @@ def _find_units(root: Path, max_files: int):
                 f"{f.value.id}.{f.attr}" if isinstance(f, _ast.Attribute)
                 and isinstance(f.value, _ast.Name) else
                 (f.attr if isinstance(f, _ast.Attribute) else ""))
+            nm = alias.get(nm, nm)
             kind = None
             if nm == "StateGraph":
                 kind = "langgraph"
@@ -143,6 +148,11 @@ def cmd_caps(args) -> int:
             print(f"\n  {total} finding(s) in {len(per_file)} file(s) "
                   f"({scanned} scanned). Use --patch to emit a unified diff.")
         if args.patch:
+            if args.cap < 1:
+                # a cap of 0/negative is not an effective token bound — the patch would insert an inert kwarg
+                # that costwright itself flags `ineffective` (codex r75). Refuse instead of suggesting it.
+                print(f"costwright: --cap must be a positive integer (got {args.cap})", file=sys.stderr)
+                return 2
             chunks = []
             for p, (fs, src) in sorted(per_file.items()):
                 d = caps_mod.make_patch(p.relative_to(root), src, fs, args.cap)

{costwright-0.2.2 → costwright-0.2.4}/src/costwright/extract.py RENAMED Viewed

@@ -224,10 +224,24 @@ class Extractor(ast.NodeVisitor):
             if mi is not None:
                 s.bounds.append({"param": "max_iter", "value": const_of(mi.value),
                                  "source": "explicit", "line": n.lineno})
+            elif any(k.arg is None for k in n.keywords):
+                # a **kwargs spread on the Agent could carry max_iter (huge or disabling) → unrecoverable; do NOT
+                # fall back to the framework default 20 (that would understate) → fail closed (codex/Cursor r80).
+                s.bounds.append({"param": "max_iter", "value": None, "source": "explicit", "line": n.lineno})
             # CrewAI Agent sin max_iter → default 20 (lo decide el mapper por-kind)
         elif last == "Crew":
+            # a hierarchical Crew runs a MANAGER that re-delegates (an unbounded loop) → fail closed. A
+            # `manager_agent=` or `manager_llm=` kwarg implies hierarchical coordination (codex/Cursor r80), as
+            # does any `process=` that is NOT a confirmed-sequential LITERAL (`Process.sequential` / "sequential")
+            # — a hierarchical literal, a VARIABLE (`mode = Process.hierarchical; process=mode` — codex r75), or
+            # any computed expression could be the manager loop.
+            has_manager = any(k.arg in ("manager_agent", "manager_llm") for k in n.keywords)
+            spread = any(k.arg is None for k in n.keywords)   # **cfg could hide process=hierarchical / a manager
             proc = next((k for k in n.keywords if k.arg == "process"), None)
-            if proc is not None and "hierarchical" in ast.dump(proc.value):
+            confirmed_sequential = (proc is not None and isinstance(proc.value, (ast.Attribute, ast.Constant))
+                                    and "sequential" in ast.dump(proc.value)
+                                    and "hierarchical" not in ast.dump(proc.value))
+            if has_manager or spread or (proc is not None and not confirmed_sequential):
                 s.features.append({"feature": "hierarchical-manager", "line": n.lineno})
         # caps de tokens en cualquier call (constructores de modelos, llamadas)
@@ -243,6 +257,12 @@ class Extractor(ast.NodeVisitor):
     def _scan_invoke(s, n):
         """Busca recursion_limit / max_turns en el config del call-site (D2)."""
+        # a **kwargs spread on an invoke/run call is OPAQUE — it could carry a max_turns / recursion_limit that
+        # DISABLES the cap (e.g. `Runner.run(a, **{"max_turns": None})`, `app.invoke({}, **opts)`) and the bound
+        # would be unrecoverable → record an UNRESOLVED bound so the mapper fails closed (codex/Cursor r79).
+        if any(k.arg is None for k in n.keywords):
+            s.bounds.append({"param": "invoke-kwargs-spread", "value": None,
+                             "source": "explicit", "line": n.lineno})
         for k in n.keywords:
             if k.arg == "max_turns":
                 # distinguir None LITERAL (desactivación deliberada) de expresión no-constante

{costwright-0.2.2 → costwright-0.2.4}/src/costwright/fusion.py RENAMED Viewed

@@ -80,6 +80,8 @@ _SLA_MODES = {"strict", "balanced"}
 # it is a reported analysis the signed bundle BINDS (tamper-evidence) and whose ARITHMETIC fusion
 # re-checks in pure stdlib — but whose operational ASSUMPTIONS fusion canNOT verify (self-asserted).
 _INTERF_KIND = "tv-coupling-bound"
+_CHANNEL_COVERED = "budget-cap-distribution-shift (channel 1 of N; N unknown)"
+_SOURCE_ESTIMATOR = "eleata-verify.epsilon.interference_risk_bound"
 _ASSURANCE_LEVELS = {"self_asserted", "evidence_attached", "independently_reviewed"}
 _ASSUMPTIONS = {"A", "C", "D"}                            # the operational assumptions the (ii) bound needs
 # status is NEVER "bounded" (council P0-1: no word that reads as a guarantee). Derived by fusion.
@@ -423,8 +425,8 @@ def conditional_analysis_from_epsilon(epsilon_bound: dict, *, assumptions_attest
     _bound_auth = _inflate_alpha(float(_ab), _eps_auth, float(_cu))
     block = {
         "kind": _INTERF_KIND,
-        "channel_covered": "budget-cap-distribution-shift (channel 1 of N; N unknown)",
-        "source_estimator": "eleata-verify.epsilon.interference_risk_bound",
+        "channel_covered": _CHANNEL_COVERED,
+        "source_estimator": _SOURCE_ESTIMATOR,
         "verify_version": str(verify_version),
         "note": INTERF_NOTE,
         "channel1_conditional_risk_upper": _bound_auth,   # RECOMPUTED, not the caller's alpha_effective
@@ -562,6 +564,16 @@ def _validate_conditional_analyses(ca: dict, risk_block: dict) -> dict:
     out["assumptions_complete"] = assumptions_complete
     out["bound_verification"] = bound_verification
     out["open_channels_non_exhaustive"] = True        # forced — the list is non-exhaustive by construction
+    # FORCE the honesty/provenance fields to costwright's own constants — the caller cannot inject a
+    # `disclaimer="GUARANTEED SAFE"`, a reassuring `note`, a shrunk `open_channels=["none"]`, or a misleading
+    # `channel_covered`/`source_estimator` into the signed bundle (codex r78). Only measured PRIMITIVES
+    # (k, m, δ_eps, α, c, attestations) come from the caller; every honesty string is costwright's.
+    out["kind"] = _INTERF_KIND
+    out["channel_covered"] = _CHANNEL_COVERED
+    out["source_estimator"] = _SOURCE_ESTIMATOR
+    out["note"] = INTERF_NOTE
+    out["open_channels"] = list(_OPEN_CHANNELS)
+    out["disclaimer"] = NON_INTERFERENCE
     return {"channel1_budget_cap_risk": out}
@@ -616,6 +628,16 @@ def fuse(costwright_v1_report: dict, verify_result_dict: dict, *, run_id: str,
 # --- human output -----------------------------------------------------------------------------------
 _RISK_BADGE = {"answered": "✓", "abstained": "↻", "uncertified": "∅"}
+# the ✓ for an ANSWERED risk certificate must depend on the VERDICT — a Refuted/Conflicting claim answered
+# within the SLA is NOT a green check (it would read as "safe" when the verifier says the claim is false /
+# the evidence conflicts — codex r81). Verdict-aware glyph; abstained/uncertified keep their status glyph.
+_VERDICT_BADGE = {"Supported": "✓", "Refuted": "✗", "Conflicting": "⚠", "Not Enough Evidence": "▲"}
+def _risk_glyph(status: str, verdict: str) -> str:
+    if status == "answered":
+        return _VERDICT_BADGE.get(verdict, "?")
+    return _RISK_BADGE.get(status, "?")
 _COST_BADGE = {"certifiable": "✓", "default_dependent": "▲", "non_certifiable": "✗",
                "runaway": "‼", "parse_error": "·", "no_graph_units": "·"}
 # NEUTRAL glyphs only — NEVER green ✓ for a conditional analysis (council P0-4: must not read as approval).
@@ -675,7 +697,7 @@ def pretty(bundle: dict) -> str:
         + (f"  [{vac} vacuous default bound(s)]" if vac else ""),
         f"      scope: {c['scope']}",
         f"      backing: {c['theorem']['mechanized']}",
-        f"  {_RISK_BADGE.get(r['status'], '?')} RISK  ({r['source']}, v{r['verify_version']})  "
+        f"  {_risk_glyph(r['status'], r['verdict'])} RISK  ({r['source']}, v{r['verify_version']})  "
         f"status={r['status']}  verdict={r['verdict']}  conf={r['calibrated_confidence']}  "
         f"SLA≤{r['sla_alpha']} ({r['sla_mode']}, certified={r['sla_certified']})"
         + ("  ⚠score-outlier" if r["score_outlier_warning"] else ""),

{costwright-0.2.2 → costwright-0.2.4}/src/costwright/subgraph.py RENAMED Viewed

@@ -653,12 +653,21 @@ def compose(ex_flat: dict) -> dict | None:
         return {**base, "category": "no-mapeable:subgraph-node",
                 "reason": f"no unique outer graph (candidates {sorted(outers)})"}
-    outer = outers[0]
-    res = _resolve(outer, A, seen=frozenset(), depth=0)
-    if res["category"] == "non_certifiable":
-        return {**base, "category": "no-mapeable:subgraph-node", "reason": res["prov"]}
-    if res["category"] == "runaway":
-        return {**base, "category": "rechaza-con-razon", "reason": res["prov"]}
+    # TOP-LEVEL RUNS (codex r82): the unique outer, PLUS any inner subgraph that is ALSO invoked STANDALONE
+    # (`mid.compile().invoke(recursion_limit=9000)` while mid is also a node of outer). A standalone invoke is a
+    # SEPARATE top-level run whose per-run bound the outer composition does NOT cover (mid runs at its own,
+    # possibly-LARGER, limit — not the inherited one). The reported ceiling is the MAX over all top-level runs;
+    # reporting only the outer would hide a bigger standalone subgraph run.
+    toplevel = [outers[0]] + [iv for iv in sorted(inner_vars) if iv in A["invoke_limit"]]
+    results = []
+    for v in toplevel:
+        r = _resolve(v, A, seen=frozenset(), depth=0)
+        if r["category"] == "non_certifiable":
+            return {**base, "category": "no-mapeable:subgraph-node", "reason": r["prov"]}
+        if r["category"] == "runaway":
+            return {**base, "category": "rechaza-con-razon", "reason": r["prov"]}
+        results.append(r)
+    res = max(results, key=lambda r: r["bound_factor"])
     # the EFFECTIVE outer steps that _resolve actually used (e.g. the default 1000 when a no-config invoke
     # dominates an explicit 50 — Cursor r32) drive `supersteps`, kept consistent with the bound and the
     # composition string. The composed total is the node-executions ceiling (aggregation=sum renders it as

{costwright-0.2.2 → costwright-0.2.4}/src/costwright.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: costwright
-Version: 0.2.2
+Version: 0.2.4
 Summary: Static budget certificates for LLM-agent workflows (LangGraph / CrewAI / OpenAI Agents SDK). Backed by a machine-checked (Lean 4) cost-soundness theorem.
 Author: Hernán Inverso
 License: Apache-2.0