cat-claws 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
1
+ Metadata-Version: 2.4
2
+ Name: cat-claws
3
+ Version: 0.1.0
4
+ Summary: Claude Agent SDK backend for the CatLLM ecosystem — classify text through a Claude subscription instead of per-token API billing.
5
+ Project-URL: Source, https://github.com/chrissoria/cat-agent
6
+ Author-email: Chris Soria <chrissoria@berkeley.edu>
7
+ License-Expression: GPL-3.0-or-later
8
+ Keywords: agent-sdk,classification,claude,llm,survey
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Programming Language :: Python
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Python: >=3.10
15
+ Requires-Dist: cat-stack>=2.0.1
16
+ Requires-Dist: claude-agent-sdk>=0.1.0
17
+ Requires-Dist: pandas
18
+ Description-Content-Type: text/markdown
19
+
20
+ # cat-claws
21
+
22
+ Agent-CLI backend for the [CatLLM ecosystem](https://github.com/chrissoria/cat-llm):
23
+ classify text through a **Claude subscription** (via the Claude Agent SDK)
24
+ instead of per-token API billing. An OpenAI Codex adapter is planned.
25
+
26
+ *(Distribution name `cat-claws`; imports as `catclaws`. Source repo:
27
+ [cat-agent](https://github.com/chrissoria/cat-agent).)*
28
+
29
+ **Status: alpha, under active development.** See `MASTERPLAN.md` for the
30
+ design and step tracker.
31
+
32
+ ## Install
33
+
34
+ ```bash
35
+ pip install cat-claws
36
+ ```
37
+
38
+ ## Design in one paragraph
39
+
40
+ One row = one sealed, fresh-context agent call (no tools, single turn, no
41
+ settings/CLAUDE.md loading), using cat-stack's validated classification
42
+ prompt byte-for-byte. The model answers in JSON; parsing and the wide 0/1
43
+ output matrix reuse cat-stack's existing machinery. Throughput comes from
44
+ concurrent one-shot calls, never from shared conversations or
45
+ corpus-in-one-prompt (which would contaminate rows and break research
46
+ validity).
47
+
48
+ ## Quick start (Phase 1)
49
+
50
+ ```python
51
+ import catclaws
52
+
53
+ df = catclaws.classify(
54
+ input_data=["I moved for a new job", "Rent got too expensive"],
55
+ categories=["Employment", "Cost of living", "Other"],
56
+ user_model="claude-sonnet-5", # any model your Claude login can use
57
+ description="Why did you move?",
58
+ )
59
+ ```
60
+
61
+ Requires [Claude Code](https://code.claude.com/docs) installed and logged in
62
+ (`claude` on PATH). No API key needed.
@@ -0,0 +1,10 @@
1
+ catclaws/__about__.py,sha256=h8ok3JwGMDibSgYDrKRLB7yD8sS3B9k57p9aWIfdaFU,245
2
+ catclaws/__init__.py,sha256=vQf1SLJGPQaMNylx-z7289bYAl3RLqoZ5w8Wf9-tQOI,440
3
+ catclaws/_backend.py,sha256=M_xovqZqihUC0n7w3VwMNZwLcsYM_ZaH2ZS05aOiDkE,1019
4
+ catclaws/classify.py,sha256=msWqW5tAi2WhAu20okAwyKJ1x2il-dqJDHTgKloQulE,7739
5
+ catclaws/_adapters/__init__.py,sha256=6vsSnIcnPlkbJ9yBAXRE7biP_2bKo4RBwUtreROecow,391
6
+ catclaws/_adapters/base.py,sha256=t8FUr1SmTPTYOelTDTunz3tdSMODA29Kl0Kf6MV9az8,2785
7
+ catclaws/_adapters/claude.py,sha256=aOxz51cP7XDLVnEy3Bb7qbhZmGA4kaMNdJVfYxl2Hrw,7748
8
+ cat_claws-0.1.0.dist-info/METADATA,sha256=zggHjN0phVEs-8-UXY2Pf4zg9xYVjJYk5TalLErL32w,2214
9
+ cat_claws-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
10
+ cat_claws-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
catclaws/__about__.py ADDED
@@ -0,0 +1,6 @@
1
+ # SPDX-FileCopyrightText: 2026-present Christopher Soria <chrissoria@berkeley.edu>
2
+ #
3
+ # SPDX-License-Identifier: GPL-3.0-or-later
4
+ __version__ = "0.1.0"
5
+ __title__ = "cat-claws"
6
+ __description__ = "Claude Agent SDK backend for the CatLLM ecosystem"
catclaws/__init__.py ADDED
@@ -0,0 +1,13 @@
1
+ # SPDX-FileCopyrightText: 2026-present Christopher Soria <chrissoria@berkeley.edu>
2
+ #
3
+ # SPDX-License-Identifier: GPL-3.0-or-later
4
+ """cat-claws — agent-CLI backend for the CatLLM ecosystem.
5
+
6
+ Classify text through a Claude subscription (Claude Agent SDK) instead of
7
+ per-token API billing. See MASTERPLAN.md for design and roadmap.
8
+ """
9
+
10
+ from .__about__ import __version__
11
+ from .classify import classify
12
+
13
+ __all__ = ["classify", "__version__"]
@@ -0,0 +1,16 @@
1
+ from .base import AgentAdapter
2
+ from .claude import ClaudeAdapter
3
+
4
+ # Adapter registry — Codex joins here in a later phase.
5
+ ADAPTERS = {
6
+ "claude": ClaudeAdapter,
7
+ }
8
+
9
+
10
def get_adapter(name: str) -> AgentAdapter:
    """Instantiate the adapter registered under ``name``.

    Args:
        name: registry key in ``ADAPTERS`` (e.g. ``"claude"``).

    Returns:
        A new instance of the registered adapter class.

    Raises:
        ValueError: if ``name`` is not a registered adapter. The message
            lists the available names.
    """
    # Only the registry lookup is guarded. Instantiation happens outside the
    # try so a KeyError raised by an adapter's own constructor is not
    # misreported as an unknown agent name. TypeError covers unhashable names.
    try:
        adapter_cls = ADAPTERS[name]
    except (KeyError, TypeError):
        raise ValueError(
            f"Unknown agent {name!r}. Available: {sorted(ADAPTERS)}"
        ) from None
    return adapter_cls()
@@ -0,0 +1,70 @@
1
+ """AgentAdapter contract.
2
+
3
+ An adapter turns ONE prompt into ONE answer through an agent CLI, with
4
+ sealed-session semantics baked into the contract:
5
+
6
+ - fresh context per call (no conversation shared across rows),
7
+ - no tools, single turn, no user/project settings or memory files loaded,
8
+ - a custom system prompt replacing the agent's own scaffolding.
9
+
10
+ Everything above the adapter (prompt building, JSON parsing, the output
11
+ matrix, concurrency) is agent-agnostic. The Claude adapter is the first
12
+ implementation; an OpenAI Codex adapter (`codex exec`) is planned against
13
+ this same contract.
14
+ """
15
+
16
+ import re
17
+
18
+ # Part of the adapter contract: when a call fails specifically because the
19
+ # subscription hit a usage/rate limit, the returned error string starts with
20
+ # this prefix. classify() keys on it to apply backoff (a plain error does
21
+ # not retry with backoff). Every adapter must use this prefix for rate-limit
22
+ # failures so the backoff logic stays agent-agnostic.
23
+ RATE_LIMIT_PREFIX = "rate-limited: "
24
+
25
+ # When an adapter knows when the limit resets, it appends this suffix so the
26
+ # caller can tell a transient throttle (retry) from a hard cap hours away
27
+ # (futile to retry — fail fast with a resumable message). Optional: adapters
28
+ # without a reset time simply omit it, and the caller falls back to backoff.
29
+ _RESET_EPOCH_RE = re.compile(r"resets at epoch (\d+)")
30
+
31
+
32
+ def is_rate_limited(error: str | None) -> bool:
33
+ """True if an adapter error string signals a subscription rate limit."""
34
+ return bool(error) and error.startswith(RATE_LIMIT_PREFIX)
35
+
36
+
37
+ def parse_reset_epoch(error: str | None) -> int | None:
38
+ """Unix epoch when the limit resets, if the adapter included one.
39
+
40
+ Matches the ``(resets at epoch N)`` suffix the Claude adapter emits from
41
+ the SDK's ``RateLimitInfo.resets_at``. Returns None when absent (unknown
42
+ reset -> caller should fall back to bounded backoff)."""
43
+ if not error:
44
+ return None
45
+ m = _RESET_EPOCH_RE.search(error)
46
+ return int(m.group(1)) if m else None
47
+
48
+
49
+ class AgentAdapter:
50
+ """One sealed agent call. Implementations are stateless."""
51
+
52
+ name: str = "base"
53
+
54
+ async def one_shot(
55
+ self,
56
+ prompt: str,
57
+ system_prompt: str | None,
58
+ model: str,
59
+ thinking_budget: int = 0,
60
+ ) -> tuple[str | None, str | None]:
61
+ """Run one sealed call; return (text, error) — exactly one is None.
62
+
63
+ thinking_budget follows cat-stack semantics: 0 disables reasoning,
64
+ >0 grades into the provider's effort vocabulary.
65
+
66
+ Rate-limit failures must return an error string prefixed with
67
+ ``RATE_LIMIT_PREFIX`` so the caller can back off (see contract note
68
+ above). All other failures return an ordinary error string.
69
+ """
70
+ raise NotImplementedError
@@ -0,0 +1,186 @@
1
+ """Claude adapter — claude-agent-sdk implementation of AgentAdapter.
2
+
3
+ Requires Claude Code installed and logged in (`claude` on PATH). Calls run
4
+ through the user's Claude subscription, not an API key.
5
+ """
6
+
7
+ from .base import RATE_LIMIT_PREFIX, AgentAdapter
8
+
9
+ # --- rate-limit detection (pure helpers, unit-tested in tests/test_rate_limit.py) ---
10
+ #
11
+ # The SDK surfaces subscription limits three ways (verified on sdk 0.2.110):
12
+ # 1. a `RateLimitEvent` in the message stream, carrying a `RateLimitInfo`
13
+ # whose `status` is one of 'allowed'|'allowed_warning'|'rejected' — only
14
+ # `status == 'rejected'` is an actual block (`overage_status` is ignored);
15
+ # 2. a `ResultMessage.api_error_status` of 429 (too many requests);
16
+ # 3. as a last resort, rate-limit wording in an error/result string.
17
+ # All three funnel into the RATE_LIMIT_PREFIX error so classify() backs off.
18
+
19
# Case-insensitive substrings that mark an error/result string as a rate
# limit. The HTTP status 429 is intentionally NOT listed here: as a bare
# substring it would also match unrelated digit runs (ids, counters, epochs),
# so _looks_rate_limited_text matches it as a standalone number instead.
_RATE_LIMIT_TEXT_MARKERS = (
    "rate limit",
    "rate-limit",
    "rate_limit",
    "usage limit",
    "too many requests",
    "quota exceeded",
)


def _looks_rate_limited_text(text) -> bool:
    """Text fallback: does an error/result string read like a rate limit?

    Args:
        text: any object; it is converted with ``str()`` and lower-cased.
            Falsy values (``None``, ``""``) are never rate limits.

    Returns:
        True if the text contains one of ``_RATE_LIMIT_TEXT_MARKERS`` or the
        number 429 not adjacent to another digit (so ``"HTTP 429"`` and
        ``"status=429"`` match, ``"14293"`` does not).
    """
    if not text:
        return False
    # Local import: this module does not import `re` at file level.
    import re

    low = str(text).lower()
    if any(marker in low for marker in _RATE_LIMIT_TEXT_MARKERS):
        return True
    return re.search(r"(?<!\d)429(?!\d)", low) is not None
36
+
37
+
38
+ def _rate_limit_detail(info) -> str | None:
39
+ """Human detail if the PRIMARY usage window is exhausted, else None.
40
+
41
+ Only ``status == "rejected"`` blocks the current request. 'allowed' and
42
+ 'allowed_warning' both let it through (the latter just warns the cap is
43
+ near). Crucially, ``overage_status`` is NOT a per-request block: a common
44
+ steady state is ``overage_status='rejected'`` with
45
+ ``overage_disabled_reason='org_level_disabled'`` — the org simply turned
46
+ off spillover billing — while ``status='allowed'`` and the call succeeds.
47
+ Treating overage rejection as a limit falsely fails every call on such
48
+ accounts (verified live 2026-07-03), so it is deliberately ignored here.
49
+ """
50
+ if info is None:
51
+ return None
52
+ if getattr(info, "status", None) != "rejected":
53
+ return None
54
+ rtype = getattr(info, "rate_limit_type", None) or "usage"
55
+ resets = getattr(info, "resets_at", None)
56
+ detail = f"{rtype} limit reached"
57
+ if resets:
58
+ detail += f" (resets at epoch {resets})"
59
+ return detail
60
+
61
+
62
def _api_status_is_rate_limit(status) -> bool:
    """Return True when ``status`` equals HTTP 429 (too many requests)."""
    too_many_requests = 429
    return status == too_many_requests
65
+
66
+
67
def _finalize(text, result_error, rate_limit_detail):
    """Turn collected stream state into the ``(text, error)`` contract result.

    Precedence:

    1. A real answer with no error result always wins. Rate-limit events are
       also emitted on successful calls (they report current utilization), so
       an informational limit event must never discard a present answer.
    2. Otherwise a detected rate limit wins over ordinary errors, because it
       is retryable via backoff.
    3. Otherwise an error result is returned, prefixed with
       ``RATE_LIMIT_PREFIX`` when its wording reads like a rate limit.
    4. With no answer, no limit and no error, the response was empty.

    Args:
        text: concatenated assistant text (may be empty).
        result_error: error text from the result message, or ``None``.
        rate_limit_detail: human-readable limit detail, or ``None``.

    Returns:
        ``(text, None)`` on success, ``(None, error_string)`` on failure.
    """
    if text and not result_error:
        return text, None
    if rate_limit_detail:
        return None, f"{RATE_LIMIT_PREFIX}{rate_limit_detail}"
    if result_error:
        if _looks_rate_limited_text(result_error):
            return None, f"{RATE_LIMIT_PREFIX}{result_error}"
        return None, str(result_error)
    # Reaching here means result_error is falsy, so text must be falsy too
    # (otherwise the first guard would have returned). A separate
    # "text present" branch at this point would be unreachable.
    return None, "agent returned an empty response"
87
+
88
+
89
class ClaudeAdapter(AgentAdapter):
    """AgentAdapter implementation backed by ``claude_agent_sdk.query``."""

    name = "claude"

    async def one_shot(
        self,
        prompt: str,
        system_prompt: str | None,
        model: str,
        thinking_budget: int = 0,
    ) -> tuple[str | None, str | None]:
        """Run one sealed Claude call and return ``(text, error)``.

        Args:
            prompt: the user prompt sent to the agent.
            system_prompt: optional system prompt; only passed to the SDK
                options when truthy.
            model: model identifier forwarded to the SDK options.
            thinking_budget: ``0`` (or less) explicitly disables thinking;
                a positive value is mapped to an ``effort`` option through
                cat-stack's ``_thinking_budget_to_effort``.

        Returns:
            ``(text, None)`` on success or ``(None, error)`` on failure.
            Rate-limit failures carry ``RATE_LIMIT_PREFIX``. SDK import
            failures, a missing CLI and other exceptions raised while
            running the query are returned as error strings, not raised.
        """
        # Imported lazily so a missing SDK becomes a per-row error string
        # instead of an import-time failure of the package.
        try:
            from claude_agent_sdk import (
                query,
                ClaudeAgentOptions,
                AssistantMessage,
                RateLimitEvent,
                ResultMessage,
                TextBlock,
                CLINotFoundError,
            )
            from claude_agent_sdk.types import (
                ThinkingConfigDisabled,
            )
        except ImportError as e:
            # The install hint names the published distribution (cat-claws),
            # not the source repository (cat-agent).
            return None, (
                "claude-agent-sdk is not installed. Run: pip install cat-claws "
                f"(original error: {e})"
            )

        # Sealed session: fresh context, no tools, one turn, no user/project
        # settings or CLAUDE.md files (running classify() from inside a repo
        # must not inject that repo's instructions into classifications).
        opts_kwargs = dict(
            model=model,
            allowed_tools=[],
            max_turns=1,
            setting_sources=[],
        )
        if system_prompt:
            opts_kwargs["system_prompt"] = system_prompt

        # Engine parity: the agent enables thinking by default, but
        # cat-stack's default is thinking_budget=0 -> off. Positive budgets
        # grade into the shared effort vocabulary.
        if thinking_budget and thinking_budget > 0:
            from catstack._providers import _thinking_budget_to_effort
            opts_kwargs["effort"] = _thinking_budget_to_effort(thinking_budget)
        else:
            opts_kwargs["thinking"] = ThinkingConfigDisabled(type="disabled")

        async def _run(options):
            """Consume one query stream -> (text, result_error, rate_limit_detail)."""
            text_parts = []
            result_error = None
            rate_limit_detail = None
            async for message in query(prompt=prompt, options=options):
                if isinstance(message, AssistantMessage):
                    # Only text blocks contribute to the answer.
                    for block in message.content:
                        if isinstance(block, TextBlock):
                            text_parts.append(block.text)
                elif isinstance(message, RateLimitEvent):
                    detail = _rate_limit_detail(getattr(message, "rate_limit_info", None))
                    if detail:
                        rate_limit_detail = detail
                elif isinstance(message, ResultMessage):
                    if _api_status_is_rate_limit(getattr(message, "api_error_status", None)):
                        # Keep a more specific detail from an earlier event.
                        rate_limit_detail = rate_limit_detail or "HTTP 429 (too many requests)"
                    if getattr(message, "is_error", False):
                        errs = getattr(message, "errors", None) or []
                        parts = [
                            str(p)
                            for p in ([getattr(message, "result", None)] + list(errs))
                            if p
                        ]
                        result_error = " ".join(parts) or "agent returned an error result"
            return "".join(text_parts).strip(), result_error, rate_limit_detail

        try:
            return _finalize(*await _run(ClaudeAgentOptions(**opts_kwargs)))
        except CLINotFoundError:
            return None, (
                "Claude CLI not found. Install it: https://code.claude.com/docs"
            )
        except Exception as e:
            if _looks_rate_limited_text(e):
                return None, f"{RATE_LIMIT_PREFIX}{e}"
            # Thinking-config incompatibilities (e.g. models that reject an
            # explicit disable) fall back to the agent default rather than
            # failing the row. Retried once, without the "thinking" option.
            if "thinking" in str(e).lower() and "thinking" in opts_kwargs:
                opts_kwargs.pop("thinking", None)
                try:
                    return _finalize(*await _run(ClaudeAgentOptions(**opts_kwargs)))
                except Exception as e2:
                    if _looks_rate_limited_text(e2):
                        return None, f"{RATE_LIMIT_PREFIX}{e2}"
                    return None, f"claude adapter failed: {e2}"
            return None, f"claude adapter failed: {e}"
catclaws/_backend.py ADDED
@@ -0,0 +1,31 @@
1
+ """Agent-agnostic plumbing: bounded-concurrency execution of sealed calls.
2
+
3
+ classify() is synchronous (matching the rest of the CatLLM ecosystem); the
4
+ SDKs are asyncio-native. This module owns that seam: build one coroutine per
5
+ row, run them all under a semaphore in a single event loop.
6
+ """
7
+
8
+ import asyncio
9
+
10
+
11
def gather_bounded(coro_fns, max_workers: int = 4):
    """Run zero-arg coroutine factories with at most `max_workers` in flight.

    Args:
        coro_fns: iterable of zero-argument callables, each returning an
            awaitable when called.
        max_workers: maximum number of factories awaited concurrently;
            values below 1 are clamped to 1.

    Returns:
        A list of results in input order. An exception raised by a factory
        (while being called or awaited) is captured and returned in that
        factory's slot instead of propagating. Callers decide how to record
        the failure; one bad row must never abort the batch.

    Works both from plain synchronous code and from inside an already
    running event loop (e.g. a Jupyter notebook). In the latter case the
    batch runs on its own loop in a worker thread, because ``asyncio.run``
    refuses to start inside a running loop. The calling thread blocks until
    the batch finishes either way.
    """
    factories = list(coro_fns)

    async def _run():
        # Created inside the coroutine so it belongs to the loop running it.
        sem = asyncio.Semaphore(max(1, int(max_workers)))

        async def _bounded(fn):
            async with sem:
                try:
                    return await fn()
                except Exception as e:
                    return e

        return await asyncio.gather(*[_bounded(fn) for fn in factories])

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop running in this thread: the ordinary path.
        return asyncio.run(_run())

    # A loop is already running here; asyncio.run() would raise. Run the
    # batch on a fresh event loop in a dedicated thread and wait for it.
    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, _run()).result()
catclaws/classify.py ADDED
@@ -0,0 +1,186 @@
1
+ """catclaws.classify() — one-row-at-a-time classification through an agent CLI.
2
+
3
+ Design (see MASTERPLAN.md):
4
+ - One row = one sealed, fresh-context agent call. Throughput comes from
5
+ bounded concurrency, never from shared conversations or corpus prompts.
6
+ - Prompts are cat-stack's validated classification prompt, byte-identical to
7
+ the API path (`build_text_classification_prompt`).
8
+ - The model answers in JSON (prompt-requested); parsing reuses cat-stack's
9
+ `extract_json` + `validate_classification_json`; output is the standard
10
+ wide 0/1 DataFrame.
11
+ """
12
+
13
+ import asyncio
14
+ import time
15
+
16
+ import pandas as pd
17
+
18
+ from ._adapters import get_adapter
19
+ from ._adapters.base import is_rate_limited, parse_reset_epoch
20
+ from ._backend import gather_bounded
21
+
22
+ # The system prompt is transport scaffolding (it replaces Claude Code's
23
+ # default agent persona), NOT part of the validated per-row instrument —
24
+ # the instrument travels entirely in the user prompt, as it does on the
25
+ # API path where no system message is sent for text classification.
26
+ _SYSTEM_PROMPT = (
27
+ "You are a text classification engine. Follow the user's instructions "
28
+ "exactly and reply with only what they ask for."
29
+ )
30
+
31
+ # First rate-limit backoff, in seconds (doubles each retry). Subscription
32
+ # usage windows are minutes-scale, so the wait starts coarse rather than at
33
+ # API-style sub-second values.
34
+ _RATE_LIMIT_BASE_DELAY = 30.0
35
+
36
+
37
def classify(
    input_data,
    categories,
    user_model: str = "claude-sonnet-5",
    agent: str = "claude",
    description: str = "",
    multi_label: bool = True,
    thinking_budget: int = 0,
    max_workers: int = 4,
    json_retries: int = 2,
    rate_limit_retries: int = 2,
):
    """Classify text rows into 0/1 category indicators via an agent CLI.

    Runs on the agent's subscription login (no API key). Same prompt, same
    JSON contract, and same output schema as ``catstack.classify()``.

    Args:
        input_data: iterable of text rows (list, pandas Series, ...). A bare
            string is treated as a single row rather than being split into
            characters.
        categories: iterable of category names. A bare string is treated as
            a single category.
        user_model: model the agent should use (e.g. "claude-sonnet-5").
        agent: which agent CLI answers ("claude"; "codex" planned).
        description: context about the data (survey question etc.) — feeds
            the same "Context:" line as the API path.
        multi_label: multiple categories per row (default) vs single best.
        thinking_budget: cat-stack semantics — 0 disables reasoning (default,
            engine parity), >0 grades into the agent's effort vocabulary.
        max_workers: concurrent sealed calls in flight.
        json_retries: immediate re-asks per row when the call fails for a
            non-rate-limit reason or the reply isn't valid classification
            JSON.
        rate_limit_retries: on a rate-limited row, how many times to back off
            (exponential from 30s) and retry before giving up. This budget is
            independent of json_retries; set 0 to fail fast on limits. Other
            in-flight rows are unaffected while one row waits.

    Returns:
        pandas.DataFrame with input_data, processing_status, and one 0/1
        category_N column per category (same schema as catstack.classify()).
        Failed rows carry an ``"error: ..."`` status and ``None`` indicators.

    Raises:
        ValueError: if ``input_data`` or ``categories`` is empty, or
            ``agent`` is not a registered adapter.
    """
    from catstack.text_functions_ensemble import build_text_classification_prompt
    from catstack import extract_json
    from catstack._utils import validate_classification_json

    # list("some text") would yield one row per character; wrap bare strings.
    rows = [input_data] if isinstance(input_data, str) else list(input_data)
    if not rows:
        raise ValueError("input_data is empty")
    categories = [categories] if isinstance(categories, str) else list(categories)
    if not categories:
        raise ValueError("categories is empty")

    adapter = get_adapter(agent)

    # Same prompt components as the engine builds them.
    categories_str = "\n".join(f"{i + 1}. {cat}" for i, cat in enumerate(categories))
    survey_question_context = f"Context: {description}." if description else ""

    def _row_prompt(text):
        """Build the user prompt for one row (None becomes an empty string)."""
        messages = build_text_classification_prompt(
            response_text=text if text is not None else "",
            categories_str=categories_str,
            survey_question_context=survey_question_context,
            multi_label=multi_label,
        )
        # Only the last message's content is sent as the agent prompt.
        return messages[-1]["content"]

    n_cats = len(categories)

    # Total seconds our backoff schedule (30s, doubling) can bridge. A limit
    # that resets beyond this can't be waited out here, so retrying is futile.
    _backoff_budget = sum(
        _RATE_LIMIT_BASE_DELAY * (2 ** k)
        for k in range(max(0, int(rate_limit_retries)))
    )

    def _success(values):
        """Map validated JSON values (keyed "1".."N") to a 0/1 indicator row."""
        return {
            "status": "success",
            "indicators": [
                1 if str(values.get(str(i + 1), "0")) == "1" else 0
                for i in range(n_cats)
            ],
        }

    def _failure(message):
        """Result dict for a row that could not be classified."""
        return {"status": f"error: {message}", "indicators": [None] * n_cats}

    async def _classify_row(text):
        """One row: sealed call -> parse -> validate.

        Two independent retry budgets. A rate-limited reply spends a
        `rate_limit_retries` slot: back off (exponential from 30s) and re-ask,
        without touching json_retries — re-asking a limit immediately would
        just hit it again. The `await asyncio.sleep` yields the event loop, so
        other in-flight rows keep going while this one waits. Any other
        malformed/failed reply spends a `json_retries` slot and re-asks now.
        """
        prompt = _row_prompt(text)
        last_error = "unknown error"
        rl_retries_left = max(0, int(rate_limit_retries))
        json_retries_left = max(0, int(json_retries))
        delay = _RATE_LIMIT_BASE_DELAY
        while True:
            reply, error = await adapter.one_shot(
                prompt,
                system_prompt=_SYSTEM_PROMPT,
                model=user_model,
                thinking_budget=thinking_budget,
            )
            if error and is_rate_limited(error):
                # A hard cap resetting beyond our backoff budget won't clear by
                # retrying — fail fast with the resumable message rather than
                # sleeping through futile re-asks. Unknown/near resets still
                # back off.
                reset = parse_reset_epoch(error)
                futile = reset is not None and (reset - time.time()) > _backoff_budget
                if rl_retries_left > 0 and not futile:
                    rl_retries_left -= 1
                    await asyncio.sleep(delay)
                    delay *= 2
                    continue
                # Backoff exhausted or futile — terminal for this row.
                return _failure(error)
            if error:
                last_error = error
            else:
                parsed = extract_json(reply)
                ok, values = (False, None)
                if parsed:
                    ok, values = validate_classification_json(parsed, n_cats)
                if ok:
                    return _success(values)
                last_error = f"invalid classification JSON in reply: {reply[:120]!r}"
            if json_retries_left > 0:
                json_retries_left -= 1
                continue
            return _failure(last_error)

    # `t=t` binds each row at definition time (avoids late-binding closures).
    results = gather_bounded(
        [lambda t=t: _classify_row(t) for t in rows], max_workers=max_workers
    )

    column_names = [f"category_{i + 1}" for i in range(n_cats)]
    out = {"input_data": rows, "processing_status": []}
    for col in column_names:
        out[col] = []
    for res in results:
        # gather_bounded returns the exception object for a row that raised.
        if isinstance(res, Exception):
            res = _failure(res)
        out["processing_status"].append(res["status"])
        for col, flag in zip(column_names, res["indicators"]):
            out[col].append(flag)

    return pd.DataFrame(out)