cat-claws 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cat_claws-0.1.0.dist-info/METADATA +62 -0
- cat_claws-0.1.0.dist-info/RECORD +10 -0
- cat_claws-0.1.0.dist-info/WHEEL +4 -0
- catclaws/__about__.py +6 -0
- catclaws/__init__.py +13 -0
- catclaws/_adapters/__init__.py +16 -0
- catclaws/_adapters/base.py +70 -0
- catclaws/_adapters/claude.py +186 -0
- catclaws/_backend.py +31 -0
- catclaws/classify.py +186 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cat-claws
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Claude Agent SDK backend for the CatLLM ecosystem — classify text through a Claude subscription instead of per-token API billing.
|
|
5
|
+
Project-URL: Source, https://github.com/chrissoria/cat-agent
|
|
6
|
+
Author-email: Chris Soria <chrissoria@berkeley.edu>
|
|
7
|
+
License-Expression: GPL-3.0-or-later
|
|
8
|
+
Keywords: agent-sdk,classification,claude,llm,survey
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Programming Language :: Python
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Requires-Dist: cat-stack>=2.0.1
|
|
16
|
+
Requires-Dist: claude-agent-sdk>=0.1.0
|
|
17
|
+
Requires-Dist: pandas
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# cat-claws
|
|
21
|
+
|
|
22
|
+
Agent-CLI backend for the [CatLLM ecosystem](https://github.com/chrissoria/cat-llm):
|
|
23
|
+
classify text through a **Claude subscription** (via the Claude Agent SDK)
|
|
24
|
+
instead of per-token API billing. An OpenAI Codex adapter is planned.
|
|
25
|
+
|
|
26
|
+
*(Distribution name `cat-claws`; imports as `catclaws`. Source repo:
|
|
27
|
+
[cat-agent](https://github.com/chrissoria/cat-agent).)*
|
|
28
|
+
|
|
29
|
+
**Status: alpha, under active development.** See `MASTERPLAN.md` for the
|
|
30
|
+
design and step tracker.
|
|
31
|
+
|
|
32
|
+
## Install
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install cat-claws
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Design in one paragraph
|
|
39
|
+
|
|
40
|
+
One row = one sealed, fresh-context agent call (no tools, single turn, no
|
|
41
|
+
settings/CLAUDE.md loading), using cat-stack's validated classification
|
|
42
|
+
prompt byte-for-byte. The model answers in JSON; parsing and the wide 0/1
|
|
43
|
+
output matrix reuse cat-stack's existing machinery. Throughput comes from
|
|
44
|
+
concurrent one-shot calls, never from shared conversations or
|
|
45
|
+
corpus-in-one-prompt (which would contaminate rows and break research
|
|
46
|
+
validity).
|
|
47
|
+
|
|
48
|
+
## Quick start (Phase 1)
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
import catclaws
|
|
52
|
+
|
|
53
|
+
df = catclaws.classify(
|
|
54
|
+
input_data=["I moved for a new job", "Rent got too expensive"],
|
|
55
|
+
categories=["Employment", "Cost of living", "Other"],
|
|
56
|
+
user_model="claude-sonnet-5", # any model your Claude login can use
|
|
57
|
+
description="Why did you move?",
|
|
58
|
+
)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Requires [Claude Code](https://code.claude.com/docs) installed and logged in
|
|
62
|
+
(`claude` on PATH). No API key needed.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
catclaws/__about__.py,sha256=h8ok3JwGMDibSgYDrKRLB7yD8sS3B9k57p9aWIfdaFU,245
|
|
2
|
+
catclaws/__init__.py,sha256=vQf1SLJGPQaMNylx-z7289bYAl3RLqoZ5w8Wf9-tQOI,440
|
|
3
|
+
catclaws/_backend.py,sha256=M_xovqZqihUC0n7w3VwMNZwLcsYM_ZaH2ZS05aOiDkE,1019
|
|
4
|
+
catclaws/classify.py,sha256=msWqW5tAi2WhAu20okAwyKJ1x2il-dqJDHTgKloQulE,7739
|
|
5
|
+
catclaws/_adapters/__init__.py,sha256=6vsSnIcnPlkbJ9yBAXRE7biP_2bKo4RBwUtreROecow,391
|
|
6
|
+
catclaws/_adapters/base.py,sha256=t8FUr1SmTPTYOelTDTunz3tdSMODA29Kl0Kf6MV9az8,2785
|
|
7
|
+
catclaws/_adapters/claude.py,sha256=aOxz51cP7XDLVnEy3Bb7qbhZmGA4kaMNdJVfYxl2Hrw,7748
|
|
8
|
+
cat_claws-0.1.0.dist-info/METADATA,sha256=zggHjN0phVEs-8-UXY2Pf4zg9xYVjJYk5TalLErL32w,2214
|
|
9
|
+
cat_claws-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
10
|
+
cat_claws-0.1.0.dist-info/RECORD,,
|
catclaws/__about__.py
ADDED
catclaws/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026-present Christopher Soria <chrissoria@berkeley.edu>
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
4
|
+
"""cat-claws — agent-CLI backend for the CatLLM ecosystem.
|
|
5
|
+
|
|
6
|
+
Classify text through a Claude subscription (Claude Agent SDK) instead of
|
|
7
|
+
per-token API billing. See MASTERPLAN.md for design and roadmap.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from .__about__ import __version__
|
|
11
|
+
from .classify import classify
|
|
12
|
+
|
|
13
|
+
__all__ = ["classify", "__version__"]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from .base import AgentAdapter
|
|
2
|
+
from .claude import ClaudeAdapter
|
|
3
|
+
|
|
4
|
+
# Adapter registry — Codex joins here in a later phase.
ADAPTERS = {
    "claude": ClaudeAdapter,
}


def get_adapter(name: str) -> AgentAdapter:
    """Build a fresh instance of the adapter registered under ``name``.

    Args:
        name: key into ``ADAPTERS`` (currently only ``"claude"``).

    Returns:
        A newly constructed adapter instance.

    Raises:
        ValueError: if ``name`` is not registered; the message lists the
            registered names in sorted order.
    """
    try:
        adapter_cls = ADAPTERS[name]
        return adapter_cls()
    except KeyError:
        available = sorted(ADAPTERS)
        # `from None` hides the internal KeyError from the user's traceback.
        raise ValueError(f"Unknown agent {name!r}. Available: {available}") from None
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""AgentAdapter contract.
|
|
2
|
+
|
|
3
|
+
An adapter turns ONE prompt into ONE answer through an agent CLI, with
|
|
4
|
+
sealed-session semantics baked into the contract:
|
|
5
|
+
|
|
6
|
+
- fresh context per call (no conversation shared across rows),
|
|
7
|
+
- no tools, single turn, no user/project settings or memory files loaded,
|
|
8
|
+
- a custom system prompt replacing the agent's own scaffolding.
|
|
9
|
+
|
|
10
|
+
Everything above the adapter (prompt building, JSON parsing, the output
|
|
11
|
+
matrix, concurrency) is agent-agnostic. The Claude adapter is the first
|
|
12
|
+
implementation; an OpenAI Codex adapter (`codex exec`) is planned against
|
|
13
|
+
this same contract.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import re
|
|
17
|
+
|
|
18
|
+
# Part of the adapter contract: when a call fails specifically because the
|
|
19
|
+
# subscription hit a usage/rate limit, the returned error string starts with
|
|
20
|
+
# this prefix. classify() keys on it to apply backoff (a plain error does
|
|
21
|
+
# not retry with backoff). Every adapter must use this prefix for rate-limit
|
|
22
|
+
# failures so the backoff logic stays agent-agnostic.
|
|
23
|
+
RATE_LIMIT_PREFIX = "rate-limited: "
|
|
24
|
+
|
|
25
|
+
# When an adapter knows when the limit resets, it appends this suffix so the
|
|
26
|
+
# caller can tell a transient throttle (retry) from a hard cap hours away
|
|
27
|
+
# (futile to retry — fail fast with a resumable message). Optional: adapters
|
|
28
|
+
# without a reset time simply omit it, and the caller falls back to backoff.
|
|
29
|
+
_RESET_EPOCH_RE = re.compile(r"resets at epoch (\d+)")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def is_rate_limited(error: str | None) -> bool:
|
|
33
|
+
"""True if an adapter error string signals a subscription rate limit."""
|
|
34
|
+
return bool(error) and error.startswith(RATE_LIMIT_PREFIX)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def parse_reset_epoch(error: str | None) -> int | None:
|
|
38
|
+
"""Unix epoch when the limit resets, if the adapter included one.
|
|
39
|
+
|
|
40
|
+
Matches the ``(resets at epoch N)`` suffix the Claude adapter emits from
|
|
41
|
+
the SDK's ``RateLimitInfo.resets_at``. Returns None when absent (unknown
|
|
42
|
+
reset -> caller should fall back to bounded backoff)."""
|
|
43
|
+
if not error:
|
|
44
|
+
return None
|
|
45
|
+
m = _RESET_EPOCH_RE.search(error)
|
|
46
|
+
return int(m.group(1)) if m else None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class AgentAdapter:
|
|
50
|
+
"""One sealed agent call. Implementations are stateless."""
|
|
51
|
+
|
|
52
|
+
name: str = "base"
|
|
53
|
+
|
|
54
|
+
async def one_shot(
|
|
55
|
+
self,
|
|
56
|
+
prompt: str,
|
|
57
|
+
system_prompt: str | None,
|
|
58
|
+
model: str,
|
|
59
|
+
thinking_budget: int = 0,
|
|
60
|
+
) -> tuple[str | None, str | None]:
|
|
61
|
+
"""Run one sealed call; return (text, error) — exactly one is None.
|
|
62
|
+
|
|
63
|
+
thinking_budget follows cat-stack semantics: 0 disables reasoning,
|
|
64
|
+
>0 grades into the provider's effort vocabulary.
|
|
65
|
+
|
|
66
|
+
Rate-limit failures must return an error string prefixed with
|
|
67
|
+
``RATE_LIMIT_PREFIX`` so the caller can back off (see contract note
|
|
68
|
+
above). All other failures return an ordinary error string.
|
|
69
|
+
"""
|
|
70
|
+
raise NotImplementedError
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""Claude adapter — claude-agent-sdk implementation of AgentAdapter.
|
|
2
|
+
|
|
3
|
+
Requires Claude Code installed and logged in (`claude` on PATH). Calls run
|
|
4
|
+
through the user's Claude subscription, not an API key.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .base import RATE_LIMIT_PREFIX, AgentAdapter
|
|
8
|
+
|
|
9
|
+
# --- rate-limit detection (pure helpers, unit-tested in tests/test_rate_limit.py) ---
|
|
10
|
+
#
|
|
11
|
+
# The SDK surfaces subscription limits three ways (verified on sdk 0.2.110):
|
|
12
|
+
# 1. a `RateLimitEvent` in the message stream, carrying a `RateLimitInfo`
|
|
13
|
+
# whose `status`/`overage_status` is one of 'allowed'|'allowed_warning'|
|
|
14
|
+
# 'rejected' — only 'rejected' is an actual block;
|
|
15
|
+
# 2. a `ResultMessage.api_error_status` of 429 (too many requests);
|
|
16
|
+
# 3. as a last resort, rate-limit wording in an error/result string.
|
|
17
|
+
# All three funnel into the RATE_LIMIT_PREFIX error so classify() backs off.
|
|
18
|
+
|
|
19
|
+
# Case-insensitive phrases that mark an error/result string as a rate limit.
_RATE_LIMIT_TEXT_MARKERS = (
    "rate limit",
    "rate-limit",
    "rate_limit",
    "usage limit",
    "too many requests",
    "quota exceeded",
)

# HTTP 429 is matched only as a standalone number. A bare "429" substring
# test also fires on unrelated digits (request ids, exit codes, epochs,
# decimals such as "1.429"), which would mislabel ordinary failures as rate
# limits and send them into long backoff sleeps.
_HTTP_429_PATTERN = r"(?<![\d.])429(?!\d)"


def _looks_rate_limited_text(text) -> bool:
    """Text fallback: does an error/result string read like a rate limit?

    Args:
        text: any object; it is converted with ``str()`` and lower-cased.
            Falsy values (``None``, ``""``) are never rate limits.

    Returns:
        True if the text contains one of ``_RATE_LIMIT_TEXT_MARKERS`` or a
        standalone ``429`` status number.
    """
    import re  # function-scope: this module has no top-level `re` import

    if not text:
        return False
    low = str(text).lower()
    if any(marker in low for marker in _RATE_LIMIT_TEXT_MARKERS):
        return True
    return re.search(_HTTP_429_PATTERN, low) is not None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _rate_limit_detail(info) -> str | None:
|
|
39
|
+
"""Human detail if the PRIMARY usage window is exhausted, else None.
|
|
40
|
+
|
|
41
|
+
Only ``status == "rejected"`` blocks the current request. 'allowed' and
|
|
42
|
+
'allowed_warning' both let it through (the latter just warns the cap is
|
|
43
|
+
near). Crucially, ``overage_status`` is NOT a per-request block: a common
|
|
44
|
+
steady state is ``overage_status='rejected'`` with
|
|
45
|
+
``overage_disabled_reason='org_level_disabled'`` — the org simply turned
|
|
46
|
+
off spillover billing — while ``status='allowed'`` and the call succeeds.
|
|
47
|
+
Treating overage rejection as a limit falsely fails every call on such
|
|
48
|
+
accounts (verified live 2026-07-03), so it is deliberately ignored here.
|
|
49
|
+
"""
|
|
50
|
+
if info is None:
|
|
51
|
+
return None
|
|
52
|
+
if getattr(info, "status", None) != "rejected":
|
|
53
|
+
return None
|
|
54
|
+
rtype = getattr(info, "rate_limit_type", None) or "usage"
|
|
55
|
+
resets = getattr(info, "resets_at", None)
|
|
56
|
+
detail = f"{rtype} limit reached"
|
|
57
|
+
if resets:
|
|
58
|
+
detail += f" (resets at epoch {resets})"
|
|
59
|
+
return detail
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _api_status_is_rate_limit(status) -> bool:
    """Return whether ``status`` equals HTTP 429 (too many requests)."""
    too_many_requests = 429
    return status == too_many_requests
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _finalize(text, result_error, rate_limit_detail):
    """Turn collected stream state into the (text, error) contract result.

    A real answer always wins: RateLimitEvents are emitted on SUCCESSFUL calls
    too (they report current utilization), so a present answer with no error
    means the request went through — never discard it for an informational
    limit event. Only when there is no clean answer do limits (retryable via
    backoff) win over ordinary errors.

    Args:
        text: concatenated assistant text (may be empty).
        result_error: error text from the result message, or a falsy value.
        rate_limit_detail: description of a blocking limit, or a falsy value.

    Returns:
        ``(text, None)`` on success, otherwise ``(None, error)``. The error
        starts with ``RATE_LIMIT_PREFIX`` when the failure is a rate limit.
    """
    if text and not result_error:
        return text, None
    if rate_limit_detail:
        return None, f"{RATE_LIMIT_PREFIX}{rate_limit_detail}"
    if result_error:
        message = str(result_error)
        if _looks_rate_limited_text(message):
            return None, f"{RATE_LIMIT_PREFIX}{message}"
        return None, message
    # No error and no limit: text must be empty here, because non-empty text
    # without an error already returned in the first branch.
    return None, "agent returned an empty response"
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class ClaudeAdapter(AgentAdapter):
    """``AgentAdapter`` implemented with ``claude_agent_sdk.query``."""

    name = "claude"

    @staticmethod
    def _error_from_exception(exc) -> str:
        """Map an exception from the SDK call to a contract error string.

        Rate-limit wording in the exception text yields a
        ``RATE_LIMIT_PREFIX`` error; anything else is an ordinary failure.
        """
        if _looks_rate_limited_text(exc):
            return f"{RATE_LIMIT_PREFIX}{exc}"
        return f"claude adapter failed: {exc}"

    async def one_shot(
        self,
        prompt: str,
        system_prompt: str | None,
        model: str,
        thinking_budget: int = 0,
    ) -> tuple[str | None, str | None]:
        """Run one sealed Claude call and return ``(text, error)``.

        Args:
            prompt: user prompt sent to ``query``.
            system_prompt: passed as the ``system_prompt`` option when truthy.
            model: model identifier passed through to the SDK options.
            thinking_budget: ``0`` (or less) explicitly disables thinking; a
                positive value is converted to an ``effort`` option by
                cat-stack's ``_thinking_budget_to_effort``.

        Returns:
            ``(text, None)`` on success or ``(None, error)`` on failure.
            Rate-limit failures carry ``RATE_LIMIT_PREFIX``.
        """
        try:
            from claude_agent_sdk import (
                query,
                ClaudeAgentOptions,
                AssistantMessage,
                RateLimitEvent,
                ResultMessage,
                TextBlock,
                CLINotFoundError,
            )
            from claude_agent_sdk.types import (
                ThinkingConfigDisabled,
            )
        except ImportError as e:
            # The hint names the package that is actually missing. An
            # installed-but-old SDK lacking one of these names lands here
            # too, hence "-U".
            return None, (
                "claude-agent-sdk is not installed or is too old. "
                "Run: pip install -U claude-agent-sdk "
                f"(original error: {e})"
            )

        # Sealed session: fresh context, no tools, one turn, no user/project
        # settings or CLAUDE.md files (running classify() from inside a repo
        # must not inject that repo's instructions into classifications).
        opts_kwargs = dict(
            model=model,
            allowed_tools=[],
            max_turns=1,
            setting_sources=[],
        )
        if system_prompt:
            opts_kwargs["system_prompt"] = system_prompt

        # Engine parity: the agent enables thinking by default (Phase-0
        # probe), but cat-stack's default is thinking_budget=0 -> off.
        # Positive budgets grade into the shared effort vocabulary.
        if thinking_budget and thinking_budget > 0:
            from catstack._providers import _thinking_budget_to_effort

            opts_kwargs["effort"] = _thinking_budget_to_effort(thinking_budget)
        else:
            opts_kwargs["thinking"] = ThinkingConfigDisabled(type="disabled")

        async def _run(options):
            """Consume one query stream -> (text, result_error, rate_limit_detail)."""
            text_parts = []
            result_error = None
            rate_limit_detail = None
            async for message in query(prompt=prompt, options=options):
                if isinstance(message, AssistantMessage):
                    for block in message.content:
                        if isinstance(block, TextBlock):
                            text_parts.append(block.text)
                elif isinstance(message, RateLimitEvent):
                    detail = _rate_limit_detail(
                        getattr(message, "rate_limit_info", None)
                    )
                    if detail:
                        rate_limit_detail = detail
                elif isinstance(message, ResultMessage):
                    if _api_status_is_rate_limit(
                        getattr(message, "api_error_status", None)
                    ):
                        # Keep a more specific event detail if one was seen.
                        rate_limit_detail = (
                            rate_limit_detail or "HTTP 429 (too many requests)"
                        )
                    if getattr(message, "is_error", False):
                        errs = getattr(message, "errors", None) or []
                        parts = [
                            str(p)
                            for p in ([getattr(message, "result", None)] + list(errs))
                            if p
                        ]
                        result_error = (
                            " ".join(parts) or "agent returned an error result"
                        )
            return "".join(text_parts).strip(), result_error, rate_limit_detail

        try:
            return _finalize(*await _run(ClaudeAgentOptions(**opts_kwargs)))
        except CLINotFoundError:
            return None, (
                "Claude CLI not found. Install it: https://code.claude.com/docs"
            )
        except Exception as e:
            # Thinking-config incompatibilities (e.g. models that reject an
            # explicit disable) fall back to the agent default rather than
            # failing the row. A rate limit takes precedence over that retry.
            retry_without_thinking = (
                "thinking" in opts_kwargs
                and "thinking" in str(e).lower()
                and not _looks_rate_limited_text(e)
            )
            if not retry_without_thinking:
                return None, self._error_from_exception(e)
            opts_kwargs.pop("thinking", None)
            try:
                return _finalize(*await _run(ClaudeAgentOptions(**opts_kwargs)))
            except Exception as e2:
                return None, self._error_from_exception(e2)
|
catclaws/_backend.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Agent-agnostic plumbing: bounded-concurrency execution of sealed calls.
|
|
2
|
+
|
|
3
|
+
classify() is synchronous (matching the rest of the CatLLM ecosystem); the
|
|
4
|
+
SDKs are asyncio-native. This module owns that seam: build one coroutine per
|
|
5
|
+
row, run them all under a semaphore in a single event loop.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def gather_bounded(coro_fns, max_workers: int = 4):
    """Run zero-arg coroutine factories with at most `max_workers` in flight.

    Args:
        coro_fns: iterable of zero-argument callables, each returning an
            awaitable.
        max_workers: concurrency cap; values below 1 are treated as 1.

    Returns:
        List of results in input order. An exception raised by a factory or
        its coroutine is captured and returned in place of that result
        (callers decide how to record the failure) — one bad row must never
        abort the batch.

    Works both from plain synchronous code and from a thread that already has
    a running event loop (Jupyter, async frameworks). ``asyncio.run`` refuses
    to start in the latter case, so the batch then runs on a short-lived
    worker thread with its own loop while this call blocks for the result.
    """
    factories = list(coro_fns)

    async def _run():
        # Created inside the loop that will use it.
        sem = asyncio.Semaphore(max(1, int(max_workers)))

        async def _bounded(fn):
            async with sem:
                try:
                    return await fn()
                except Exception as e:
                    return e

        return await asyncio.gather(*(_bounded(fn) for fn in factories))

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop running in this thread: the ordinary path.
        return asyncio.run(_run())

    # A loop is already running here; run the batch in a separate thread.
    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(lambda: asyncio.run(_run())).result()
|
catclaws/classify.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""catclaws.classify() — one-row-at-a-time classification through an agent CLI.
|
|
2
|
+
|
|
3
|
+
Design (see MASTERPLAN.md):
|
|
4
|
+
- One row = one sealed, fresh-context agent call. Throughput comes from
|
|
5
|
+
bounded concurrency, never from shared conversations or corpus prompts.
|
|
6
|
+
- Prompts are cat-stack's validated classification prompt, byte-identical to
|
|
7
|
+
the API path (`build_text_classification_prompt`).
|
|
8
|
+
- The model answers in JSON (prompt-requested); parsing reuses cat-stack's
|
|
9
|
+
`extract_json` + `validate_classification_json`; output is the standard
|
|
10
|
+
wide 0/1 DataFrame.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import asyncio
|
|
14
|
+
import time
|
|
15
|
+
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
from ._adapters import get_adapter
|
|
19
|
+
from ._adapters.base import is_rate_limited, parse_reset_epoch
|
|
20
|
+
from ._backend import gather_bounded
|
|
21
|
+
|
|
22
|
+
# The system prompt is transport scaffolding (it replaces Claude Code's
|
|
23
|
+
# default agent persona), NOT part of the validated per-row instrument —
|
|
24
|
+
# the instrument travels entirely in the user prompt, as it does on the
|
|
25
|
+
# API path where no system message is sent for text classification.
|
|
26
|
+
_SYSTEM_PROMPT = (
    "You are a text classification engine. "
    "Follow the user's instructions exactly "
    "and reply with only what they ask for."
)

# Initial wait, in seconds, after a rate-limited reply; each further retry
# doubles it. Subscription usage windows are minutes-scale, so the wait
# starts coarse rather than at API-style sub-second values.
_RATE_LIMIT_BASE_DELAY = 30.0
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def classify(
    input_data,
    categories,
    user_model: str = "claude-sonnet-5",
    agent: str = "claude",
    description: str = "",
    multi_label: bool = True,
    thinking_budget: int = 0,
    max_workers: int = 4,
    json_retries: int = 2,
    rate_limit_retries: int = 2,
):
    """Classify text rows into 0/1 category indicators via an agent CLI.

    Runs on the agent's subscription login (no API key). Same prompt, same
    JSON contract, and same output schema as ``catstack.classify()``.

    Args:
        input_data: list of text rows (or pandas Series).
        categories: list of category names.
        user_model: model the agent should use (e.g. "claude-sonnet-5").
        agent: which agent CLI answers ("claude"; "codex" planned).
        description: context about the data (survey question etc.) — feeds
            the same "Context:" line as the API path.
        multi_label: multiple categories per row (default) vs single best.
        thinking_budget: cat-stack semantics — 0 disables reasoning (default,
            engine parity), >0 grades into the agent's effort vocabulary.
        max_workers: concurrent sealed calls in flight.
        json_retries: re-asks per row when the reply isn't valid JSON or the
            call fails with a non-rate-limit error.
        rate_limit_retries: on a rate-limited row, how many times to back off
            (exponential from 30s) and retry before giving up. Independent of
            json_retries; set 0 to fail fast on limits. A row also fails fast
            when the adapter reports a reset time further away than the
            backoff this row still has left. Other in-flight rows are
            unaffected while one row waits.

    Returns:
        pandas.DataFrame with input_data, processing_status, and one 0/1
        category_N column per category (same schema as catstack.classify()).
        Failed rows carry ``"error: ..."`` in processing_status and ``None``
        in every category column.

    Raises:
        ValueError: if ``input_data`` or ``categories`` is empty, or ``agent``
            is not a registered adapter.
    """
    from catstack.text_functions_ensemble import build_text_classification_prompt
    from catstack import extract_json
    from catstack._utils import validate_classification_json

    rows = list(input_data)
    if not rows:
        raise ValueError("input_data is empty")
    categories = list(categories)
    if not categories:
        raise ValueError("categories is empty")

    adapter = get_adapter(agent)

    # Same prompt components as the engine builds them.
    categories_str = "\n".join(f"{i + 1}. {cat}" for i, cat in enumerate(categories))
    survey_question_context = f"Context: {description}." if description else ""

    def _row_prompt(text):
        """Build the user prompt for one row (None is sent as empty text)."""
        messages = build_text_classification_prompt(
            response_text=text if text is not None else "",
            categories_str=categories_str,
            survey_question_context=survey_question_context,
            multi_label=multi_label,
        )
        return messages[-1]["content"]

    n_cats = len(categories)

    def _failure(message):
        """Row result for a terminal failure: status text plus all-None indicators."""
        return {"status": f"error: {message}", "indicators": [None] * n_cats}

    def _success(values):
        """Row result for a validated reply: one 0/1 indicator per category."""
        return {
            "status": "success",
            "indicators": [
                1 if str(values.get(str(i + 1), "0")) == "1" else 0
                for i in range(n_cats)
            ],
        }

    async def _classify_row(text):
        """One row: sealed call -> parse -> validate.

        Two independent retry budgets. A rate-limited reply spends a
        `rate_limit_retries` slot: back off (exponential from 30s) and re-ask,
        without touching json_retries — re-asking a limit immediately would
        just hit it again. The `await asyncio.sleep` yields the event loop, so
        other in-flight rows keep going while this one waits. Any other
        malformed/failed reply spends a `json_retries` slot and re-asks now.
        """
        prompt = _row_prompt(text)
        last_error = "unknown error"
        rl_retries_left = max(0, int(rate_limit_retries))
        json_retries_left = max(0, int(json_retries))
        delay = _RATE_LIMIT_BASE_DELAY
        while True:
            reply, error = await adapter.one_shot(
                prompt,
                system_prompt=_SYSTEM_PROMPT,
                model=user_model,
                thinking_budget=thinking_budget,
            )
            if error and is_rate_limited(error):
                # A hard cap resetting beyond what this row can still wait
                # won't clear by retrying — fail fast with the resumable
                # message rather than sleeping through futile re-asks.
                # Unknown/near resets still back off.
                #
                # Sleep still available = the next wait plus every doubled
                # wait after it: delay * (2**k - 1) for k retries left. It is
                # measured from now, not from the first attempt, because
                # earlier sleeps have already been spent.
                remaining_budget = delay * (2 ** rl_retries_left - 1)
                reset = parse_reset_epoch(error)
                futile = (
                    reset is not None
                    and (reset - time.time()) > remaining_budget
                )
                if rl_retries_left > 0 and not futile:
                    rl_retries_left -= 1
                    await asyncio.sleep(delay)
                    delay *= 2
                    continue
                # Backoff exhausted or futile — terminal for this row.
                return _failure(error)
            if error:
                last_error = error
            else:
                # An adapter that breaks the (text, error) contract by
                # returning no text is treated as an invalid reply.
                parsed = extract_json(reply) if reply else None
                ok, values = (False, None)
                if parsed:
                    ok, values = validate_classification_json(parsed, n_cats)
                if ok:
                    return _success(values)
                snippet = (reply or "")[:120]
                last_error = f"invalid classification JSON in reply: {snippet!r}"
            if json_retries_left > 0:
                json_retries_left -= 1
                continue
            return _failure(last_error)

    results = gather_bounded(
        [lambda t=t: _classify_row(t) for t in rows], max_workers=max_workers
    )

    out = {"input_data": rows, "processing_status": []}
    for i in range(n_cats):
        out[f"category_{i + 1}"] = []
    for res in results:
        if isinstance(res, Exception):
            # gather_bounded hands back exceptions in place of results.
            res = _failure(res)
        out["processing_status"].append(res["status"])
        for i, indicator in enumerate(res["indicators"]):
            out[f"category_{i + 1}"].append(indicator)

    return pd.DataFrame(out)
|