cat-stack 1.6.2__tar.gz → 1.6.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cat_stack-1.6.2 → cat_stack-1.6.4}/PKG-INFO +11 -2
- {cat_stack-1.6.2 → cat_stack-1.6.4}/README.md +10 -1
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/__about__.py +1 -1
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_batch.py +74 -20
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_providers.py +6 -2
- {cat_stack-1.6.2 → cat_stack-1.6.4}/.gitignore +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/LICENSE +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/pyproject.toml +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/cat_stack/__init__.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/__init__.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_category_analysis.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_chunked.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_embeddings.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_formatter.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_pilot_test.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_prompts.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_review_ui.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_tiebreaker.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_utils.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_web_fetch.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/_wrapper_helpers.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/calls/CoVe.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/calls/__init__.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/calls/image_CoVe.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/calls/image_stepback.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/calls/pdf_CoVe.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/calls/pdf_stepback.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/calls/stepback.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/calls/top_n.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/classify.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/explore.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/extract.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/image_functions.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/images/circle.png +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/images/cube.png +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/images/diamond.png +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/images/overlapping_pentagons.png +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/images/rectangles.png +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/model_reference_list.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/pdf_functions.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/prompt_tune.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/summarize.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/text_functions.py +0 -0
- {cat_stack-1.6.2 → cat_stack-1.6.4}/src/catstack/text_functions_ensemble.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cat-stack
|
|
3
|
-
Version: 1.6.2
|
|
3
|
+
Version: 1.6.4
|
|
4
4
|
Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
|
|
5
5
|
Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
|
|
@@ -175,7 +175,16 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
|
|
|
175
175
|
|
|
176
176
|
- **Automatic prompt optimization** (`prompt_tune`) — correct a small sample in a browser UI, and the system generates per-category instructions that improve accuracy
|
|
177
177
|
- **Multi-model ensemble** with consensus voting and agreement scores
|
|
178
|
-
- **Batch API support** for OpenAI, Anthropic, Google, Mistral, and xAI
|
|
178
|
+
- **Batch API support** for OpenAI, Anthropic, Google, Mistral, and xAI.
|
|
179
|
+
*Caveat for Google (Gemini):* as of 2026-06, Google's batch
|
|
180
|
+
scheduler routinely leaves small jobs (under a few dozen rows) in
|
|
181
|
+
`BATCH_STATE_PENDING` for 30+ minutes — sometimes hours — before
|
|
182
|
+
it starts processing. Google's published SLA is up to 24h. If your
|
|
183
|
+
job is small and you want results back quickly, use `batch_mode=False`
|
|
184
|
+
for Gemini; reserve `batch_mode=True` for large jobs where the
|
|
185
|
+
50% cost discount matters more than wall-clock latency. Other
|
|
186
|
+
providers' batch APIs (OpenAI, Anthropic, xAI) typically complete
|
|
187
|
+
small jobs in 1-3 minutes.
|
|
179
188
|
- **Prompt strategies**: Chain-of-Thought, Chain-of-Verification, step-back prompting, few-shot examples
|
|
180
189
|
- **Text, image, and PDF** input auto-detection (PDF inputs are
|
|
181
190
|
validated against the `%PDF-` magic-byte header before reaching
|
|
@@ -139,7 +139,16 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
|
|
|
139
139
|
|
|
140
140
|
- **Automatic prompt optimization** (`prompt_tune`) — correct a small sample in a browser UI, and the system generates per-category instructions that improve accuracy
|
|
141
141
|
- **Multi-model ensemble** with consensus voting and agreement scores
|
|
142
|
-
- **Batch API support** for OpenAI, Anthropic, Google, Mistral, and xAI
|
|
142
|
+
- **Batch API support** for OpenAI, Anthropic, Google, Mistral, and xAI.
|
|
143
|
+
*Caveat for Google (Gemini):* as of 2026-06, Google's batch
|
|
144
|
+
scheduler routinely leaves small jobs (under a few dozen rows) in
|
|
145
|
+
`BATCH_STATE_PENDING` for 30+ minutes — sometimes hours — before
|
|
146
|
+
it starts processing. Google's published SLA is up to 24h. If your
|
|
147
|
+
job is small and you want results back quickly, use `batch_mode=False`
|
|
148
|
+
for Gemini; reserve `batch_mode=True` for large jobs where the
|
|
149
|
+
50% cost discount matters more than wall-clock latency. Other
|
|
150
|
+
providers' batch APIs (OpenAI, Anthropic, xAI) typically complete
|
|
151
|
+
small jobs in 1-3 minutes.
|
|
143
152
|
- **Prompt strategies**: Chain-of-Thought, Chain-of-Verification, step-back prompting, few-shot examples
|
|
144
153
|
- **Text, image, and PDF** input auto-detection (PDF inputs are
|
|
145
154
|
validated against the `%PDF-` magic-byte header before reaching
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
4
|
-
__version__ = "1.6.2"
|
|
4
|
+
__version__ = "1.6.4"
|
|
5
5
|
__author__ = "Chris Soria"
|
|
6
6
|
__email__ = "chrissoria@berkeley.edu"
|
|
7
7
|
__title__ = "cat-stack"
|
|
@@ -213,12 +213,17 @@ def _build_jsonl_line(provider: str, custom_id: str, payload: dict, model: str)
|
|
|
213
213
|
"body": payload,
|
|
214
214
|
}
|
|
215
215
|
elif provider == "xai":
|
|
216
|
-
# xAI
|
|
216
|
+
# xAI batch API uses a tagged-union envelope: each request element
|
|
217
|
+
# has `batch_request_id` + `batch_request` (an object with one key
|
|
218
|
+
# naming the endpoint variant: `chat_get_completion`, `responses`,
|
|
219
|
+
# `image_generation`, etc.). For chat classification the variant is
|
|
220
|
+
# `chat_get_completion` and the payload inside it is the standard
|
|
221
|
+
# chat-completion body (model + messages + …).
|
|
217
222
|
return {
|
|
218
|
-
"
|
|
219
|
-
"
|
|
220
|
-
|
|
221
|
-
|
|
223
|
+
"batch_request_id": custom_id,
|
|
224
|
+
"batch_request": {
|
|
225
|
+
"chat_get_completion": payload,
|
|
226
|
+
},
|
|
222
227
|
}
|
|
223
228
|
raise ValueError(f"Unsupported batch provider: {provider}")
|
|
224
229
|
|
|
@@ -369,16 +374,25 @@ def _create_batch_job(
|
|
|
369
374
|
return resp.json()["id"]
|
|
370
375
|
|
|
371
376
|
elif provider == "xai":
|
|
372
|
-
# Step 1: Create empty batch
|
|
377
|
+
# Step 1: Create empty batch. xAI requires a `name` field on create;
|
|
378
|
+
# the older `completion_window` field was removed. Response key is
|
|
379
|
+
# `batch_id`, not `id`.
|
|
380
|
+
import time as _time
|
|
373
381
|
url = BATCH_ENDPOINTS["xai"]["create"]
|
|
374
|
-
body = {"
|
|
382
|
+
body = {"name": f"catstack-{_time.strftime('%Y%m%d-%H%M%S')}"}
|
|
375
383
|
resp = requests.post(url, headers=headers, json=body, timeout=60)
|
|
376
384
|
resp.raise_for_status()
|
|
377
|
-
job_id = resp.json()["id"]
|
|
385
|
+
job_id = resp.json()["batch_id"]
|
|
378
386
|
|
|
379
|
-
# Step 2: Add all requests to the batch
|
|
387
|
+
# Step 2: Add all requests to the batch. xAI wraps the list under a
|
|
388
|
+
# `batch_requests` key; each element is the tagged-union envelope
|
|
389
|
+
# built in `_build_jsonl_line`.
|
|
380
390
|
add_url = BATCH_ENDPOINTS["xai"]["add"].format(job_id=job_id)
|
|
381
|
-
add_resp = requests.post(
|
|
391
|
+
add_resp = requests.post(
|
|
392
|
+
add_url, headers=headers,
|
|
393
|
+
json={"batch_requests": requests_list},
|
|
394
|
+
timeout=120,
|
|
395
|
+
)
|
|
382
396
|
add_resp.raise_for_status()
|
|
383
397
|
return job_id
|
|
384
398
|
|
|
@@ -479,11 +493,30 @@ def _poll_batch_job(
|
|
|
479
493
|
f"total={status_data.get('total_requests', '?')}"
|
|
480
494
|
)
|
|
481
495
|
elif provider == "xai":
|
|
482
|
-
state
|
|
483
|
-
|
|
496
|
+
# xAI returns a `state` *object* with num_* counters, not a
|
|
497
|
+
# top-level state string. Synthesize a state string compatible
|
|
498
|
+
# with the existing terminal/success-set logic:
|
|
499
|
+
# num_pending > 0 → "running"
|
|
500
|
+
# num_pending == 0, all errored/cancelled, no success → "failed"/"cancelled"
|
|
501
|
+
# num_pending == 0, at least one success → "completed"
|
|
502
|
+
state_obj = status_data.get("state", {})
|
|
503
|
+
num_pending = state_obj.get("num_pending", 1)
|
|
504
|
+
num_success = state_obj.get("num_success", 0)
|
|
505
|
+
num_error = state_obj.get("num_error", 0)
|
|
506
|
+
num_cancelled = state_obj.get("num_cancelled", 0)
|
|
507
|
+
if num_pending > 0:
|
|
508
|
+
state = "running"
|
|
509
|
+
elif num_success > 0:
|
|
510
|
+
state = "completed"
|
|
511
|
+
elif num_cancelled > 0 and num_error == 0:
|
|
512
|
+
state = "cancelled"
|
|
513
|
+
elif num_error > 0:
|
|
514
|
+
state = "failed"
|
|
515
|
+
else:
|
|
516
|
+
state = "completed" # all zeros — empty batch
|
|
484
517
|
progress_str = (
|
|
485
|
-
f"completed={
|
|
486
|
-
f"
|
|
518
|
+
f"completed={num_success} failed={num_error} "
|
|
519
|
+
f"pending={num_pending} cancelled={num_cancelled}"
|
|
487
520
|
)
|
|
488
521
|
else:
|
|
489
522
|
state = ""
|
|
@@ -590,12 +623,26 @@ def _download_batch_results(
|
|
|
590
623
|
return resp.text
|
|
591
624
|
|
|
592
625
|
elif provider == "xai":
|
|
626
|
+
# xAI's results endpoint returns paginated JSON ({results: [...],
|
|
627
|
+
# pagination_token: <str or null>}) rather than streaming JSONL.
|
|
628
|
+
# Walk all pages, concatenate the result objects, then re-serialize
|
|
629
|
+
# as JSONL so the existing line-by-line parser in
|
|
630
|
+
# `_parse_batch_results` can consume them unchanged.
|
|
593
631
|
url = BATCH_ENDPOINTS["xai"]["results"].format(job_id=job_id)
|
|
594
632
|
headers_dl = dict(headers)
|
|
595
633
|
headers_dl.pop("Content-Type", None)
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
634
|
+
all_results = []
|
|
635
|
+
pagination_token = None
|
|
636
|
+
while True:
|
|
637
|
+
params = {"pagination_token": pagination_token} if pagination_token else None
|
|
638
|
+
resp = requests.get(url, headers=headers_dl, params=params, timeout=120)
|
|
639
|
+
resp.raise_for_status()
|
|
640
|
+
data = resp.json()
|
|
641
|
+
all_results.extend(data.get("results", []) or [])
|
|
642
|
+
pagination_token = data.get("pagination_token")
|
|
643
|
+
if not pagination_token:
|
|
644
|
+
break
|
|
645
|
+
return "\n".join(json.dumps(r) for r in all_results)
|
|
599
646
|
|
|
600
647
|
raise ValueError(f"Unsupported batch provider: {provider}")
|
|
601
648
|
|
|
@@ -689,9 +736,16 @@ def _parse_batch_results(
|
|
|
689
736
|
raw_text = client._parse_response(response_body)
|
|
690
737
|
|
|
691
738
|
elif provider == "xai":
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
739
|
+
# xAI result envelope:
|
|
740
|
+
# { batch_request_id, batch_result: { response: { chat_get_completion: {…} } } }
|
|
741
|
+
# `chat_get_completion` is the OpenAI-style chat-completion body
|
|
742
|
+
# that client._parse_response() already handles. Failure case has
|
|
743
|
+
# `error_message` at the top level.
|
|
744
|
+
custom_id = data.get("batch_request_id")
|
|
745
|
+
error_val = data.get("error_message")
|
|
746
|
+
batch_result = data.get("batch_result", {}) or {}
|
|
747
|
+
response_obj = batch_result.get("response", {}) or {}
|
|
748
|
+
response_body = response_obj.get("chat_get_completion")
|
|
695
749
|
if error_val or response_body is None:
|
|
696
750
|
error_msg = str(error_val) if error_val else "No response body"
|
|
697
751
|
idx = custom_id_map.get(custom_id)
|
|
@@ -855,7 +855,9 @@ class UnifiedLLMClient:
|
|
|
855
855
|
wait_time = _backoff_with_jitter(initial_delay, attempt, multiplier=5.0)
|
|
856
856
|
elapsed = time.monotonic() - start
|
|
857
857
|
if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
|
|
858
|
-
|
|
858
|
+
# Name the throttling provider/model so multi-model
|
|
859
|
+
# ensemble runs can attribute the slowdown.
|
|
860
|
+
print(f"[{self.provider}/{self.model}] Rate limited. Waiting {wait_time:.1f}s...")
|
|
859
861
|
time.sleep(wait_time)
|
|
860
862
|
continue
|
|
861
863
|
else:
|
|
@@ -893,7 +895,9 @@ class UnifiedLLMClient:
|
|
|
893
895
|
wait_time = _backoff_with_jitter(initial_delay, attempt)
|
|
894
896
|
elapsed = time.monotonic() - start
|
|
895
897
|
if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
|
|
896
|
-
|
|
898
|
+
# Name the failing provider/model — same rationale as
|
|
899
|
+
# the 429 handler above.
|
|
900
|
+
print(f"[{self.provider}/{self.model}] Server error {response.status_code}. Retrying in {wait_time:.1f}s...")
|
|
897
901
|
time.sleep(wait_time)
|
|
898
902
|
continue
|
|
899
903
|
else:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|