cat-stack 1.6.4__tar.gz → 1.6.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cat_stack-1.6.4 → cat_stack-1.6.5}/PKG-INFO +10 -1
- {cat_stack-1.6.4 → cat_stack-1.6.5}/README.md +9 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/__about__.py +1 -1
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_providers.py +82 -7
- {cat_stack-1.6.4 → cat_stack-1.6.5}/.gitignore +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/LICENSE +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/pyproject.toml +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/cat_stack/__init__.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/__init__.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_batch.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_category_analysis.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_chunked.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_embeddings.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_formatter.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_pilot_test.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_prompts.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_review_ui.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_tiebreaker.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_utils.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_web_fetch.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_wrapper_helpers.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/CoVe.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/__init__.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/image_CoVe.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/image_stepback.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/pdf_CoVe.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/pdf_stepback.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/stepback.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/top_n.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/classify.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/explore.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/extract.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/image_functions.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/images/circle.png +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/images/cube.png +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/images/diamond.png +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/images/overlapping_pentagons.png +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/images/rectangles.png +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/model_reference_list.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/pdf_functions.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/prompt_tune.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/summarize.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/text_functions.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/text_functions_ensemble.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cat-stack
|
|
3
|
-
Version: 1.6.4
|
|
3
|
+
Version: 1.6.5
|
|
4
4
|
Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
|
|
5
5
|
Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
|
|
@@ -193,6 +193,15 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
|
|
|
193
193
|
as `success`)
|
|
194
194
|
- **Embedding similarity** tiebreaker for ensemble consensus ties
|
|
195
195
|
- **Pilot test** — validate classifications on a small sample before committing to the full run
|
|
196
|
+
- **Provider-conditional HTTP timeouts** — cloud providers use a tight
|
|
197
|
+
120 s per-request timeout (catches genuine hangs without waiting too
|
|
198
|
+
long on transient API blips), and the Ollama provider uses a wider
|
|
199
|
+
600 s per-request / 1200 s cumulative budget (accommodates the long
|
|
200
|
+
per-row tails that emerge when running 14B+ models on memory-
|
|
201
|
+
constrained hardware like 16 GB Macs). Power users can override per
|
|
202
|
+
client: `UnifiedLLMClient(provider, key, model, request_timeout=900,
|
|
203
|
+
max_total_wait=1800)`, or set a process-wide override with
|
|
204
|
+
`catstack._providers.set_session_timeouts(request_timeout=..., max_total_wait=...)`
|
|
196
205
|
|
|
197
206
|
## Future work / contributions welcome
|
|
198
207
|
|
|
@@ -157,6 +157,15 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
|
|
|
157
157
|
as `success`)
|
|
158
158
|
- **Embedding similarity** tiebreaker for ensemble consensus ties
|
|
159
159
|
- **Pilot test** — validate classifications on a small sample before committing to the full run
|
|
160
|
+
- **Provider-conditional HTTP timeouts** — cloud providers use a tight
|
|
161
|
+
120 s per-request timeout (catches genuine hangs without waiting too
|
|
162
|
+
long on transient API blips), and the Ollama provider uses a wider
|
|
163
|
+
600 s per-request / 1200 s cumulative budget (accommodates the long
|
|
164
|
+
per-row tails that emerge when running 14B+ models on memory-
|
|
165
|
+
constrained hardware like 16 GB Macs). Power users can override per
|
|
166
|
+
client: `UnifiedLLMClient(provider, key, model, request_timeout=900,
|
|
167
|
+
max_total_wait=1800)`, or set a process-wide override with
|
|
168
|
+
`catstack._providers.set_session_timeouts(request_timeout=..., max_total_wait=...)`
|
|
160
169
|
|
|
161
170
|
## Future work / contributions welcome
|
|
162
171
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
4
|
-
__version__ = "1.6.4"
|
|
4
|
+
__version__ = "1.6.5"
|
|
5
5
|
__author__ = "Chris Soria"
|
|
6
6
|
__email__ = "chrissoria@berkeley.edu"
|
|
7
7
|
__title__ = "cat-stack"
|
|
@@ -19,6 +19,52 @@ import requests
|
|
|
19
19
|
# short enough that batch ensembles don't stall for half an hour."
|
|
20
20
|
_MAX_TOTAL_WAIT_SECONDS = 300.0
|
|
21
21
|
|
|
22
|
+
# Per-HTTP-request timeout, in seconds. For cloud providers (OpenAI,
|
|
23
|
+
# Anthropic, Google, …) inference is usually 1-10 seconds, so 120 s is
|
|
24
|
+
# a generous ceiling that catches genuine hangs.
|
|
25
|
+
#
|
|
26
|
+
# Local Ollama is a different regime: on memory-constrained hardware
|
|
27
|
+
# (e.g., 16 GB M1 Pro running a 14 B-class model), individual rows can
|
|
28
|
+
# take 2-4+ minutes under thermal/memory pressure. cat-stack 1.6.4
|
|
29
|
+
# logged frequent spurious "Request timeout" failures in those
|
|
30
|
+
# conditions even when Ollama was about to produce valid output.
|
|
31
|
+
# `_OLLAMA_REQUEST_TIMEOUT` and `_OLLAMA_MAX_TOTAL_WAIT_SECONDS` give
|
|
32
|
+
# the Ollama path a much longer window. Surfaced during the small-tier
|
|
33
|
+
# paper run, 2026-06-04.
|
|
34
|
+
_REQUEST_TIMEOUT = 120.0 # cloud providers
|
|
35
|
+
_OLLAMA_REQUEST_TIMEOUT = 600.0 # local Ollama — 5x cloud, accommodates slow-row tails
|
|
36
|
+
_OLLAMA_MAX_TOTAL_WAIT_SECONDS = 1200.0 # 4x cloud, since per-call timeout is also 5x
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Session-level user override. Set non-None at the start of a `classify()`
|
|
40
|
+
# call to override the conditional defaults for ALL UnifiedLLMClient
|
|
41
|
+
# instances constructed during that call without per-site arg threading.
|
|
42
|
+
# Single-process scope; safe under cat-stack's intra-call parallelism
|
|
43
|
+
# (per-call sets/resets bracket all workers).
|
|
44
|
+
_session_request_timeout: float = None
|
|
45
|
+
_session_max_total_wait: float = None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def set_session_timeouts(request_timeout: float = None, max_total_wait: float = None):
|
|
49
|
+
"""Set the session-level HTTP-timeout overrides. Pass None to clear."""
|
|
50
|
+
global _session_request_timeout, _session_max_total_wait
|
|
51
|
+
_session_request_timeout = request_timeout
|
|
52
|
+
_session_max_total_wait = max_total_wait
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _request_timeout_for(provider: str) -> float:
|
|
56
|
+
"""Per-request HTTP timeout. Session override wins over provider default."""
|
|
57
|
+
if _session_request_timeout is not None:
|
|
58
|
+
return _session_request_timeout
|
|
59
|
+
return _OLLAMA_REQUEST_TIMEOUT if provider == "ollama" else _REQUEST_TIMEOUT
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _max_total_wait_for(provider: str) -> float:
|
|
63
|
+
"""Per-call cumulative-wait cap. Session override wins."""
|
|
64
|
+
if _session_max_total_wait is not None:
|
|
65
|
+
return _session_max_total_wait
|
|
66
|
+
return _OLLAMA_MAX_TOTAL_WAIT_SECONDS if provider == "ollama" else _MAX_TOTAL_WAIT_SECONDS
|
|
67
|
+
|
|
22
68
|
|
|
23
69
|
# ---------------------------------------------------------------------------
|
|
24
70
|
# OpenAI reasoning_effort: per-model-family off-equivalent value.
|
|
@@ -274,10 +320,27 @@ PROVIDER_CONFIG = {
|
|
|
274
320
|
class UnifiedLLMClient:
|
|
275
321
|
"""A unified client for calling various LLM providers via HTTP."""
|
|
276
322
|
|
|
277
|
-
def __init__(self, provider: str, api_key: str, model: str):
|
|
323
|
+
def __init__(self, provider: str, api_key: str, model: str,
|
|
324
|
+
request_timeout: float = None,
|
|
325
|
+
max_total_wait: float = None):
|
|
326
|
+
"""
|
|
327
|
+
Args:
|
|
328
|
+
request_timeout (float | None): Override the per-HTTP-request
|
|
329
|
+
timeout (seconds). When None, uses the provider-conditional
|
|
330
|
+
default: 120 s for cloud providers, 600 s for Ollama.
|
|
331
|
+
Pass an explicit float to override per call site.
|
|
332
|
+
max_total_wait (float | None): Override the per-call cumulative
|
|
333
|
+
retry budget (seconds). When None, uses provider-conditional
|
|
334
|
+
default: 300 s for cloud, 1200 s for Ollama.
|
|
335
|
+
"""
|
|
278
336
|
self.provider = _normalize_provider(provider)
|
|
279
337
|
self.api_key = api_key
|
|
280
338
|
self.model = model
|
|
339
|
+
# User-level overrides for HTTP timeouts. None means "use the
|
|
340
|
+
# provider-conditional default" (see _request_timeout_for /
|
|
341
|
+
# _max_total_wait_for at module level).
|
|
342
|
+
self._request_timeout_override = request_timeout
|
|
343
|
+
self._max_total_wait_override = max_total_wait
|
|
281
344
|
|
|
282
345
|
# Lazy HuggingFace router fallback — start with None and only
|
|
283
346
|
# populate when we either (a) have an explicit router suffix, or
|
|
@@ -755,8 +818,20 @@ class UnifiedLLMClient:
|
|
|
755
818
|
payload.pop("response_format")
|
|
756
819
|
|
|
757
820
|
# Track cumulative wait so a long string of transient errors can't
|
|
758
|
-
# block the call indefinitely.
|
|
821
|
+
# block the call indefinitely. Timeouts are provider-conditional by
|
|
822
|
+
# default; user overrides on the client instance (set at __init__)
|
|
823
|
+
# take precedence.
|
|
759
824
|
start = time.monotonic()
|
|
825
|
+
request_timeout = (
|
|
826
|
+
self._request_timeout_override
|
|
827
|
+
if self._request_timeout_override is not None
|
|
828
|
+
else _request_timeout_for(self.provider)
|
|
829
|
+
)
|
|
830
|
+
max_total_wait = (
|
|
831
|
+
self._max_total_wait_override
|
|
832
|
+
if self._max_total_wait_override is not None
|
|
833
|
+
else _max_total_wait_for(self.provider)
|
|
834
|
+
)
|
|
760
835
|
# Per-call flag: have we already tried stripping response_format on a
|
|
761
836
|
# transient error this call? Only strip once per call so we don't
|
|
762
837
|
# mutate payload on every retry tick.
|
|
@@ -769,7 +844,7 @@ class UnifiedLLMClient:
|
|
|
769
844
|
endpoint,
|
|
770
845
|
headers=headers,
|
|
771
846
|
json=payload,
|
|
772
|
-
timeout=
|
|
847
|
+
timeout=request_timeout,
|
|
773
848
|
)
|
|
774
849
|
|
|
775
850
|
# Check for HTTP errors
|
|
@@ -854,7 +929,7 @@ class UnifiedLLMClient:
|
|
|
854
929
|
if wait_time is None:
|
|
855
930
|
wait_time = _backoff_with_jitter(initial_delay, attempt, multiplier=5.0)
|
|
856
931
|
elapsed = time.monotonic() - start
|
|
857
|
-
if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
|
|
932
|
+
if attempt < max_retries - 1 and elapsed + wait_time <= max_total_wait:
|
|
858
933
|
# Name the throttling provider/model so multi-model
|
|
859
934
|
# ensemble runs can attribute the slowdown.
|
|
860
935
|
print(f"[{self.provider}/{self.model}] Rate limited. Waiting {wait_time:.1f}s...")
|
|
@@ -894,7 +969,7 @@ class UnifiedLLMClient:
|
|
|
894
969
|
if wait_time is None:
|
|
895
970
|
wait_time = _backoff_with_jitter(initial_delay, attempt)
|
|
896
971
|
elapsed = time.monotonic() - start
|
|
897
|
-
if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
|
|
972
|
+
if attempt < max_retries - 1 and elapsed + wait_time <= max_total_wait:
|
|
898
973
|
# Name the failing provider/model — same rationale as
|
|
899
974
|
# the 429 handler above.
|
|
900
975
|
print(f"[{self.provider}/{self.model}] Server error {response.status_code}. Retrying in {wait_time:.1f}s...")
|
|
@@ -911,7 +986,7 @@ class UnifiedLLMClient:
|
|
|
911
986
|
except requests.exceptions.Timeout:
|
|
912
987
|
wait_time = _backoff_with_jitter(initial_delay, attempt)
|
|
913
988
|
elapsed = time.monotonic() - start
|
|
914
|
-
if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
|
|
989
|
+
if attempt < max_retries - 1 and elapsed + wait_time <= max_total_wait:
|
|
915
990
|
print(f"Request timeout. Retrying in {wait_time:.1f}s...")
|
|
916
991
|
time.sleep(wait_time)
|
|
917
992
|
else:
|
|
@@ -920,7 +995,7 @@ class UnifiedLLMClient:
|
|
|
920
995
|
except requests.exceptions.RequestException as e:
|
|
921
996
|
wait_time = _backoff_with_jitter(initial_delay, attempt)
|
|
922
997
|
elapsed = time.monotonic() - start
|
|
923
|
-
if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
|
|
998
|
+
if attempt < max_retries - 1 and elapsed + wait_time <= max_total_wait:
|
|
924
999
|
print(f"Request error: {e}. Retrying in {wait_time:.1f}s...")
|
|
925
1000
|
time.sleep(wait_time)
|
|
926
1001
|
else:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|