PyPI - cat-stack - Versions diffs - 1.6.4__tar.gz → 1.6.5__tar.gz - Mend

cat-stack 1.6.4tar.gz → 1.6.5tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

{cat_stack-1.6.4 → cat_stack-1.6.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cat-stack
-Version: 1.6.4
+Version: 1.6.5
 Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
 Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
 Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
@@ -193,6 +193,15 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
   as `success`)
 - **Embedding similarity** tiebreaker for ensemble consensus ties
 - **Pilot test** — validate classifications on a small sample before committing to the full run
+- **Provider-conditional HTTP timeouts** — cloud providers use a tight
+  120 s per-request timeout (catches genuine hangs without waiting too
+  long on transient API blips), and the Ollama provider uses a wider
+  600 s per-request / 1200 s cumulative budget (accommodates the long
+  per-row tails that emerge when running 14B+ models on memory-
+  constrained hardware like 16 GB Macs). Power users can override per
+  client: `UnifiedLLMClient(provider, key, model, request_timeout=900,
+  max_total_wait=1800)`, or set a process-wide override with
+  `catstack._providers.set_session_timeouts(request_timeout=..., max_total_wait=...)`
 ## Future work / contributions welcome

{cat_stack-1.6.4 → cat_stack-1.6.5}/README.md RENAMED Viewed

@@ -157,6 +157,15 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
   as `success`)
 - **Embedding similarity** tiebreaker for ensemble consensus ties
 - **Pilot test** — validate classifications on a small sample before committing to the full run
+- **Provider-conditional HTTP timeouts** — cloud providers use a tight
+  120 s per-request timeout (catches genuine hangs without waiting too
+  long on transient API blips), and the Ollama provider uses a wider
+  600 s per-request / 1200 s cumulative budget (accommodates the long
+  per-row tails that emerge when running 14B+ models on memory-
+  constrained hardware like 16 GB Macs). Power users can override per
+  client: `UnifiedLLMClient(provider, key, model, request_timeout=900,
+  max_total_wait=1800)`, or set a process-wide override with
+  `catstack._providers.set_session_timeouts(request_timeout=..., max_total_wait=...)`
 ## Future work / contributions welcome

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/__about__.py RENAMED Viewed

@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
-__version__ = "1.6.4"
+__version__ = "1.6.5"
 __author__ = "Chris Soria"
 __email__ = "chrissoria@berkeley.edu"
 __title__ = "cat-stack"

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_providers.py RENAMED Viewed

@@ -19,6 +19,52 @@ import requests
 # short enough that batch ensembles don't stall for half an hour."
 _MAX_TOTAL_WAIT_SECONDS = 300.0
+# Per-HTTP-request timeout, in seconds. For cloud providers (OpenAI,
+# Anthropic, Google, …) inference is usually 1-10 seconds, so 120 s is
+# a generous ceiling that catches genuine hangs.
+#
+# Local Ollama is a different regime: on memory-constrained hardware
+# (e.g., 16 GB M1 Pro running a 14 B-class model), individual rows can
+# take 2-4+ minutes under thermal/memory pressure. cat-stack 1.6.4
+# logged frequent spurious "Request timeout" failures in those
+# conditions even when Ollama was about to produce valid output.
+# `_OLLAMA_REQUEST_TIMEOUT` and `_OLLAMA_MAX_TOTAL_WAIT_SECONDS` give
+# the Ollama path a much longer window. Surfaced during the small-tier
+# paper run, 2026-06-04.
+_REQUEST_TIMEOUT = 120.0          # cloud providers
+_OLLAMA_REQUEST_TIMEOUT = 600.0   # local Ollama — 5x cloud, accommodates slow-row tails
+_OLLAMA_MAX_TOTAL_WAIT_SECONDS = 1200.0  # 4x the cloud cap (300 s); room for two full 600 s Ollama requests
+# Session-level user override. Set non-None at the start of a `classify()`
+# call to override the conditional defaults for every UnifiedLLMClient
+# request issued while it is set, without per-site arg threading. The
+# value is read at request time, and a per-instance override passed to
+# UnifiedLLMClient(...) still takes precedence over it.
+# Module-global, so single-process scope; safe under cat-stack's
+# intra-call parallelism (per-call sets/resets bracket all workers).
+_session_request_timeout: float = None
+_session_max_total_wait: float = None
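+def set_session_timeouts(request_timeout: float = None, max_total_wait: float = None):
+    """Set the session-level HTTP-timeout overrides. Pass None to clear.
+
+    Both overrides are replaced on every call, so an argument that is
+    omitted is reset to None (i.e. that override is cleared).
+    """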
+    global _session_request_timeout, _session_max_total_wait
+    _session_request_timeout = request_timeout
+    _session_max_total_wait = max_total_wait
+def _request_timeout_for(provider: str) -> float:
+    """Per-request HTTP timeout. Session override wins over provider default."""
+    if _session_request_timeout is not None:
+        return _session_request_timeout
+    return _OLLAMA_REQUEST_TIMEOUT if provider == "ollama" else _REQUEST_TIMEOUT
+def _max_total_wait_for(provider: str) -> float:
+    """Per-call cumulative-wait cap. Session override wins."""
+    if _session_max_total_wait is not None:
+        return _session_max_total_wait
+    return _OLLAMA_MAX_TOTAL_WAIT_SECONDS if provider == "ollama" else _MAX_TOTAL_WAIT_SECONDS
 # ---------------------------------------------------------------------------
 # OpenAI reasoning_effort: per-model-family off-equivalent value.
@@ -274,10 +320,27 @@ PROVIDER_CONFIG = {
 class UnifiedLLMClient:
     """A unified client for calling various LLM providers via HTTP."""
-    def __init__(self, provider: str, api_key: str, model: str):
+    def __init__(self, provider: str, api_key: str, model: str,
+                 request_timeout: float = None,
+                 max_total_wait: float = None):
+        """
+        Args:
+            request_timeout (float | None): Override the per-HTTP-request
+                timeout (seconds). When None, a session-level override set
+                via set_session_timeouts() applies if present; otherwise the
+                provider-conditional default is used: 120 s for cloud
+                providers, 600 s for Ollama. Pass an explicit float to
+                override per call site.
+            max_total_wait (float | None): Override the per-call cumulative
+                retry budget (seconds). When None, a session-level override
+                applies if present; otherwise the provider-conditional
+                default is used: 300 s for cloud, 1200 s for Ollama.
+        """
         self.provider = _normalize_provider(provider)
         self.api_key = api_key
         self.model = model
+        # User-level overrides for HTTP timeouts. None means "use the
+        # provider-conditional default" (see _request_timeout_for /
+        # _max_total_wait_for at module level).
+        self._request_timeout_override = request_timeout
+        self._max_total_wait_override = max_total_wait
         # Lazy HuggingFace router fallback — start with None and only
         # populate when we either (a) have an explicit router suffix, or
@@ -755,8 +818,20 @@ class UnifiedLLMClient:
             payload.pop("response_format")
         # Track cumulative wait so a long string of transient errors can't
-        # block the call indefinitely. See _MAX_TOTAL_WAIT_SECONDS.
+        # block the call indefinitely. Timeouts are provider-conditional by
+        # default; user overrides on the client instance (set at __init__)
+        # take precedence.
         start = time.monotonic()
+        request_timeout = (
+            self._request_timeout_override
+            if self._request_timeout_override is not None
+            else _request_timeout_for(self.provider)
+        )
+        max_total_wait = (
+            self._max_total_wait_override
+            if self._max_total_wait_override is not None
+            else _max_total_wait_for(self.provider)
+        )
         # Per-call flag: have we already tried stripping response_format on a
         # transient error this call? Only strip once per call so we don't
         # mutate payload on every retry tick.
@@ -769,7 +844,7 @@ class UnifiedLLMClient:
                     endpoint,
                     headers=headers,
                     json=payload,
-                    timeout=120,
+                    timeout=request_timeout,
                 )
                 # Check for HTTP errors
@@ -854,7 +929,7 @@ class UnifiedLLMClient:
                     if wait_time is None:
                         wait_time = _backoff_with_jitter(initial_delay, attempt, multiplier=5.0)
                     elapsed = time.monotonic() - start
-                    if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
+                    if attempt < max_retries - 1 and elapsed + wait_time <= max_total_wait:
                         # Name the throttling provider/model so multi-model
                         # ensemble runs can attribute the slowdown.
                         print(f"[{self.provider}/{self.model}] Rate limited. Waiting {wait_time:.1f}s...")
@@ -894,7 +969,7 @@ class UnifiedLLMClient:
                     if wait_time is None:
                         wait_time = _backoff_with_jitter(initial_delay, attempt)
                     elapsed = time.monotonic() - start
-                    if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
+                    if attempt < max_retries - 1 and elapsed + wait_time <= max_total_wait:
                         # Name the failing provider/model — same rationale as
                         # the 429 handler above.
                         print(f"[{self.provider}/{self.model}] Server error {response.status_code}. Retrying in {wait_time:.1f}s...")
@@ -911,7 +986,7 @@ class UnifiedLLMClient:
             except requests.exceptions.Timeout:
                 wait_time = _backoff_with_jitter(initial_delay, attempt)
                 elapsed = time.monotonic() - start
-                if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
+                if attempt < max_retries - 1 and elapsed + wait_time <= max_total_wait:
                     print(f"Request timeout. Retrying in {wait_time:.1f}s...")
                     time.sleep(wait_time)
                 else:
@@ -920,7 +995,7 @@ class UnifiedLLMClient:
             except requests.exceptions.RequestException as e:
                 wait_time = _backoff_with_jitter(initial_delay, attempt)
                 elapsed = time.monotonic() - start
-                if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
+                if attempt < max_retries - 1 and elapsed + wait_time <= max_total_wait:
                     print(f"Request error: {e}. Retrying in {wait_time:.1f}s...")
                     time.sleep(wait_time)
                 else:

{cat_stack-1.6.4 → cat_stack-1.6.5}/.gitignore RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/LICENSE RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/pyproject.toml RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/cat_stack/__init__.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/__init__.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_batch.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_category_analysis.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_chunked.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_embeddings.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_formatter.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_pilot_test.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_prompts.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_review_ui.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_tiebreaker.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_utils.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_web_fetch.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_wrapper_helpers.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/__init__.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/image_CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/image_stepback.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/pdf_CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/pdf_stepback.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/stepback.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/top_n.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/classify.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/explore.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/extract.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/image_functions.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/images/circle.png RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/images/cube.png RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/images/diamond.png RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/images/overlapping_pentagons.png RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/images/rectangles.png RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/model_reference_list.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/pdf_functions.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/prompt_tune.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/summarize.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/text_functions.py RENAMED Viewed

File without changes

{cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/text_functions_ensemble.py RENAMED Viewed

File without changes

cat-stack 1.6.4__tar.gz → 1.6.5__tar.gz

cat-stack 1.6.4tar.gz → 1.6.5tar.gz