cat-stack 1.6.4__tar.gz → 1.6.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. {cat_stack-1.6.4 → cat_stack-1.6.5}/PKG-INFO +10 -1
  2. {cat_stack-1.6.4 → cat_stack-1.6.5}/README.md +9 -0
  3. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/__about__.py +1 -1
  4. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_providers.py +82 -7
  5. {cat_stack-1.6.4 → cat_stack-1.6.5}/.gitignore +0 -0
  6. {cat_stack-1.6.4 → cat_stack-1.6.5}/LICENSE +0 -0
  7. {cat_stack-1.6.4 → cat_stack-1.6.5}/pyproject.toml +0 -0
  8. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/cat_stack/__init__.py +0 -0
  9. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/__init__.py +0 -0
  10. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_batch.py +0 -0
  11. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_category_analysis.py +0 -0
  12. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_chunked.py +0 -0
  13. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_embeddings.py +0 -0
  14. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_formatter.py +0 -0
  15. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_pilot_test.py +0 -0
  16. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_prompts.py +0 -0
  17. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_review_ui.py +0 -0
  18. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_tiebreaker.py +0 -0
  19. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_utils.py +0 -0
  20. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_web_fetch.py +0 -0
  21. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/_wrapper_helpers.py +0 -0
  22. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/CoVe.py +0 -0
  23. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/__init__.py +0 -0
  24. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/image_CoVe.py +0 -0
  25. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/image_stepback.py +0 -0
  26. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/pdf_CoVe.py +0 -0
  27. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/pdf_stepback.py +0 -0
  28. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/stepback.py +0 -0
  29. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/calls/top_n.py +0 -0
  30. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/classify.py +0 -0
  31. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/explore.py +0 -0
  32. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/extract.py +0 -0
  33. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/image_functions.py +0 -0
  34. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/images/circle.png +0 -0
  35. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/images/cube.png +0 -0
  36. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/images/diamond.png +0 -0
  37. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/images/overlapping_pentagons.png +0 -0
  38. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/images/rectangles.png +0 -0
  39. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/model_reference_list.py +0 -0
  40. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/pdf_functions.py +0 -0
  41. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/prompt_tune.py +0 -0
  42. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/summarize.py +0 -0
  43. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/text_functions.py +0 -0
  44. {cat_stack-1.6.4 → cat_stack-1.6.5}/src/catstack/text_functions_ensemble.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-stack
3
- Version: 1.6.4
3
+ Version: 1.6.5
4
4
  Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
@@ -193,6 +193,15 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
193
193
  as `success`)
194
194
  - **Embedding similarity** tiebreaker for ensemble consensus ties
195
195
  - **Pilot test** — validate classifications on a small sample before committing to the full run
196
+ - **Provider-conditional HTTP timeouts** — cloud providers use a tight
197
+ 120 s per-request timeout (catches genuine hangs without waiting too
198
+ long on transient API blips), and the Ollama provider uses a wider
199
+ 600 s per-request / 1200 s cumulative budget (accommodates the long
200
+ per-row tails that emerge when running 14B+ models on
201
+ memory-constrained hardware like 16 GB Macs). Power users can override per
202
+ client: `UnifiedLLMClient(provider, key, model, request_timeout=900,
203
+ max_total_wait=1800)`, or set a process-wide override with
204
+ `catstack._providers.set_session_timeouts(request_timeout=..., max_total_wait=...)`
196
205
 
197
206
  ## Future work / contributions welcome
198
207
 
@@ -157,6 +157,15 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
157
157
  as `success`)
158
158
  - **Embedding similarity** tiebreaker for ensemble consensus ties
159
159
  - **Pilot test** — validate classifications on a small sample before committing to the full run
160
+ - **Provider-conditional HTTP timeouts** — cloud providers use a tight
161
+ 120 s per-request timeout (catches genuine hangs without waiting too
162
+ long on transient API blips), and the Ollama provider uses a wider
163
+ 600 s per-request / 1200 s cumulative budget (accommodates the long
164
+ per-row tails that emerge when running 14B+ models on
165
+ memory-constrained hardware like 16 GB Macs). Power users can override per
166
+ client: `UnifiedLLMClient(provider, key, model, request_timeout=900,
167
+ max_total_wait=1800)`, or set a process-wide override with
168
+ `catstack._providers.set_session_timeouts(request_timeout=..., max_total_wait=...)`
160
169
 
161
170
  ## Future work / contributions welcome
162
171
 
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: GPL-3.0-or-later
4
- __version__ = "1.6.4"
4
+ __version__ = "1.6.5"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-stack"
@@ -19,6 +19,52 @@ import requests
19
19
  # short enough that batch ensembles don't stall for half an hour."
20
20
  _MAX_TOTAL_WAIT_SECONDS = 300.0
21
21
 
22
+ # Per-HTTP-request timeout, in seconds. For cloud providers (OpenAI,
23
+ # Anthropic, Google, …) inference is usually 1-10 seconds, so 120 s is
24
+ # a generous ceiling that catches genuine hangs.
25
+ #
26
+ # Local Ollama is a different regime: on memory-constrained hardware
27
+ # (e.g., 16 GB M1 Pro running a 14 B-class model), individual rows can
28
+ # take 2-4+ minutes under thermal/memory pressure. cat-stack 1.6.4
29
+ # logged frequent spurious "Request timeout" failures in those
30
+ # conditions even when Ollama was about to produce valid output.
31
+ # `_OLLAMA_REQUEST_TIMEOUT` and `_OLLAMA_MAX_TOTAL_WAIT_SECONDS` give
32
+ # the Ollama path a much longer window. Surfaced during the small-tier
33
+ # paper run, 2026-06-04.
34
+ _REQUEST_TIMEOUT = 120.0 # cloud providers
35
+ _OLLAMA_REQUEST_TIMEOUT = 600.0 # local Ollama — 5x cloud, accommodates slow-row tails
36
+ _OLLAMA_MAX_TOTAL_WAIT_SECONDS = 1200.0 # 4x the cloud budget (300 s), widened because the per-request timeout is 5x
37
+
38
+
39
+ # Session-level user override. Set non-None at the start of a `classify()`
40
+ # call to override the conditional defaults for every UnifiedLLMClient
41
+ # that has no per-instance override, without per-site arg threading.
42
+ # Single-process scope; safe under cat-stack's intra-call parallelism
43
+ # (per-call sets/resets bracket all workers).
44
+ _session_request_timeout: float = None
45
+ _session_max_total_wait: float = None
46
+
47
+
48
+ def set_session_timeouts(request_timeout: float = None, max_total_wait: float = None):
49
+ """Set the session-level HTTP-timeout overrides. Both values are replaced on every call, so an omitted or None argument clears that override."""
50
+ global _session_request_timeout, _session_max_total_wait
51
+ _session_request_timeout = request_timeout
52
+ _session_max_total_wait = max_total_wait
53
+
54
+
55
+ def _request_timeout_for(provider: str) -> float:
56
+ """Per-request HTTP timeout. Session override wins over provider default."""
57
+ if _session_request_timeout is not None:
58
+ return _session_request_timeout
59
+ return _OLLAMA_REQUEST_TIMEOUT if provider == "ollama" else _REQUEST_TIMEOUT
60
+
61
+
62
+ def _max_total_wait_for(provider: str) -> float:
63
+ """Per-call cumulative-wait cap. Session override wins."""
64
+ if _session_max_total_wait is not None:
65
+ return _session_max_total_wait
66
+ return _OLLAMA_MAX_TOTAL_WAIT_SECONDS if provider == "ollama" else _MAX_TOTAL_WAIT_SECONDS
67
+
22
68
 
23
69
  # ---------------------------------------------------------------------------
24
70
  # OpenAI reasoning_effort: per-model-family off-equivalent value.
@@ -274,10 +320,27 @@ PROVIDER_CONFIG = {
274
320
  class UnifiedLLMClient:
275
321
  """A unified client for calling various LLM providers via HTTP."""
276
322
 
277
- def __init__(self, provider: str, api_key: str, model: str):
323
+ def __init__(self, provider: str, api_key: str, model: str,
324
+ request_timeout: float = None,
325
+ max_total_wait: float = None):
326
+ """
327
+ Args:
328
+ request_timeout (float | None): Override the per-HTTP-request
329
+ timeout (seconds). When None, uses the session override from
330
+ set_session_timeouts() if set, else 120 s (cloud) / 600 s (Ollama).
331
+ Pass an explicit float to override per call site.
332
+ max_total_wait (float | None): Override the per-call cumulative
333
+ retry budget (seconds). When None, uses the session override from
334
+ set_session_timeouts() if set, else 300 s (cloud) / 1200 s (Ollama).
335
+ """
278
336
  self.provider = _normalize_provider(provider)
279
337
  self.api_key = api_key
280
338
  self.model = model
339
+ # User-level overrides for HTTP timeouts. None means "use the
340
+ # provider-conditional default" (see _request_timeout_for /
341
+ # _max_total_wait_for at module level).
342
+ self._request_timeout_override = request_timeout
343
+ self._max_total_wait_override = max_total_wait
281
344
 
282
345
  # Lazy HuggingFace router fallback — start with None and only
283
346
  # populate when we either (a) have an explicit router suffix, or
@@ -755,8 +818,20 @@ class UnifiedLLMClient:
755
818
  payload.pop("response_format")
756
819
 
757
820
  # Track cumulative wait so a long string of transient errors can't
758
- # block the call indefinitely. See _MAX_TOTAL_WAIT_SECONDS.
821
+ # block the call indefinitely. Timeouts are provider-conditional by
822
+ # default; user overrides on the client instance (set at __init__)
823
+ # take precedence.
759
824
  start = time.monotonic()
825
+ request_timeout = (
826
+ self._request_timeout_override
827
+ if self._request_timeout_override is not None
828
+ else _request_timeout_for(self.provider)
829
+ )
830
+ max_total_wait = (
831
+ self._max_total_wait_override
832
+ if self._max_total_wait_override is not None
833
+ else _max_total_wait_for(self.provider)
834
+ )
760
835
  # Per-call flag: have we already tried stripping response_format on a
761
836
  # transient error this call? Only strip once per call so we don't
762
837
  # mutate payload on every retry tick.
@@ -769,7 +844,7 @@ class UnifiedLLMClient:
769
844
  endpoint,
770
845
  headers=headers,
771
846
  json=payload,
772
- timeout=120,
847
+ timeout=request_timeout,
773
848
  )
774
849
 
775
850
  # Check for HTTP errors
@@ -854,7 +929,7 @@ class UnifiedLLMClient:
854
929
  if wait_time is None:
855
930
  wait_time = _backoff_with_jitter(initial_delay, attempt, multiplier=5.0)
856
931
  elapsed = time.monotonic() - start
857
- if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
932
+ if attempt < max_retries - 1 and elapsed + wait_time <= max_total_wait:
858
933
  # Name the throttling provider/model so multi-model
859
934
  # ensemble runs can attribute the slowdown.
860
935
  print(f"[{self.provider}/{self.model}] Rate limited. Waiting {wait_time:.1f}s...")
@@ -894,7 +969,7 @@ class UnifiedLLMClient:
894
969
  if wait_time is None:
895
970
  wait_time = _backoff_with_jitter(initial_delay, attempt)
896
971
  elapsed = time.monotonic() - start
897
- if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
972
+ if attempt < max_retries - 1 and elapsed + wait_time <= max_total_wait:
898
973
  # Name the failing provider/model — same rationale as
899
974
  # the 429 handler above.
900
975
  print(f"[{self.provider}/{self.model}] Server error {response.status_code}. Retrying in {wait_time:.1f}s...")
@@ -911,7 +986,7 @@ class UnifiedLLMClient:
911
986
  except requests.exceptions.Timeout:
912
987
  wait_time = _backoff_with_jitter(initial_delay, attempt)
913
988
  elapsed = time.monotonic() - start
914
- if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
989
+ if attempt < max_retries - 1 and elapsed + wait_time <= max_total_wait:
915
990
  print(f"Request timeout. Retrying in {wait_time:.1f}s...")
916
991
  time.sleep(wait_time)
917
992
  else:
@@ -920,7 +995,7 @@ class UnifiedLLMClient:
920
995
  except requests.exceptions.RequestException as e:
921
996
  wait_time = _backoff_with_jitter(initial_delay, attempt)
922
997
  elapsed = time.monotonic() - start
923
- if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
998
+ if attempt < max_retries - 1 and elapsed + wait_time <= max_total_wait:
924
999
  print(f"Request error: {e}. Retrying in {wait_time:.1f}s...")
925
1000
  time.sleep(wait_time)
926
1001
  else:
File without changes
File without changes
File without changes