cat-stack 1.6.4__tar.gz → 1.6.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cat_stack-1.6.4 → cat_stack-1.6.6}/PKG-INFO +10 -1
- {cat_stack-1.6.4 → cat_stack-1.6.6}/README.md +9 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/__about__.py +1 -1
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/_providers.py +159 -7
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/text_functions_ensemble.py +3 -3
- {cat_stack-1.6.4 → cat_stack-1.6.6}/.gitignore +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/LICENSE +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/pyproject.toml +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/cat_stack/__init__.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/__init__.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/_batch.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/_category_analysis.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/_chunked.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/_embeddings.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/_formatter.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/_pilot_test.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/_prompts.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/_review_ui.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/_tiebreaker.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/_utils.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/_web_fetch.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/_wrapper_helpers.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/calls/CoVe.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/calls/__init__.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/calls/image_CoVe.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/calls/image_stepback.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/calls/pdf_CoVe.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/calls/pdf_stepback.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/calls/stepback.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/calls/top_n.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/classify.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/explore.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/extract.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/image_functions.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/images/circle.png +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/images/cube.png +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/images/diamond.png +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/images/overlapping_pentagons.png +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/images/rectangles.png +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/model_reference_list.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/pdf_functions.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/prompt_tune.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/summarize.py +0 -0
- {cat_stack-1.6.4 → cat_stack-1.6.6}/src/catstack/text_functions.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cat-stack
|
|
3
|
-
Version: 1.6.4
|
|
3
|
+
Version: 1.6.6
|
|
4
4
|
Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
|
|
5
5
|
Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
|
|
@@ -193,6 +193,15 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
|
|
|
193
193
|
as `success`)
|
|
194
194
|
- **Embedding similarity** tiebreaker for ensemble consensus ties
|
|
195
195
|
- **Pilot test** — validate classifications on a small sample before committing to the full run
|
|
196
|
+
- **Provider-conditional HTTP timeouts** — cloud providers use a tight
|
|
197
|
+
120 s per-request timeout (catches genuine hangs without waiting too
|
|
198
|
+
long on transient API blips), and the Ollama provider uses a wider
|
|
199
|
+
600 s per-request / 1200 s cumulative budget (accommodates the long
|
|
200
|
+
per-row tails that emerge when running 14B+ models on memory-
|
|
201
|
+
constrained hardware like 16 GB Macs). Power users can override per
|
|
202
|
+
client: `UnifiedLLMClient(provider, key, model, request_timeout=900,
|
|
203
|
+
max_total_wait=1800)`, or set a process-wide override with
|
|
204
|
+
`catstack._providers.set_session_timeouts(request_timeout=..., max_total_wait=...)`
|
|
196
205
|
|
|
197
206
|
## Future work / contributions welcome
|
|
198
207
|
|
|
@@ -157,6 +157,15 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
|
|
|
157
157
|
as `success`)
|
|
158
158
|
- **Embedding similarity** tiebreaker for ensemble consensus ties
|
|
159
159
|
- **Pilot test** — validate classifications on a small sample before committing to the full run
|
|
160
|
+
- **Provider-conditional HTTP timeouts** — cloud providers use a tight
|
|
161
|
+
120 s per-request timeout (catches genuine hangs without waiting too
|
|
162
|
+
long on transient API blips), and the Ollama provider uses a wider
|
|
163
|
+
600 s per-request / 1200 s cumulative budget (accommodates the long
|
|
164
|
+
per-row tails that emerge when running 14B+ models on memory-
|
|
165
|
+
constrained hardware like 16 GB Macs). Power users can override per
|
|
166
|
+
client: `UnifiedLLMClient(provider, key, model, request_timeout=900,
|
|
167
|
+
max_total_wait=1800)`, or set a process-wide override with
|
|
168
|
+
`catstack._providers.set_session_timeouts(request_timeout=..., max_total_wait=...)`
|
|
160
169
|
|
|
161
170
|
## Future work / contributions welcome
|
|
162
171
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
4
|
-
__version__ = "1.6.4"
|
|
4
|
+
__version__ = "1.6.6"
|
|
5
5
|
__author__ = "Chris Soria"
|
|
6
6
|
__email__ = "chrissoria@berkeley.edu"
|
|
7
7
|
__title__ = "cat-stack"
|
|
@@ -19,6 +19,52 @@ import requests
|
|
|
19
19
|
# short enough that batch ensembles don't stall for half an hour."
|
|
20
20
|
_MAX_TOTAL_WAIT_SECONDS = 300.0
|
|
21
21
|
|
|
22
|
+
# Per-HTTP-request timeout, in seconds. For cloud providers (OpenAI,
|
|
23
|
+
# Anthropic, Google, …) inference is usually 1-10 seconds, so 120 s is
|
|
24
|
+
# a generous ceiling that catches genuine hangs.
|
|
25
|
+
#
|
|
26
|
+
# Local Ollama is a different regime: on memory-constrained hardware
|
|
27
|
+
# (e.g., 16 GB M1 Pro running a 14 B-class model), individual rows can
|
|
28
|
+
# take 2-4+ minutes under thermal/memory pressure. cat-stack 1.6.4
|
|
29
|
+
# logged frequent spurious "Request timeout" failures in those
|
|
30
|
+
# conditions even when Ollama was about to produce valid output.
|
|
31
|
+
# `_OLLAMA_REQUEST_TIMEOUT` and `_OLLAMA_MAX_TOTAL_WAIT_SECONDS` give
|
|
32
|
+
# the Ollama path a much longer window. Surfaced during the small-tier
|
|
33
|
+
# paper run, 2026-06-04.
|
|
34
|
+
_REQUEST_TIMEOUT = 120.0 # cloud providers
|
|
35
|
+
_OLLAMA_REQUEST_TIMEOUT = 600.0 # local Ollama — 5x cloud, accommodates slow-row tails
|
|
36
|
+
_OLLAMA_MAX_TOTAL_WAIT_SECONDS = 1200.0 # 4x the cloud budget, paired with the 5x per-request timeout
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Session-level user override. Set non-None at the start of a `classify()`
|
|
40
|
+
# call to override the conditional defaults for ALL UnifiedLLMClient
|
|
41
|
+
# instances constructed during that call without per-site arg threading.
|
|
42
|
+
# Single-process scope; safe under cat-stack's intra-call parallelism
|
|
43
|
+
# (per-call sets/resets bracket all workers).
|
|
44
|
+
_session_request_timeout: float = None
|
|
45
|
+
_session_max_total_wait: float = None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def set_session_timeouts(request_timeout: float = None, max_total_wait: float = None):
|
|
49
|
+
"""Set the session-level HTTP-timeout overrides. Pass None to clear."""
|
|
50
|
+
global _session_request_timeout, _session_max_total_wait
|
|
51
|
+
_session_request_timeout = request_timeout
|
|
52
|
+
_session_max_total_wait = max_total_wait
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _request_timeout_for(provider: str) -> float:
|
|
56
|
+
"""Per-request HTTP timeout. Session override wins over provider default."""
|
|
57
|
+
if _session_request_timeout is not None:
|
|
58
|
+
return _session_request_timeout
|
|
59
|
+
return _OLLAMA_REQUEST_TIMEOUT if provider == "ollama" else _REQUEST_TIMEOUT
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _max_total_wait_for(provider: str) -> float:
|
|
63
|
+
"""Per-call cumulative-wait cap. Session override wins."""
|
|
64
|
+
if _session_max_total_wait is not None:
|
|
65
|
+
return _session_max_total_wait
|
|
66
|
+
return _OLLAMA_MAX_TOTAL_WAIT_SECONDS if provider == "ollama" else _MAX_TOTAL_WAIT_SECONDS
|
|
67
|
+
|
|
22
68
|
|
|
23
69
|
# ---------------------------------------------------------------------------
|
|
24
70
|
# OpenAI reasoning_effort: per-model-family off-equivalent value.
|
|
@@ -79,6 +125,64 @@ _HF_NEEDS_ENABLE_THINKING_OFF = (
|
|
|
79
125
|
def _hf_model_needs_enable_thinking_off(model: str) -> bool:
|
|
80
126
|
return any(model.startswith(p) for p in _HF_NEEDS_ENABLE_THINKING_OFF)
|
|
81
127
|
|
|
128
|
+
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
# Ollama reasoning control: per-model-family parameter format for the
|
|
131
|
+
# top-level `think` field on chat / generate requests.
|
|
132
|
+
#
|
|
133
|
+
# Ollama standardized on a single API field name (`think`) but the value
|
|
134
|
+
# type differs per model family — gpt-oss takes an enum, most others take
|
|
135
|
+
# a boolean. See https://docs.ollama.com/capabilities/thinking.
|
|
136
|
+
#
|
|
137
|
+
# Coverage philosophy: list every Ollama reasoning model family we know of
|
|
138
|
+
# AND that uses the `think` field. Reasoning models that gate via other
|
|
139
|
+
# mechanisms (system prompts, chat-template flags) are explicitly noted in
|
|
140
|
+
# the "NOT in registry" comment below and handled elsewhere — adding them
|
|
141
|
+
# here would silently inject a no-op `think` field, which Ollama may
|
|
142
|
+
# accept but won't honor, leading to surprising behavior.
|
|
143
|
+
#
|
|
144
|
+
# Entries are checked in registry order (first match wins) by `_ollama_think_value()`, so
|
|
145
|
+
# put more-specific prefixes earlier when adding (e.g. `qwen3-coder` before
|
|
146
|
+
# `qwen3` if they differ).
|
|
147
|
+
#
|
|
148
|
+
# Registry tuple: (model prefix, value-format, low_value, high_value)
|
|
149
|
+
#
|
|
150
|
+
# Models in registry — `think` field works:
|
|
151
|
+
# gpt-oss — enum: "low" / "medium" / "high" (cannot fully disable)
|
|
152
|
+
# qwen3 / qwen3.* — bool: True / False (covers -thinking variants too)
|
|
153
|
+
# qwq — bool: True / False (Qwen QwQ — preceded Qwen3)
|
|
154
|
+
# deepseek-r1 — bool: True / False (covers -distill variants)
|
|
155
|
+
#
|
|
156
|
+
# Models NOT in registry — different mechanism, do NOT add here:
|
|
157
|
+
# magistral — controlled via system prompt (Mistral Magistral)
|
|
158
|
+
# exaone-deep — uses Modelfile-baked reasoning, no API toggle exposed
|
|
159
|
+
# marco-o1 — uses chat-template wrappers, not `think` field
|
|
160
|
+
#
|
|
161
|
+
# Models with NO reasoning (so `think` should not appear at all):
|
|
162
|
+
# gemma2/3, llama3.x/4.x, mistral, mistral-nemo, qwen2.5 (non-QwQ),
|
|
163
|
+
# phi3/4, granite, olmo, codestral, …
|
|
164
|
+
# These are NOT added; the registry's None-return for unmatched prefixes
|
|
165
|
+
# correctly omits the `think` field for them.
|
|
166
|
+
# ---------------------------------------------------------------------------
|
|
167
|
+
_OLLAMA_REASONING_MODELS = (
|
|
168
|
+
("gpt-oss", "enum", "low", "high"),
|
|
169
|
+
("qwen3", "bool", False, True), # covers qwen3.*, qwen3-*, -thinking-* variants
|
|
170
|
+
("qwq", "bool", False, True),
|
|
171
|
+
("deepseek-r1", "bool", False, True), # covers -distill-qwen, -distill-llama, etc.
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _ollama_think_value(model: str, thinking_budget):
|
|
176
|
+
"""Map cat-stack's thinking_budget to the right Ollama `think` value for
|
|
177
|
+
this model family. Returns None if the model isn't in the
|
|
178
|
+
reasoning-capable registry (no `think` field should be set)."""
|
|
179
|
+
if thinking_budget is None:
|
|
180
|
+
return None
|
|
181
|
+
for prefix, fmt, low_val, high_val in _OLLAMA_REASONING_MODELS:
|
|
182
|
+
if model.startswith(prefix):
|
|
183
|
+
return low_val if thinking_budget == 0 else high_val
|
|
184
|
+
return None
|
|
185
|
+
|
|
82
186
|
__all__ = [
|
|
83
187
|
# Main client
|
|
84
188
|
"UnifiedLLMClient",
|
|
@@ -274,10 +378,27 @@ PROVIDER_CONFIG = {
|
|
|
274
378
|
class UnifiedLLMClient:
|
|
275
379
|
"""A unified client for calling various LLM providers via HTTP."""
|
|
276
380
|
|
|
277
|
-
def __init__(self, provider: str, api_key: str, model: str):
|
|
381
|
+
def __init__(self, provider: str, api_key: str, model: str,
|
|
382
|
+
request_timeout: float = None,
|
|
383
|
+
max_total_wait: float = None):
|
|
384
|
+
"""
|
|
385
|
+
Args:
|
|
386
|
+
request_timeout (float | None): Override the per-HTTP-request
|
|
387
|
+
timeout (seconds). When None, uses the provider-conditional
|
|
388
|
+
default: 120 s for cloud providers, 600 s for Ollama.
|
|
389
|
+
Pass an explicit float to override per call site.
|
|
390
|
+
max_total_wait (float | None): Override the per-call cumulative
|
|
391
|
+
retry budget (seconds). When None, uses provider-conditional
|
|
392
|
+
default: 300 s for cloud, 1200 s for Ollama.
|
|
393
|
+
"""
|
|
278
394
|
self.provider = _normalize_provider(provider)
|
|
279
395
|
self.api_key = api_key
|
|
280
396
|
self.model = model
|
|
397
|
+
# User-level overrides for HTTP timeouts. None means "use the
|
|
398
|
+
# provider-conditional default" (see _request_timeout_for /
|
|
399
|
+
# _max_total_wait_for at module level).
|
|
400
|
+
self._request_timeout_override = request_timeout
|
|
401
|
+
self._max_total_wait_override = max_total_wait
|
|
281
402
|
|
|
282
403
|
# Lazy HuggingFace router fallback — start with None and only
|
|
283
404
|
# populate when we either (a) have an explicit router suffix, or
|
|
@@ -394,6 +515,12 @@ class UnifiedLLMClient:
|
|
|
394
515
|
elif self.provider in ("huggingface", "huggingface-together"):
|
|
395
516
|
# HuggingFace needs thinking_budget to disable thinking on models that reason by default
|
|
396
517
|
return self._build_openai_payload(messages, json_schema, creativity, force_json, thinking_budget)
|
|
518
|
+
elif self.provider == "ollama":
|
|
519
|
+
# Ollama threads thinking_budget to its top-level `think` field for
|
|
520
|
+
# reasoning-capable models (gpt-oss accepts low/medium/high; others
|
|
521
|
+
# accept booleans). Without this, gpt-oss family models emit long
|
|
522
|
+
# <think> blocks by default that bloat per-row generation 3-5x.
|
|
523
|
+
return self._build_openai_payload(messages, json_schema, creativity, force_json, thinking_budget)
|
|
397
524
|
else:
|
|
398
525
|
# Other OpenAI-compatible providers (xai, mistral, etc.)
|
|
399
526
|
return self._build_openai_payload(messages, json_schema, creativity, force_json)
|
|
@@ -469,6 +596,19 @@ class UnifiedLLMClient:
|
|
|
469
596
|
elif creativity is not None:
|
|
470
597
|
payload["temperature"] = creativity
|
|
471
598
|
|
|
599
|
+
# Ollama: per-model-family reasoning control via the top-level
|
|
600
|
+
# `think` field. gpt-oss expects an enum ("low"/"medium"/"high");
|
|
601
|
+
# qwen3/qwq/deepseek-r1 expect a boolean. Models not in the
|
|
602
|
+
# `_OLLAMA_REASONING_MODELS` registry don't support reasoning and
|
|
603
|
+
# get no `think` field (would be a no-op at best, validator-
|
|
604
|
+
# confusing at worst). Without this, Ollama-served gpt-oss
|
|
605
|
+
# produces long `<think>` blocks by default that bloat per-row
|
|
606
|
+
# generation 3-5x.
|
|
607
|
+
if self.provider == "ollama":
|
|
608
|
+
think_value = _ollama_think_value(self.model, thinking_budget)
|
|
609
|
+
if think_value is not None:
|
|
610
|
+
payload["think"] = think_value
|
|
611
|
+
|
|
472
612
|
# HuggingFace: disable thinking on model families whose chat
|
|
473
613
|
# template honors `enable_thinking` (Qwen3-family). Other HF-routed
|
|
474
614
|
# models don't need the kwarg, and strict-validator backends
|
|
@@ -755,8 +895,20 @@ class UnifiedLLMClient:
|
|
|
755
895
|
payload.pop("response_format")
|
|
756
896
|
|
|
757
897
|
# Track cumulative wait so a long string of transient errors can't
|
|
758
|
-
# block the call indefinitely.
|
|
898
|
+
# block the call indefinitely. Timeouts are provider-conditional by
|
|
899
|
+
# default; user overrides on the client instance (set at __init__)
|
|
900
|
+
# take precedence.
|
|
759
901
|
start = time.monotonic()
|
|
902
|
+
request_timeout = (
|
|
903
|
+
self._request_timeout_override
|
|
904
|
+
if self._request_timeout_override is not None
|
|
905
|
+
else _request_timeout_for(self.provider)
|
|
906
|
+
)
|
|
907
|
+
max_total_wait = (
|
|
908
|
+
self._max_total_wait_override
|
|
909
|
+
if self._max_total_wait_override is not None
|
|
910
|
+
else _max_total_wait_for(self.provider)
|
|
911
|
+
)
|
|
760
912
|
# Per-call flag: have we already tried stripping response_format on a
|
|
761
913
|
# transient error this call? Only strip once per call so we don't
|
|
762
914
|
# mutate payload on every retry tick.
|
|
@@ -769,7 +921,7 @@ class UnifiedLLMClient:
|
|
|
769
921
|
endpoint,
|
|
770
922
|
headers=headers,
|
|
771
923
|
json=payload,
|
|
772
|
-
timeout=
|
|
924
|
+
timeout=request_timeout,
|
|
773
925
|
)
|
|
774
926
|
|
|
775
927
|
# Check for HTTP errors
|
|
@@ -854,7 +1006,7 @@ class UnifiedLLMClient:
|
|
|
854
1006
|
if wait_time is None:
|
|
855
1007
|
wait_time = _backoff_with_jitter(initial_delay, attempt, multiplier=5.0)
|
|
856
1008
|
elapsed = time.monotonic() - start
|
|
857
|
-
if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
|
|
1009
|
+
if attempt < max_retries - 1 and elapsed + wait_time <= max_total_wait:
|
|
858
1010
|
# Name the throttling provider/model so multi-model
|
|
859
1011
|
# ensemble runs can attribute the slowdown.
|
|
860
1012
|
print(f"[{self.provider}/{self.model}] Rate limited. Waiting {wait_time:.1f}s...")
|
|
@@ -894,7 +1046,7 @@ class UnifiedLLMClient:
|
|
|
894
1046
|
if wait_time is None:
|
|
895
1047
|
wait_time = _backoff_with_jitter(initial_delay, attempt)
|
|
896
1048
|
elapsed = time.monotonic() - start
|
|
897
|
-
if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
|
|
1049
|
+
if attempt < max_retries - 1 and elapsed + wait_time <= max_total_wait:
|
|
898
1050
|
# Name the failing provider/model — same rationale as
|
|
899
1051
|
# the 429 handler above.
|
|
900
1052
|
print(f"[{self.provider}/{self.model}] Server error {response.status_code}. Retrying in {wait_time:.1f}s...")
|
|
@@ -911,7 +1063,7 @@ class UnifiedLLMClient:
|
|
|
911
1063
|
except requests.exceptions.Timeout:
|
|
912
1064
|
wait_time = _backoff_with_jitter(initial_delay, attempt)
|
|
913
1065
|
elapsed = time.monotonic() - start
|
|
914
|
-
if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
|
|
1066
|
+
if attempt < max_retries - 1 and elapsed + wait_time <= max_total_wait:
|
|
915
1067
|
print(f"Request timeout. Retrying in {wait_time:.1f}s...")
|
|
916
1068
|
time.sleep(wait_time)
|
|
917
1069
|
else:
|
|
@@ -920,7 +1072,7 @@ class UnifiedLLMClient:
|
|
|
920
1072
|
except requests.exceptions.RequestException as e:
|
|
921
1073
|
wait_time = _backoff_with_jitter(initial_delay, attempt)
|
|
922
1074
|
elapsed = time.monotonic() - start
|
|
923
|
-
if attempt < max_retries - 1 and elapsed + wait_time <= _MAX_TOTAL_WAIT_SECONDS:
|
|
1075
|
+
if attempt < max_retries - 1 and elapsed + wait_time <= max_total_wait:
|
|
924
1076
|
print(f"Request error: {e}. Retrying in {wait_time:.1f}s...")
|
|
925
1077
|
time.sleep(wait_time)
|
|
926
1078
|
else:
|
|
@@ -3043,7 +3043,7 @@ Categorize text responses {cove_categorize}:
|
|
|
3043
3043
|
messages=messages,
|
|
3044
3044
|
json_schema=json_schemas[cfg["model"]],
|
|
3045
3045
|
creativity=effective_creativity,
|
|
3046
|
-
thinking_budget=thinking_budget if cfg["provider"] in ("google", "openai", "anthropic", "huggingface", "huggingface-together") else None,
|
|
3046
|
+
thinking_budget=thinking_budget if cfg["provider"] in ("google", "openai", "anthropic", "huggingface", "huggingface-together", "ollama") else None,
|
|
3047
3047
|
max_retries=max_retries,
|
|
3048
3048
|
)
|
|
3049
3049
|
|
|
@@ -3100,7 +3100,7 @@ Categorize text responses {cove_categorize}:
|
|
|
3100
3100
|
messages=messages,
|
|
3101
3101
|
json_schema=json_schemas[cfg["model"]],
|
|
3102
3102
|
creativity=effective_creativity,
|
|
3103
|
-
thinking_budget=thinking_budget if cfg["provider"] in ("google", "openai", "anthropic", "huggingface", "huggingface-together") else None,
|
|
3103
|
+
thinking_budget=thinking_budget if cfg["provider"] in ("google", "openai", "anthropic", "huggingface", "huggingface-together", "ollama") else None,
|
|
3104
3104
|
max_retries=max_retries,
|
|
3105
3105
|
)
|
|
3106
3106
|
|
|
@@ -3184,7 +3184,7 @@ Categorize text responses {cove_categorize}:
|
|
|
3184
3184
|
messages=_retry_messages,
|
|
3185
3185
|
json_schema=json_schemas[cfg["model"]],
|
|
3186
3186
|
creativity=effective_creativity,
|
|
3187
|
-
thinking_budget=thinking_budget if cfg["provider"] in ("google", "openai", "anthropic", "huggingface", "huggingface-together") else None,
|
|
3187
|
+
thinking_budget=thinking_budget if cfg["provider"] in ("google", "openai", "anthropic", "huggingface", "huggingface-together", "ollama") else None,
|
|
3188
3188
|
max_retries=max_retries,
|
|
3189
3189
|
)
|
|
3190
3190
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|