cat-stack 1.6.6__tar.gz → 1.6.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cat_stack-1.6.6 → cat_stack-1.6.7}/PKG-INFO +1 -1
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/__about__.py +1 -1
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_providers.py +54 -2
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/text_functions_ensemble.py +11 -4
- {cat_stack-1.6.6 → cat_stack-1.6.7}/.gitignore +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/LICENSE +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/README.md +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/pyproject.toml +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/cat_stack/__init__.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/__init__.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_batch.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_category_analysis.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_chunked.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_embeddings.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_formatter.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_pilot_test.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_prompts.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_review_ui.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_tiebreaker.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_utils.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_web_fetch.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_wrapper_helpers.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/calls/CoVe.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/calls/__init__.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/calls/image_CoVe.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/calls/image_stepback.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/calls/pdf_CoVe.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/calls/pdf_stepback.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/calls/stepback.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/calls/top_n.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/classify.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/explore.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/extract.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/image_functions.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/images/circle.png +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/images/cube.png +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/images/diamond.png +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/images/overlapping_pentagons.png +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/images/rectangles.png +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/model_reference_list.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/pdf_functions.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/prompt_tune.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/summarize.py +0 -0
- {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/text_functions.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cat-stack
|
|
3
|
-
Version: 1.6.6
|
|
3
|
+
Version: 1.6.7
|
|
4
4
|
Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
|
|
5
5
|
Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
4
|
-
__version__ = "1.6.6"
|
|
4
|
+
__version__ = "1.6.7"
|
|
5
5
|
__author__ = "Chris Soria"
|
|
6
6
|
__email__ = "chrissoria@berkeley.edu"
|
|
7
7
|
__title__ = "cat-stack"
|
|
@@ -126,6 +126,30 @@ def _hf_model_needs_enable_thinking_off(model: str) -> bool:
|
|
|
126
126
|
return any(model.startswith(p) for p in _HF_NEEDS_ENABLE_THINKING_OFF)
|
|
127
127
|
|
|
128
128
|
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
# Anthropic deprecated the `temperature` parameter starting with the Opus 4.7 /
|
|
131
|
+
# 4.8 generation: these models return 400 "`temperature` is deprecated for this
|
|
132
|
+
# model." if it is sent. Older models (opus-4-6, sonnet-4-6, sonnet-4-5, and
|
|
133
|
+
# earlier) still accept it. This mirrors the OpenAI reasoning-model handling
|
|
134
|
+
# above — we skip `temperature` up-front for the known-deprecated prefixes in
|
|
135
|
+
# `_build_anthropic_payload`, and `UnifiedLLMClient.complete()` strips it on a
|
|
136
|
+
# runtime 400 as a safety net for future families not yet in this table.
|
|
137
|
+
#
|
|
138
|
+
# Matched by name prefix; extend the tuple when new temperature-free models
|
|
139
|
+
# ship.
|
|
140
|
+
# ---------------------------------------------------------------------------
|
|
141
|
+
_ANTHROPIC_TEMPERATURE_DEPRECATED = (
|
|
142
|
+
"claude-opus-4-7",
|
|
143
|
+
"claude-opus-4-8",
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _anthropic_supports_temperature(model: str) -> bool:
|
|
148
|
+
"""False for Anthropic models that reject the `temperature` param."""
|
|
149
|
+
m = (model or "").lower()
|
|
150
|
+
return not any(m.startswith(p) for p in _ANTHROPIC_TEMPERATURE_DEPRECATED)
|
|
151
|
+
|
|
152
|
+
|
|
129
153
|
# ---------------------------------------------------------------------------
|
|
130
154
|
# Ollama reasoning control: per-model-family parameter format for the
|
|
131
155
|
# top-level `think` field on chat / generate requests.
|
|
@@ -660,6 +684,14 @@ class UnifiedLLMClient:
|
|
|
660
684
|
if system_content:
|
|
661
685
|
payload["system"] = system_content
|
|
662
686
|
|
|
687
|
+
# Newer Anthropic models (Opus 4.7+) deprecated `temperature` and 400 if
|
|
688
|
+
# it is sent. Skip it for those known prefixes, and also honor the flag
|
|
689
|
+
# cached by complete()'s runtime 400 fallback for future families.
|
|
690
|
+
_temp_ok = (
|
|
691
|
+
_anthropic_supports_temperature(self.model)
|
|
692
|
+
and not getattr(self, "_anthropic_temperature_unsupported", False)
|
|
693
|
+
)
|
|
694
|
+
|
|
663
695
|
# Extended thinking for Anthropic (minimum 1024 tokens)
|
|
664
696
|
# When thinking is enabled, temperature must be 1 (Anthropic requirement),
|
|
665
697
|
# so we skip setting temperature from creativity in that case
|
|
@@ -669,11 +701,12 @@ class UnifiedLLMClient:
|
|
|
669
701
|
"type": "enabled",
|
|
670
702
|
"budget_tokens": budget,
|
|
671
703
|
}
|
|
672
|
-
|
|
704
|
+
if _temp_ok:
|
|
705
|
+
payload["temperature"] = 1
|
|
673
706
|
# When thinking is enabled, max_tokens must be larger than budget_tokens
|
|
674
707
|
if payload["max_tokens"] <= budget:
|
|
675
708
|
payload["max_tokens"] = budget + 4096
|
|
676
|
-
elif creativity is not None:
|
|
709
|
+
elif creativity is not None and _temp_ok:
|
|
677
710
|
payload["temperature"] = creativity
|
|
678
711
|
|
|
679
712
|
# Use tool calling for structured output (most reliable for Anthropic)
|
|
@@ -989,6 +1022,25 @@ class UnifiedLLMClient:
|
|
|
989
1022
|
payload.pop("reasoning_effort")
|
|
990
1023
|
continue
|
|
991
1024
|
|
|
1025
|
+
# Anthropic deprecated `temperature` for newer models
|
|
1026
|
+
# (Opus 4.7+): they 400 with "`temperature` is deprecated
|
|
1027
|
+
# for this model." Strip it, cache on the client so the
|
|
1028
|
+
# payload builder skips it for subsequent rows on this
|
|
1029
|
+
# client, and retry. Safety net for families not yet in
|
|
1030
|
+
# `_ANTHROPIC_TEMPERATURE_DEPRECATED`.
|
|
1031
|
+
if (
|
|
1032
|
+
"temperature" in error_text
|
|
1033
|
+
and "deprecated" in error_text
|
|
1034
|
+
and "temperature" in payload
|
|
1035
|
+
):
|
|
1036
|
+
if not getattr(self, '_warned_temperature_deprecated', False):
|
|
1037
|
+
print(f"\n[CatLLM] Model '{self.model}' deprecated the temperature parameter.")
|
|
1038
|
+
print(f" Dropping it and caching for subsequent calls on this client.\n")
|
|
1039
|
+
self._warned_temperature_deprecated = True
|
|
1040
|
+
self._anthropic_temperature_unsupported = True
|
|
1041
|
+
payload.pop("temperature")
|
|
1042
|
+
continue
|
|
1043
|
+
|
|
992
1044
|
# HuggingFace: try other routers when the current one
|
|
993
1045
|
# rejects the model with a "wrong router" 400.
|
|
994
1046
|
if self._is_hf_wrong_router_400(response.text):
|
|
@@ -3638,11 +3638,12 @@ def build_output_dataframes(
|
|
|
3638
3638
|
# Populate data
|
|
3639
3639
|
for idx, result in enumerate(all_results):
|
|
3640
3640
|
combined_data["input_index"].append(idx)
|
|
3641
|
-
#
|
|
3641
|
+
# Full input_data (whitespace-collapsed). Truncating here breaks
|
|
3642
|
+
# downstream joins against gold-standard files and silently feeds
|
|
3643
|
+
# truncated text to any pipeline that reuses input_data as input.
|
|
3642
3644
|
raw = result["response"]
|
|
3643
3645
|
clean = " ".join(str(raw).split()) # collapse whitespace/newlines
|
|
3644
|
-
|
|
3645
|
-
combined_data["input_data"].append(preview)
|
|
3646
|
+
combined_data["input_data"].append(clean)
|
|
3646
3647
|
aggregated = result["aggregated"]
|
|
3647
3648
|
|
|
3648
3649
|
# Add PDF metadata if present
|
|
@@ -4464,7 +4465,13 @@ def summarize_ensemble(
|
|
|
4464
4465
|
# synthesis still has *something* to anchor on (prior behavior).
|
|
4465
4466
|
original_text_for_synthesis = entry.get("page_text") or page_label
|
|
4466
4467
|
else:
|
|
4467
|
-
# Truncate input_data for readability; add input_index for joining
|
|
4468
|
+
# Truncate input_data for readability; add input_index for joining.
|
|
4469
|
+
# Truncation is intentional HERE (summarize): inputs can be whole
|
|
4470
|
+
# documents/PDF pages, and full text would bloat the output and the
|
|
4471
|
+
# synthesis context. classify()'s writer (build_output_dataframes)
|
|
4472
|
+
# deliberately does NOT truncate — survey-length inputs there are
|
|
4473
|
+
# reused for downstream joins. See repo TODO for the permanent
|
|
4474
|
+
# per-function input_data design (preview + stable join key).
|
|
4468
4475
|
clean = " ".join(str(item).split()) # collapse whitespace/newlines
|
|
4469
4476
|
preview = clean[:100] + "..." if len(clean) > 100 else clean
|
|
4470
4477
|
row = {
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|