cat-stack 1.6.6__tar.gz → 1.6.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {cat_stack-1.6.6 → cat_stack-1.6.7}/PKG-INFO +1 -1
  2. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/__about__.py +1 -1
  3. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_providers.py +54 -2
  4. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/text_functions_ensemble.py +11 -4
  5. {cat_stack-1.6.6 → cat_stack-1.6.7}/.gitignore +0 -0
  6. {cat_stack-1.6.6 → cat_stack-1.6.7}/LICENSE +0 -0
  7. {cat_stack-1.6.6 → cat_stack-1.6.7}/README.md +0 -0
  8. {cat_stack-1.6.6 → cat_stack-1.6.7}/pyproject.toml +0 -0
  9. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/cat_stack/__init__.py +0 -0
  10. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/__init__.py +0 -0
  11. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_batch.py +0 -0
  12. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_category_analysis.py +0 -0
  13. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_chunked.py +0 -0
  14. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_embeddings.py +0 -0
  15. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_formatter.py +0 -0
  16. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_pilot_test.py +0 -0
  17. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_prompts.py +0 -0
  18. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_review_ui.py +0 -0
  19. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_tiebreaker.py +0 -0
  20. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_utils.py +0 -0
  21. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_web_fetch.py +0 -0
  22. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/_wrapper_helpers.py +0 -0
  23. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/calls/CoVe.py +0 -0
  24. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/calls/__init__.py +0 -0
  25. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/calls/image_CoVe.py +0 -0
  26. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/calls/image_stepback.py +0 -0
  27. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/calls/pdf_CoVe.py +0 -0
  28. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/calls/pdf_stepback.py +0 -0
  29. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/calls/stepback.py +0 -0
  30. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/calls/top_n.py +0 -0
  31. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/classify.py +0 -0
  32. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/explore.py +0 -0
  33. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/extract.py +0 -0
  34. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/image_functions.py +0 -0
  35. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/images/circle.png +0 -0
  36. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/images/cube.png +0 -0
  37. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/images/diamond.png +0 -0
  38. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/images/overlapping_pentagons.png +0 -0
  39. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/images/rectangles.png +0 -0
  40. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/model_reference_list.py +0 -0
  41. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/pdf_functions.py +0 -0
  42. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/prompt_tune.py +0 -0
  43. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/summarize.py +0 -0
  44. {cat_stack-1.6.6 → cat_stack-1.6.7}/src/catstack/text_functions.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-stack
3
- Version: 1.6.6
3
+ Version: 1.6.7
4
4
  Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: GPL-3.0-or-later
4
- __version__ = "1.6.6"
4
+ __version__ = "1.6.7"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-stack"
@@ -126,6 +126,30 @@ def _hf_model_needs_enable_thinking_off(model: str) -> bool:
126
126
  return any(model.startswith(p) for p in _HF_NEEDS_ENABLE_THINKING_OFF)
127
127
 
128
128
 
129
+ # ---------------------------------------------------------------------------
130
+ # Anthropic deprecated the `temperature` parameter starting with the Opus 4.7 /
131
+ # 4.8 generation: these models return 400 "`temperature` is deprecated for this
132
+ # model." if it is sent. Older models (opus-4-6, sonnet-4-6, sonnet-4-5, and
133
+ # earlier) still accept it. This mirrors the OpenAI reasoning-model handling
134
+ # above — we skip `temperature` up-front for the known-deprecated prefixes in
135
+ # `_build_anthropic_payload`, and `UnifiedLLMClient.complete()` strips it on a
136
+ # runtime 400 as a safety net for future families not yet in this table.
137
+ #
138
+ # Matched by name prefix; extend the tuple when new temperature-free models
139
+ # ship.
140
+ # ---------------------------------------------------------------------------
141
+ _ANTHROPIC_TEMPERATURE_DEPRECATED = (
142
+ "claude-opus-4-7",
143
+ "claude-opus-4-8",
144
+ )
145
+
146
+
147
+ def _anthropic_supports_temperature(model: str) -> bool:
148
+ """False for Anthropic models that reject the `temperature` param."""
149
+ m = (model or "").lower()
150
+ return not any(m.startswith(p) for p in _ANTHROPIC_TEMPERATURE_DEPRECATED)
151
+
152
+
129
153
  # ---------------------------------------------------------------------------
130
154
  # Ollama reasoning control: per-model-family parameter format for the
131
155
  # top-level `think` field on chat / generate requests.
@@ -660,6 +684,14 @@ class UnifiedLLMClient:
660
684
  if system_content:
661
685
  payload["system"] = system_content
662
686
 
687
+ # Newer Anthropic models (Opus 4.7+) deprecated `temperature` and 400 if
688
+ # it is sent. Skip it for those known prefixes, and also honor the flag
689
+ # cached by complete()'s runtime 400 fallback for future families.
690
+ _temp_ok = (
691
+ _anthropic_supports_temperature(self.model)
692
+ and not getattr(self, "_anthropic_temperature_unsupported", False)
693
+ )
694
+
663
695
  # Extended thinking for Anthropic (minimum 1024 tokens)
664
696
  # When thinking is enabled, temperature must be 1 (Anthropic requirement),
665
697
  # so we skip setting temperature from creativity in that case
@@ -669,11 +701,12 @@ class UnifiedLLMClient:
669
701
  "type": "enabled",
670
702
  "budget_tokens": budget,
671
703
  }
672
- payload["temperature"] = 1
704
+ if _temp_ok:
705
+ payload["temperature"] = 1
673
706
  # When thinking is enabled, max_tokens must be larger than budget_tokens
674
707
  if payload["max_tokens"] <= budget:
675
708
  payload["max_tokens"] = budget + 4096
676
- elif creativity is not None:
709
+ elif creativity is not None and _temp_ok:
677
710
  payload["temperature"] = creativity
678
711
 
679
712
  # Use tool calling for structured output (most reliable for Anthropic)
@@ -989,6 +1022,25 @@ class UnifiedLLMClient:
989
1022
  payload.pop("reasoning_effort")
990
1023
  continue
991
1024
 
1025
+ # Anthropic deprecated `temperature` for newer models
1026
+ # (Opus 4.7+): they 400 with "`temperature` is deprecated
1027
+ # for this model." Strip it, cache on the client so the
1028
+ # payload builder skips it for subsequent rows on this
1029
+ # client, and retry. Safety net for families not yet in
1030
+ # `_ANTHROPIC_TEMPERATURE_DEPRECATED`.
1031
+ if (
1032
+ "temperature" in error_text
1033
+ and "deprecated" in error_text
1034
+ and "temperature" in payload
1035
+ ):
1036
+ if not getattr(self, '_warned_temperature_deprecated', False):
1037
+ print(f"\n[CatLLM] Model '{self.model}' deprecated the temperature parameter.")
1038
+ print(f" Dropping it and caching for subsequent calls on this client.\n")
1039
+ self._warned_temperature_deprecated = True
1040
+ self._anthropic_temperature_unsupported = True
1041
+ payload.pop("temperature")
1042
+ continue
1043
+
992
1044
  # HuggingFace: try other routers when the current one
993
1045
  # rejects the model with a "wrong router" 400.
994
1046
  if self._is_hf_wrong_router_400(response.text):
@@ -3638,11 +3638,12 @@ def build_output_dataframes(
3638
3638
  # Populate data
3639
3639
  for idx, result in enumerate(all_results):
3640
3640
  combined_data["input_index"].append(idx)
3641
- # Truncate input_data for readability
3641
+ # Full input_data (whitespace-collapsed). Truncating here breaks
3642
+ # downstream joins against gold-standard files and silently feeds
3643
+ # truncated text to any pipeline that reuses input_data as input.
3642
3644
  raw = result["response"]
3643
3645
  clean = " ".join(str(raw).split()) # collapse whitespace/newlines
3644
- preview = clean[:100] + "..." if len(clean) > 100 else clean
3645
- combined_data["input_data"].append(preview)
3646
+ combined_data["input_data"].append(clean)
3646
3647
  aggregated = result["aggregated"]
3647
3648
 
3648
3649
  # Add PDF metadata if present
@@ -4464,7 +4465,13 @@ def summarize_ensemble(
4464
4465
  # synthesis still has *something* to anchor on (prior behavior).
4465
4466
  original_text_for_synthesis = entry.get("page_text") or page_label
4466
4467
  else:
4467
- # Truncate input_data for readability; add input_index for joining
4468
+ # Truncate input_data for readability; add input_index for joining.
4469
+ # Truncation is intentional HERE (summarize): inputs can be whole
4470
+ # documents/PDF pages, and full text would bloat the output and the
4471
+ # synthesis context. classify()'s writer (build_output_dataframes)
4472
+ # deliberately does NOT truncate — survey-length inputs there are
4473
+ # reused for downstream joins. See repo TODO for the permanent
4474
+ # per-function input_data design (preview + stable join key).
4468
4475
  clean = " ".join(str(item).split()) # collapse whitespace/newlines
4469
4476
  preview = clean[:100] + "..." if len(clean) > 100 else clean
4470
4477
  row = {
File without changes
File without changes
File without changes
File without changes