cat-stack 1.6.6__tar.gz → 1.6.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {cat_stack-1.6.6 → cat_stack-1.6.8}/PKG-INFO +1 -1
  2. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/__about__.py +1 -1
  3. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/_providers.py +181 -8
  4. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/text_functions_ensemble.py +11 -4
  5. {cat_stack-1.6.6 → cat_stack-1.6.8}/.gitignore +0 -0
  6. {cat_stack-1.6.6 → cat_stack-1.6.8}/LICENSE +0 -0
  7. {cat_stack-1.6.6 → cat_stack-1.6.8}/README.md +0 -0
  8. {cat_stack-1.6.6 → cat_stack-1.6.8}/pyproject.toml +0 -0
  9. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/cat_stack/__init__.py +0 -0
  10. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/__init__.py +0 -0
  11. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/_batch.py +0 -0
  12. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/_category_analysis.py +0 -0
  13. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/_chunked.py +0 -0
  14. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/_embeddings.py +0 -0
  15. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/_formatter.py +0 -0
  16. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/_pilot_test.py +0 -0
  17. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/_prompts.py +0 -0
  18. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/_review_ui.py +0 -0
  19. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/_tiebreaker.py +0 -0
  20. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/_utils.py +0 -0
  21. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/_web_fetch.py +0 -0
  22. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/_wrapper_helpers.py +0 -0
  23. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/calls/CoVe.py +0 -0
  24. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/calls/__init__.py +0 -0
  25. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/calls/image_CoVe.py +0 -0
  26. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/calls/image_stepback.py +0 -0
  27. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/calls/pdf_CoVe.py +0 -0
  28. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/calls/pdf_stepback.py +0 -0
  29. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/calls/stepback.py +0 -0
  30. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/calls/top_n.py +0 -0
  31. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/classify.py +0 -0
  32. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/explore.py +0 -0
  33. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/extract.py +0 -0
  34. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/image_functions.py +0 -0
  35. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/images/circle.png +0 -0
  36. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/images/cube.png +0 -0
  37. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/images/diamond.png +0 -0
  38. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/images/overlapping_pentagons.png +0 -0
  39. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/images/rectangles.png +0 -0
  40. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/model_reference_list.py +0 -0
  41. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/pdf_functions.py +0 -0
  42. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/prompt_tune.py +0 -0
  43. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/summarize.py +0 -0
  44. {cat_stack-1.6.6 → cat_stack-1.6.8}/src/catstack/text_functions.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-stack
3
- Version: 1.6.6
3
+ Version: 1.6.8
4
4
  Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: GPL-3.0-or-later
4
- __version__ = "1.6.6"
4
+ __version__ = "1.6.8"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-stack"
@@ -126,6 +126,51 @@ def _hf_model_needs_enable_thinking_off(model: str) -> bool:
126
126
  return any(model.startswith(p) for p in _HF_NEEDS_ENABLE_THINKING_OFF)
127
127
 
128
128
 
129
+ # Router-served models measured (2026-06-12 reasoning audit) to reason by
130
+ # default with NO honored off-switch through the OpenAI-compatible router:
131
+ # the router 400-rejects `chat_template_kwargs.enable_thinking` for their
132
+ # templates, and they expose no reasoning_effort. classify() warns once per
133
+ # process so users know the provider default applies.
134
+ _HF_DEFAULT_REASONING_PREFIXES = (
135
+ "openai/gpt-oss",
136
+ "moonshotai/kimi-k2",
137
+ )
138
+
139
+
140
+ def _hf_model_reasons_by_default(model: str) -> bool:
141
+ m = (model or "").lower()
142
+ return any(m.startswith(p) for p in _HF_DEFAULT_REASONING_PREFIXES)
143
+
144
+
145
+ # Module-level: models already warned about uncontrolled reasoning, so the
146
+ # warning fires once per process even though a fresh client is built per row.
147
+ _WARNED_UNCONTROLLED_REASONING: set = set()
148
+
149
+
150
+ # ---------------------------------------------------------------------------
151
+ # Anthropic deprecated the `temperature` parameter starting with the Opus 4.7 /
152
+ # 4.8 generation: these models return 400 "`temperature` is deprecated for this
153
+ # model." if it is sent. Older models (opus-4-6, sonnet-4-6, sonnet-4-5, and
154
+ # earlier) still accept it. This mirrors the OpenAI reasoning-model handling
155
+ # above — we skip `temperature` up-front for the known-deprecated prefixes in
156
+ # `_build_anthropic_payload`, and `UnifiedLLMClient.complete()` strips it on a
157
+ # runtime 400 as a safety net for future families not yet in this table.
158
+ #
159
+ # Matched by name prefix; extend the tuple when new temperature-free models
160
+ # ship.
161
+ # ---------------------------------------------------------------------------
162
+ _ANTHROPIC_TEMPERATURE_DEPRECATED = (
163
+ "claude-opus-4-7",
164
+ "claude-opus-4-8",
165
+ )
166
+
167
+
168
+ def _anthropic_supports_temperature(model: str) -> bool:
169
+ """False for Anthropic models that reject the `temperature` param."""
170
+ m = (model or "").lower()
171
+ return not any(m.startswith(p) for p in _ANTHROPIC_TEMPERATURE_DEPRECATED)
172
+
173
+
129
174
  # ---------------------------------------------------------------------------
130
175
  # Ollama reasoning control: per-model-family parameter format for the
131
176
  # top-level `think` field on chat / generate requests.
@@ -521,8 +566,15 @@ class UnifiedLLMClient:
521
566
  # accept booleans). Without this, gpt-oss family models emit long
522
567
  # <think> blocks by default that bloat per-row generation 3-5x.
523
568
  return self._build_openai_payload(messages, json_schema, creativity, force_json, thinking_budget)
569
+ elif self.provider == "xai":
570
+ # v1.6.8: forward the reasoning request. grok-4.3+ hybrids reason
571
+ # by default (2026-06-12 audit: 214 reasoning tokens on a trivial
572
+ # probe with no control sent); non-reasoning variants reject
573
+ # reasoning_effort and are handled by the 400 fallback in
574
+ # complete(), which caches the rejection on the client.
575
+ return self._build_openai_payload(messages, json_schema, creativity, force_json, thinking_budget)
524
576
  else:
525
- # Other OpenAI-compatible providers (xai, mistral, etc.)
577
+ # Other OpenAI-compatible providers (mistral, etc.)
526
578
  return self._build_openai_payload(messages, json_schema, creativity, force_json)
527
579
 
528
580
  def _build_openai_payload(
@@ -596,6 +648,25 @@ class UnifiedLLMClient:
596
648
  elif creativity is not None:
597
649
  payload["temperature"] = creativity
598
650
 
651
+ # xAI (v1.6.8): hybrid grok models accept reasoning_effort alongside
652
+ # temperature. "low" is the lowest tier xAI exposes (no "none" /
653
+ # "minimal"); explicitly non-reasoning variants 400 on the field —
654
+ # complete() pops it and caches `_xai_no_reasoning_effort` so later
655
+ # rows on this client skip the doomed field up front.
656
+ if (
657
+ self.provider == "xai"
658
+ and thinking_budget is not None
659
+ and not getattr(self, "_xai_no_reasoning_effort", False)
660
+ # Variants whose name already encodes "non-reasoning" are off by
661
+ # model choice; sending reasoning_effort to them is not just
662
+ # redundant but HARMFUL — verified 2026-06-13 that
663
+ # grok-4-1-fast-non-reasoning returns 0 reasoning tokens with no
664
+ # field but 207 when sent reasoning_effort="low", i.e. the field
665
+ # turns reasoning back ON. Leave these alone.
666
+ and "non-reasoning" not in (self.model or "").lower()
667
+ ):
668
+ payload["reasoning_effort"] = "low" if thinking_budget == 0 else "high"
669
+
599
670
  # Ollama: per-model-family reasoning control via the top-level
600
671
  # `think` field. gpt-oss expects an enum ("low"/"medium"/"high");
601
672
  # qwen3/deepseek-r1 expect a boolean. Models not in the
@@ -624,6 +695,24 @@ class UnifiedLLMClient:
624
695
  and _hf_model_needs_enable_thinking_off(self.model)
625
696
  ):
626
697
  payload["chat_template_kwargs"] = {"enable_thinking": False}
698
+ elif (
699
+ self.provider in ("huggingface", "huggingface-together")
700
+ and thinking_budget == 0
701
+ and _hf_model_reasons_by_default(self.model)
702
+ and self.model not in _WARNED_UNCONTROLLED_REASONING
703
+ ):
704
+ # v1.6.8: these router-served models reason by default and honor
705
+ # no off-switch through the router (enable_thinking is
706
+ # 400-rejected for their templates). Warn once per process (a
707
+ # fresh client is built per row, so a per-instance flag would
708
+ # warn every row) so the uniform "reasoning off" request isn't
709
+ # silently unmet.
710
+ print(
711
+ f"\n[CatLLM] WARNING: no effective reasoning control delivered "
712
+ f"for '{self.model}'; the provider's default reasoning "
713
+ f"behavior applies. See docs/reasoning-controls.md.\n"
714
+ )
715
+ _WARNED_UNCONTROLLED_REASONING.add(self.model)
627
716
 
628
717
  return payload
629
718
 
@@ -660,6 +749,14 @@ class UnifiedLLMClient:
660
749
  if system_content:
661
750
  payload["system"] = system_content
662
751
 
752
+ # Newer Anthropic models (Opus 4.7+) deprecated `temperature` and 400 if
753
+ # it is sent. Skip it for those known prefixes, and also honor the flag
754
+ # cached by complete()'s runtime 400 fallback for future families.
755
+ _temp_ok = (
756
+ _anthropic_supports_temperature(self.model)
757
+ and not getattr(self, "_anthropic_temperature_unsupported", False)
758
+ )
759
+
663
760
  # Extended thinking for Anthropic (minimum 1024 tokens)
664
761
  # When thinking is enabled, temperature must be 1 (Anthropic requirement),
665
762
  # so we skip setting temperature from creativity in that case
@@ -669,11 +766,12 @@ class UnifiedLLMClient:
669
766
  "type": "enabled",
670
767
  "budget_tokens": budget,
671
768
  }
672
- payload["temperature"] = 1
769
+ if _temp_ok:
770
+ payload["temperature"] = 1
673
771
  # When thinking is enabled, max_tokens must be larger than budget_tokens
674
772
  if payload["max_tokens"] <= budget:
675
773
  payload["max_tokens"] = budget + 4096
676
- elif creativity is not None:
774
+ elif creativity is not None and _temp_ok:
677
775
  payload["temperature"] = creativity
678
776
 
679
777
  # Use tool calling for structured output (most reliable for Anthropic)
@@ -726,11 +824,19 @@ class UnifiedLLMClient:
726
824
  if creativity is not None:
727
825
  payload["generationConfig"]["temperature"] = creativity
728
826
 
729
- # Add thinking budget for extended thinking (Google-specific)
730
- # Must be inside generationConfig, not at top level
731
- # Google requires a reasonable minimum budget (enforce 128 tokens minimum)
732
- if thinking_budget and thinking_budget > 0:
733
- budget = max(thinking_budget, 128)
827
+ # Reasoning control (Google-specific). Must be inside generationConfig.
828
+ # v1.6.8: an explicit zero budget is now SENT at thinking_budget = 0.
829
+ # Previously nothing was sent at 0 and Gemini ran at its provider
830
+ # default, which the 2026-06-12 audit measured as thinking ON
831
+ # (~200+ thought tokens on a trivial classification call). Models
832
+ # that reject 0 (minimum-budget tiers) are handled by the 400
833
+ # fallback in complete(), which caches the discovered floor on the
834
+ # client (`_google_thinking_floor`).
835
+ if thinking_budget is not None:
836
+ if thinking_budget > 0:
837
+ budget = max(thinking_budget, 128)
838
+ else:
839
+ budget = getattr(self, "_google_thinking_floor", 0)
734
840
  payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": budget}
735
841
 
736
842
  return payload
@@ -913,6 +1019,10 @@ class UnifiedLLMClient:
913
1019
  # transient error this call? Only strip once per call so we don't
914
1020
  # mutate payload on every retry tick.
915
1021
  stripped_response_format = False
1022
+ # v1.6.8: consecutive-timeout counter + one-shot Google schema drop
1023
+ # (see the Timeout handler below).
1024
+ timeout_count = 0
1025
+ dropped_google_schema = False
916
1026
 
917
1027
  for attempt in range(max_retries):
918
1028
  endpoint = self._get_endpoint()
@@ -986,9 +1096,51 @@ class UnifiedLLMClient:
986
1096
  payload["reasoning_effort"] = "low"
987
1097
  continue
988
1098
  elif current == "low" and "reasoning_effort" in payload:
1099
+ # Model takes no reasoning_effort at all (e.g.
1100
+ # xAI's explicitly non-reasoning variants).
1101
+ # Cache so later rows on this client skip the
1102
+ # doomed field up front (v1.6.8).
1103
+ self._xai_no_reasoning_effort = True
989
1104
  payload.pop("reasoning_effort")
990
1105
  continue
991
1106
 
1107
+ # Google (v1.6.8): minimum-budget thinking tiers reject
1108
+ # thinkingBudget: 0. Fall back to 128 (Google's stated
1109
+ # minimum) and cache on the client.
1110
+ if (
1111
+ self.provider == "google"
1112
+ and "thinking" in error_text
1113
+ and ("budget" in error_text or "invalid" in error_text
1114
+ or "unsupported" in error_text)
1115
+ and payload.get("generationConfig", {})
1116
+ .get("thinkingConfig", {})
1117
+ .get("thinkingBudget") == 0
1118
+ ):
1119
+ self._google_thinking_floor = 128
1120
+ payload["generationConfig"]["thinkingConfig"]["thinkingBudget"] = 128
1121
+ print(f"\n[CatLLM] Model '{self.model}' rejected thinkingBudget=0; "
1122
+ f"falling back to the minimum (128) and caching for this client.\n")
1123
+ continue
1124
+
1125
+ # Anthropic deprecated `temperature` for newer models
1126
+ # (Opus 4.7+): they 400 with "`temperature` is deprecated
1127
+ # for this model." Strip it, cache on the client so the
1128
+ # payload builder skips it for subsequent rows on this
1129
+ # client, and retry. Safety net for families not yet in
1130
+ # `_ANTHROPIC_TEMPERATURE_DEPRECATED`.
1131
+ if (
1132
+ "temperature" in error_text
1133
+ and "deprecated" in error_text
1134
+ and "temperature" in payload
1135
+ ):
1136
+ if not getattr(self, '_warned_temperature_deprecated', False):
1137
+ print(f"\n[CatLLM] Model '{self.model}' deprecated the temperature parameter.")
1138
+ print(f" Dropping it and caching for subsequent calls on this client.\n")
1139
+ self._warned_temperature_deprecated = True
1140
+ self._anthropic_temperature_unsupported = True
1141
+ payload.pop("temperature")
1142
+ continue
1143
+
992
1144
  # HuggingFace: try other routers when the current one
993
1145
  # rejects the model with a "wrong router" 400.
994
1146
  if self._is_hf_wrong_router_400(response.text):
@@ -1061,6 +1213,27 @@ class UnifiedLLMClient:
1061
1213
  return result, None
1062
1214
 
1063
1215
  except requests.exceptions.Timeout:
1216
+ timeout_count += 1
1217
+ # v1.6.8: Gemini can reproducibly hang on specific inputs
1218
+ # when a strict responseSchema is attached (constrained-
1219
+ # decoding pathology; 2026-06-12 audit — a trivial input
1220
+ # timed out 6/6 attempts WITH the schema and answered
1221
+ # instantly without it). After two consecutive timeouts with
1222
+ # a schema attached, drop the schema once and re-ask: the
1223
+ # prompt still requests JSON and extract_json() parses it
1224
+ # from the free-text response.
1225
+ if (
1226
+ self.provider == "google"
1227
+ and timeout_count >= 2
1228
+ and not dropped_google_schema
1229
+ and "responseSchema" in payload.get("generationConfig", {})
1230
+ ):
1231
+ dropped_google_schema = True
1232
+ payload["generationConfig"].pop("responseSchema", None)
1233
+ print(f"[CatLLM] Repeated timeouts from '{self.model}' with "
1234
+ f"responseSchema attached; retrying schema-less "
1235
+ f"(prompt-based JSON parsing).")
1236
+ continue
1064
1237
  wait_time = _backoff_with_jitter(initial_delay, attempt)
1065
1238
  elapsed = time.monotonic() - start
1066
1239
  if attempt < max_retries - 1 and elapsed + wait_time <= max_total_wait:
@@ -3638,11 +3638,12 @@ def build_output_dataframes(
3638
3638
  # Populate data
3639
3639
  for idx, result in enumerate(all_results):
3640
3640
  combined_data["input_index"].append(idx)
3641
- # Truncate input_data for readability
3641
+ # Full input_data (whitespace-collapsed). Truncating here breaks
3642
+ # downstream joins against gold-standard files and silently feeds
3643
+ # truncated text to any pipeline that reuses input_data as input.
3642
3644
  raw = result["response"]
3643
3645
  clean = " ".join(str(raw).split()) # collapse whitespace/newlines
3644
- preview = clean[:100] + "..." if len(clean) > 100 else clean
3645
- combined_data["input_data"].append(preview)
3646
+ combined_data["input_data"].append(clean)
3646
3647
  aggregated = result["aggregated"]
3647
3648
 
3648
3649
  # Add PDF metadata if present
@@ -4464,7 +4465,13 @@ def summarize_ensemble(
4464
4465
  # synthesis still has *something* to anchor on (prior behavior).
4465
4466
  original_text_for_synthesis = entry.get("page_text") or page_label
4466
4467
  else:
4467
- # Truncate input_data for readability; add input_index for joining
4468
+ # Truncate input_data for readability; add input_index for joining.
4469
+ # Truncation is intentional HERE (summarize): inputs can be whole
4470
+ # documents/PDF pages, and full text would bloat the output and the
4471
+ # synthesis context. classify()'s writer (build_output_dataframes)
4472
+ # deliberately does NOT truncate — survey-length inputs there are
4473
+ # reused for downstream joins. See repo TODO for the permanent
4474
+ # per-function input_data design (preview + stable join key).
4468
4475
  clean = " ".join(str(item).split()) # collapse whitespace/newlines
4469
4476
  preview = clean[:100] + "..." if len(clean) > 100 else clean
4470
4477
  row = {
File without changes
File without changes
File without changes
File without changes