cat-stack 1.6.5__tar.gz → 1.6.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {cat_stack-1.6.5 → cat_stack-1.6.7}/PKG-INFO +1 -1
  2. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/__about__.py +1 -1
  3. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/_providers.py +131 -2
  4. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/text_functions_ensemble.py +14 -7
  5. {cat_stack-1.6.5 → cat_stack-1.6.7}/.gitignore +0 -0
  6. {cat_stack-1.6.5 → cat_stack-1.6.7}/LICENSE +0 -0
  7. {cat_stack-1.6.5 → cat_stack-1.6.7}/README.md +0 -0
  8. {cat_stack-1.6.5 → cat_stack-1.6.7}/pyproject.toml +0 -0
  9. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/cat_stack/__init__.py +0 -0
  10. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/__init__.py +0 -0
  11. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/_batch.py +0 -0
  12. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/_category_analysis.py +0 -0
  13. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/_chunked.py +0 -0
  14. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/_embeddings.py +0 -0
  15. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/_formatter.py +0 -0
  16. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/_pilot_test.py +0 -0
  17. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/_prompts.py +0 -0
  18. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/_review_ui.py +0 -0
  19. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/_tiebreaker.py +0 -0
  20. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/_utils.py +0 -0
  21. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/_web_fetch.py +0 -0
  22. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/_wrapper_helpers.py +0 -0
  23. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/calls/CoVe.py +0 -0
  24. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/calls/__init__.py +0 -0
  25. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/calls/image_CoVe.py +0 -0
  26. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/calls/image_stepback.py +0 -0
  27. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/calls/pdf_CoVe.py +0 -0
  28. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/calls/pdf_stepback.py +0 -0
  29. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/calls/stepback.py +0 -0
  30. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/calls/top_n.py +0 -0
  31. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/classify.py +0 -0
  32. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/explore.py +0 -0
  33. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/extract.py +0 -0
  34. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/image_functions.py +0 -0
  35. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/images/circle.png +0 -0
  36. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/images/cube.png +0 -0
  37. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/images/diamond.png +0 -0
  38. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/images/overlapping_pentagons.png +0 -0
  39. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/images/rectangles.png +0 -0
  40. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/model_reference_list.py +0 -0
  41. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/pdf_functions.py +0 -0
  42. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/prompt_tune.py +0 -0
  43. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/summarize.py +0 -0
  44. {cat_stack-1.6.5 → cat_stack-1.6.7}/src/catstack/text_functions.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-stack
3
- Version: 1.6.5
3
+ Version: 1.6.7
4
4
  Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: GPL-3.0-or-later
4
- __version__ = "1.6.5"
4
+ __version__ = "1.6.7"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-stack"
@@ -125,6 +125,88 @@ _HF_NEEDS_ENABLE_THINKING_OFF = (
125
125
  def _hf_model_needs_enable_thinking_off(model: str) -> bool:
126
126
  return any(model.startswith(p) for p in _HF_NEEDS_ENABLE_THINKING_OFF)
127
127
 
128
+
129
+ # ---------------------------------------------------------------------------
130
+ # Anthropic deprecated the `temperature` parameter starting with the Opus 4.7 /
131
+ # 4.8 generation: these models return 400 "`temperature` is deprecated for this
132
+ # model." if it is sent. Older models (opus-4-6, sonnet-4-6, sonnet-4-5, and
133
+ # earlier) still accept it. This mirrors the OpenAI reasoning-model handling
134
+ # above — we skip `temperature` up-front for the known-deprecated prefixes in
135
+ # `_build_anthropic_payload`, and `UnifiedLLMClient.complete()` strips it on a
136
+ # runtime 400 as a safety net for future families not yet in this table.
137
+ #
138
+ # Matched by name prefix; extend the tuple when new temperature-free models
139
+ # ship.
140
+ # ---------------------------------------------------------------------------
141
_ANTHROPIC_TEMPERATURE_DEPRECATED = (
    "claude-opus-4-7",
    "claude-opus-4-8",
)


def _anthropic_supports_temperature(model: str) -> bool:
    """Report whether an Anthropic model still accepts `temperature`.

    The model name is lower-cased (a falsy name is treated as an empty
    string) and compared against the prefixes listed in
    `_ANTHROPIC_TEMPERATURE_DEPRECATED`. A match means the parameter must
    be left out of the request, so the function returns False.
    """
    normalized = model.lower() if model else ""
    # `str.startswith` accepts a tuple, so one call covers every prefix.
    return not normalized.startswith(_ANTHROPIC_TEMPERATURE_DEPRECATED)
151
+
152
+
153
+ # ---------------------------------------------------------------------------
154
+ # Ollama reasoning control: per-model-family parameter format for the
155
+ # top-level `think` field on chat / generate requests.
156
+ #
157
+ # Ollama standardized on a single API field name (`think`) but the value
158
+ # type differs per model family — gpt-oss takes an enum, most others take
159
+ # a boolean. See https://docs.ollama.com/capabilities/thinking.
160
+ #
161
+ # Coverage philosophy: list every Ollama reasoning model family we know of
162
+ # AND that uses the `think` field. Reasoning models that gate via other
163
+ # mechanisms (system prompts, chat-template flags) are explicitly noted in
164
+ # the "NOT in registry" comment below and handled elsewhere — adding them
165
+ # here would silently inject a no-op `think` field, which Ollama may
166
+ # accept but won't honor, leading to surprising behavior.
167
+ #
168
+ # Entries are prefix-matched by `_ollama_think_value()`, so
169
+ # put more-specific prefixes earlier when adding (e.g. `qwen3-coder` before
170
+ # `qwen3` if they differ).
171
+ #
172
+ # Registry tuple: (model prefix, value-format, low_value, high_value)
173
+ #
174
+ # Models in registry — `think` field works:
175
+ # gpt-oss — enum: "low" / "medium" / "high" (cannot fully disable)
176
+ # qwen3 / qwen3.* — bool: True / False (covers -thinking variants too)
177
+ # qwq — bool: True / False (Qwen QwQ — preceded Qwen3)
178
+ # deepseek-r1 — bool: True / False (covers -distill variants)
179
+ #
180
+ # Models NOT in registry — different mechanism, do NOT add here:
181
+ # magistral — controlled via system prompt (Mistral Magistral)
182
+ # exaone-deep — uses Modelfile-baked reasoning, no API toggle exposed
183
+ # marco-o1 — uses chat-template wrappers, not `think` field
184
+ #
185
+ # Models with NO reasoning (so `think` should not appear at all):
186
+ # gemma2/3, llama3.x/4.x, mistral, mistral-nemo, qwen2.5 (non-QwQ),
187
+ # phi3/4, granite, olmo, codestral, …
188
+ # These are NOT added; the registry's None-return for unmatched prefixes
189
+ # correctly omits the `think` field for them.
190
+ # ---------------------------------------------------------------------------
191
_OLLAMA_REASONING_MODELS = (
    ("gpt-oss", "enum", "low", "high"),
    ("qwen3", "bool", False, True),  # covers qwen3.*, qwen3-*, -thinking-* variants
    ("qwq", "bool", False, True),
    ("deepseek-r1", "bool", False, True),  # covers -distill-qwen, -distill-llama, etc.
)


def _ollama_think_value(model: str, thinking_budget):
    """Map cat-stack's thinking_budget to the Ollama `think` value for a model.

    Args:
        model: Ollama model name (e.g. "qwen3:8b"). Matched by prefix,
            case-insensitively; a falsy name matches nothing.
        thinking_budget: None to leave reasoning untouched, 0 to request the
            family's lowest setting, any other value for the highest.

    Returns:
        The registry's low or high value for the matching family, or None
        when `thinking_budget` is None or the model is not listed in
        `_OLLAMA_REASONING_MODELS` (no `think` field should be set).
    """
    if thinking_budget is None:
        return None
    # Normalise the same way `_anthropic_supports_temperature` does, so a
    # missing or differently-cased model name cannot raise or be missed.
    name = (model or "").lower()
    # Longest prefix first, so a more specific entry always wins regardless
    # of declaration order; `sorted` is stable, so ties keep their listed order.
    by_specificity = sorted(
        _OLLAMA_REASONING_MODELS, key=lambda entry: len(entry[0]), reverse=True
    )
    for prefix, _fmt, low_val, high_val in by_specificity:
        if name.startswith(prefix):
            return low_val if thinking_budget == 0 else high_val
    return None
209
+
128
210
  __all__ = [
129
211
  # Main client
130
212
  "UnifiedLLMClient",
@@ -457,6 +539,12 @@ class UnifiedLLMClient:
457
539
  elif self.provider in ("huggingface", "huggingface-together"):
458
540
  # HuggingFace needs thinking_budget to disable thinking on models that reason by default
459
541
  return self._build_openai_payload(messages, json_schema, creativity, force_json, thinking_budget)
542
+ elif self.provider == "ollama":
543
+ # Ollama threads thinking_budget to its top-level `think` field for
544
+ # reasoning-capable models (gpt-oss accepts low/medium/high; others
545
+ # accept booleans). Without this, gpt-oss family models emit long
546
+ # <think> blocks by default that bloat per-row generation 3-5x.
547
+ return self._build_openai_payload(messages, json_schema, creativity, force_json, thinking_budget)
460
548
  else:
461
549
  # Other OpenAI-compatible providers (xai, mistral, etc.)
462
550
  return self._build_openai_payload(messages, json_schema, creativity, force_json)
@@ -532,6 +620,19 @@ class UnifiedLLMClient:
532
620
  elif creativity is not None:
533
621
  payload["temperature"] = creativity
534
622
 
623
+ # Ollama: per-model-family reasoning control via the top-level
624
+ # `think` field. gpt-oss expects an enum ("low"/"medium"/"high");
625
+ # qwen3/deepseek-r1 expect a boolean. Models not in the
626
+ # `_OLLAMA_REASONING_MODELS` registry don't support reasoning and
627
+ # get no `think` field (would be a no-op at best, validator-
628
+ # confusing at worst). Without this, Ollama-served gpt-oss
629
+ # produces long `<think>` blocks by default that bloat per-row
630
+ # generation 3-5x.
631
+ if self.provider == "ollama":
632
+ think_value = _ollama_think_value(self.model, thinking_budget)
633
+ if think_value is not None:
634
+ payload["think"] = think_value
635
+
535
636
  # HuggingFace: disable thinking on model families whose chat
536
637
  # template honors `enable_thinking` (Qwen3-family). Other HF-routed
537
638
  # models don't need the kwarg, and strict-validator backends
@@ -583,6 +684,14 @@ class UnifiedLLMClient:
583
684
  if system_content:
584
685
  payload["system"] = system_content
585
686
 
687
+ # Newer Anthropic models (Opus 4.7+) deprecated `temperature` and 400 if
688
+ # it is sent. Skip it for those known prefixes, and also honor the flag
689
+ # cached by complete()'s runtime 400 fallback for future families.
690
+ _temp_ok = (
691
+ _anthropic_supports_temperature(self.model)
692
+ and not getattr(self, "_anthropic_temperature_unsupported", False)
693
+ )
694
+
586
695
  # Extended thinking for Anthropic (minimum 1024 tokens)
587
696
  # When thinking is enabled, temperature must be 1 (Anthropic requirement),
588
697
  # so we skip setting temperature from creativity in that case
@@ -592,11 +701,12 @@ class UnifiedLLMClient:
592
701
  "type": "enabled",
593
702
  "budget_tokens": budget,
594
703
  }
595
- payload["temperature"] = 1
704
+ if _temp_ok:
705
+ payload["temperature"] = 1
596
706
  # When thinking is enabled, max_tokens must be larger than budget_tokens
597
707
  if payload["max_tokens"] <= budget:
598
708
  payload["max_tokens"] = budget + 4096
599
- elif creativity is not None:
709
+ elif creativity is not None and _temp_ok:
600
710
  payload["temperature"] = creativity
601
711
 
602
712
  # Use tool calling for structured output (most reliable for Anthropic)
@@ -912,6 +1022,25 @@ class UnifiedLLMClient:
912
1022
  payload.pop("reasoning_effort")
913
1023
  continue
914
1024
 
1025
+ # Anthropic deprecated `temperature` for newer models
1026
+ # (Opus 4.7+): they 400 with "`temperature` is deprecated
1027
+ # for this model." Strip it, cache on the client so the
1028
+ # payload builder skips it for subsequent rows on this
1029
+ # client, and retry. Safety net for families not yet in
1030
+ # `_ANTHROPIC_TEMPERATURE_DEPRECATED`.
1031
+ if (
1032
+ "temperature" in error_text
1033
+ and "deprecated" in error_text
1034
+ and "temperature" in payload
1035
+ ):
1036
+ if not getattr(self, '_warned_temperature_deprecated', False):
1037
+ print(f"\n[CatLLM] Model '{self.model}' deprecated the temperature parameter.")
1038
+ print(f" Dropping it and caching for subsequent calls on this client.\n")
1039
+ self._warned_temperature_deprecated = True
1040
+ self._anthropic_temperature_unsupported = True
1041
+ payload.pop("temperature")
1042
+ continue
1043
+
915
1044
  # HuggingFace: try other routers when the current one
916
1045
  # rejects the model with a "wrong router" 400.
917
1046
  if self._is_hf_wrong_router_400(response.text):
@@ -3043,7 +3043,7 @@ Categorize text responses {cove_categorize}:
3043
3043
  messages=messages,
3044
3044
  json_schema=json_schemas[cfg["model"]],
3045
3045
  creativity=effective_creativity,
3046
- thinking_budget=thinking_budget if cfg["provider"] in ("google", "openai", "anthropic", "huggingface", "huggingface-together") else None,
3046
+ thinking_budget=thinking_budget if cfg["provider"] in ("google", "openai", "anthropic", "huggingface", "huggingface-together", "ollama") else None,
3047
3047
  max_retries=max_retries,
3048
3048
  )
3049
3049
 
@@ -3100,7 +3100,7 @@ Categorize text responses {cove_categorize}:
3100
3100
  messages=messages,
3101
3101
  json_schema=json_schemas[cfg["model"]],
3102
3102
  creativity=effective_creativity,
3103
- thinking_budget=thinking_budget if cfg["provider"] in ("google", "openai", "anthropic", "huggingface", "huggingface-together") else None,
3103
+ thinking_budget=thinking_budget if cfg["provider"] in ("google", "openai", "anthropic", "huggingface", "huggingface-together", "ollama") else None,
3104
3104
  max_retries=max_retries,
3105
3105
  )
3106
3106
 
@@ -3184,7 +3184,7 @@ Categorize text responses {cove_categorize}:
3184
3184
  messages=_retry_messages,
3185
3185
  json_schema=json_schemas[cfg["model"]],
3186
3186
  creativity=effective_creativity,
3187
- thinking_budget=thinking_budget if cfg["provider"] in ("google", "openai", "anthropic", "huggingface", "huggingface-together") else None,
3187
+ thinking_budget=thinking_budget if cfg["provider"] in ("google", "openai", "anthropic", "huggingface", "huggingface-together", "ollama") else None,
3188
3188
  max_retries=max_retries,
3189
3189
  )
3190
3190
 
@@ -3638,11 +3638,12 @@ def build_output_dataframes(
3638
3638
  # Populate data
3639
3639
  for idx, result in enumerate(all_results):
3640
3640
  combined_data["input_index"].append(idx)
3641
- # Truncate input_data for readability
3641
+ # Full input_data (whitespace-collapsed). Truncating here breaks
3642
+ # downstream joins against gold-standard files and silently feeds
3643
+ # truncated text to any pipeline that reuses input_data as input.
3642
3644
  raw = result["response"]
3643
3645
  clean = " ".join(str(raw).split()) # collapse whitespace/newlines
3644
- preview = clean[:100] + "..." if len(clean) > 100 else clean
3645
- combined_data["input_data"].append(preview)
3646
+ combined_data["input_data"].append(clean)
3646
3647
  aggregated = result["aggregated"]
3647
3648
 
3648
3649
  # Add PDF metadata if present
@@ -4464,7 +4465,13 @@ def summarize_ensemble(
4464
4465
  # synthesis still has *something* to anchor on (prior behavior).
4465
4466
  original_text_for_synthesis = entry.get("page_text") or page_label
4466
4467
  else:
4467
- # Truncate input_data for readability; add input_index for joining
4468
+ # Truncate input_data for readability; add input_index for joining.
4469
+ # Truncation is intentional HERE (summarize): inputs can be whole
4470
+ # documents/PDF pages, and full text would bloat the output and the
4471
+ # synthesis context. classify()'s writer (build_output_dataframes)
4472
+ # deliberately does NOT truncate — survey-length inputs there are
4473
+ # reused for downstream joins. See repo TODO for the permanent
4474
+ # per-function input_data design (preview + stable join key).
4468
4475
  clean = " ".join(str(item).split()) # collapse whitespace/newlines
4469
4476
  preview = clean[:100] + "..." if len(clean) > 100 else clean
4470
4477
  row = {
File without changes
File without changes
File without changes
File without changes