cat-stack 1.6.0__tar.gz → 1.6.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {cat_stack-1.6.0 → cat_stack-1.6.2}/PKG-INFO +6 -2
  2. {cat_stack-1.6.0 → cat_stack-1.6.2}/README.md +5 -1
  3. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/__about__.py +1 -1
  4. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/_providers.py +139 -12
  5. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/pdf_functions.py +63 -4
  6. {cat_stack-1.6.0 → cat_stack-1.6.2}/.gitignore +0 -0
  7. {cat_stack-1.6.0 → cat_stack-1.6.2}/LICENSE +0 -0
  8. {cat_stack-1.6.0 → cat_stack-1.6.2}/pyproject.toml +0 -0
  9. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/cat_stack/__init__.py +0 -0
  10. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/__init__.py +0 -0
  11. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/_batch.py +0 -0
  12. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/_category_analysis.py +0 -0
  13. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/_chunked.py +0 -0
  14. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/_embeddings.py +0 -0
  15. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/_formatter.py +0 -0
  16. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/_pilot_test.py +0 -0
  17. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/_prompts.py +0 -0
  18. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/_review_ui.py +0 -0
  19. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/_tiebreaker.py +0 -0
  20. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/_utils.py +0 -0
  21. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/_web_fetch.py +0 -0
  22. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/_wrapper_helpers.py +0 -0
  23. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/calls/CoVe.py +0 -0
  24. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/calls/__init__.py +0 -0
  25. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/calls/image_CoVe.py +0 -0
  26. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/calls/image_stepback.py +0 -0
  27. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/calls/pdf_CoVe.py +0 -0
  28. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/calls/pdf_stepback.py +0 -0
  29. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/calls/stepback.py +0 -0
  30. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/calls/top_n.py +0 -0
  31. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/classify.py +0 -0
  32. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/explore.py +0 -0
  33. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/extract.py +0 -0
  34. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/image_functions.py +0 -0
  35. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/images/circle.png +0 -0
  36. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/images/cube.png +0 -0
  37. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/images/diamond.png +0 -0
  38. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/images/overlapping_pentagons.png +0 -0
  39. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/images/rectangles.png +0 -0
  40. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/model_reference_list.py +0 -0
  41. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/prompt_tune.py +0 -0
  42. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/summarize.py +0 -0
  43. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/text_functions.py +0 -0
  44. {cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/text_functions_ensemble.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-stack
3
- Version: 1.6.0
3
+ Version: 1.6.2
4
4
  Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
@@ -177,7 +177,11 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
177
177
  - **Multi-model ensemble** with consensus voting and agreement scores
178
178
  - **Batch API support** for OpenAI, Anthropic, Google, Mistral, and xAI
179
179
  - **Prompt strategies**: Chain-of-Thought, Chain-of-Verification, step-back prompting, few-shot examples
180
- - **Text, image, and PDF** input auto-detection
180
+ - **Text, image, and PDF** input auto-detection (PDF inputs are
181
+ validated against the `%PDF-` magic-byte header before reaching
182
+ PyMuPDF, so a webpage saved with `.pdf` extension surfaces a clear
183
+ `ValueError` instead of silently classifying a blank rendered page
184
+ as `success`)
181
185
  - **Embedding similarity** tiebreaker for ensemble consensus ties
182
186
  - **Pilot test** — validate classifications on a small sample before committing to the full run
183
187
 
@@ -141,7 +141,11 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
141
141
  - **Multi-model ensemble** with consensus voting and agreement scores
142
142
  - **Batch API support** for OpenAI, Anthropic, Google, Mistral, and xAI
143
143
  - **Prompt strategies**: Chain-of-Thought, Chain-of-Verification, step-back prompting, few-shot examples
144
- - **Text, image, and PDF** input auto-detection
144
+ - **Text, image, and PDF** input auto-detection (PDF inputs are
145
+ validated against the `%PDF-` magic-byte header before reaching
146
+ PyMuPDF, so a webpage saved with `.pdf` extension surfaces a clear
147
+ `ValueError` instead of silently classifying a blank rendered page
148
+ as `success`)
145
149
  - **Embedding similarity** tiebreaker for ensemble consensus ties
146
150
  - **Pilot test** — validate classifications on a small sample before committing to the full run
147
151
 
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: GPL-3.0-or-later
4
- __version__ = "1.6.0"
4
+ __version__ = "1.6.2"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-stack"
@@ -19,6 +19,66 @@ import requests
19
19
  # short enough that batch ensembles don't stall for half an hour."
20
20
  _MAX_TOTAL_WAIT_SECONDS = 300.0
21
21
 
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # OpenAI reasoning_effort: per-model-family off-equivalent value.
25
+ # ---------------------------------------------------------------------------
26
+ #
27
+ # Different OpenAI model generations expose different `reasoning_effort`
28
+ # enum values. The "off" value (what `thinking_budget=0` maps to) is not
29
+ # stable across families:
30
+ #
31
+ # o1 / o3 / o4, gpt-5.0..gpt-5.3 → "minimal" (older floor)
32
+ # gpt-5.4 / gpt-5.5 / gpt-5.6 → "none" (new strict-off; "minimal" deprecated)
33
+ #
34
+ # A model sent the wrong floor returns a 400 `unsupported_value`. The
35
+ # table below is consulted in `_openai_reasoning_effort_floor()` to pick
36
+ # the right value up-front. For unknown future families,
37
+ # `UnifiedLLMClient.complete()` catches the 400 and falls back to "low"
38
+ # (universally accepted across all reasoning_effort-supporting models).
39
+ #
40
+ # Entries are matched longest-prefix-first so "gpt-5.4" matches before
41
+ # "gpt-5" — keep that invariant when extending.
42
+ _OPENAI_REASONING_EFFORT_FLOORS = (
43
+ ("gpt-5.4", "none"),
44
+ ("gpt-5.5", "none"),
45
+ ("gpt-5.6", "none"),
46
+ ("gpt-5", "minimal"), # covers 5.0, 5.1, 5.2, 5.3
47
+ ("o1", "minimal"),
48
+ ("o3", "minimal"),
49
+ ("o4", "minimal"),
50
+ )
51
+
52
+
53
+ def _openai_reasoning_effort_floor(model: str) -> str:
54
+ """Return the off-equivalent reasoning_effort value for a reasoning-
55
+ capable OpenAI model, based on its name prefix. Defaults to "minimal"
56
+ for models not covered by the table — the safest historical value."""
57
+ for prefix, floor in _OPENAI_REASONING_EFFORT_FLOORS:
58
+ if model.startswith(prefix):
59
+ return floor
60
+ return "minimal"
61
+
62
+
63
+ # ---------------------------------------------------------------------------
64
+ # HuggingFace `chat_template_kwargs={"enable_thinking": False}` is the knob
65
+ # to suppress Qwen3-family `<think>` blocks. Other model families don't
66
+ # expose an `enable_thinking` template variable, and strict HF backends
67
+ # (Fireworks, Groq) reject the unknown field with 400 — forcing a wasted
68
+ # retry. Restrict injection to families that actually honor the flag.
69
+ #
70
+ # The runtime fallback in `complete()` (strip-on-400) stays as a safety
71
+ # net for unexpected cases — e.g. if a Qwen variant lands on a router
72
+ # whose validator doesn't accept the field.
73
+ # ---------------------------------------------------------------------------
74
+ _HF_NEEDS_ENABLE_THINKING_OFF = (
75
+ "Qwen/Qwen3", # covers Qwen3, Qwen3.5, Qwen3.6, …
76
+ )
77
+
78
+
79
+ def _hf_model_needs_enable_thinking_off(model: str) -> bool:
80
+ return any(model.startswith(p) for p in _HF_NEEDS_ENABLE_THINKING_OFF)
81
+
22
82
  __all__ = [
23
83
  # Main client
24
84
  "UnifiedLLMClient",
@@ -350,8 +410,18 @@ class UnifiedLLMClient:
350
410
 
351
411
  Args:
352
412
  force_json: If False and no json_schema, don't set response_format (for text responses)
353
- thinking_budget: For OpenAI models, maps to reasoning_effort:
354
- 0 or None → "minimal", >0 "high"
413
+ thinking_budget: For OpenAI reasoning-capable models, maps to
414
+ reasoning_effort. `thinking_budget=0` picks the
415
+ provider's off-equivalent value from
416
+ `_OPENAI_REASONING_EFFORT_FLOORS`
417
+ ("none" for gpt-5.4+, "minimal" for o-series
418
+ and gpt-5.0-5.3). `thinking_budget>0` maps to
419
+ "high". If the chosen value is rejected at
420
+ runtime with 400 `unsupported_value`,
421
+ `complete()` retries with "low" (universally
422
+ accepted) and caches the override on the
423
+ client so subsequent calls skip the bad
424
+ value.
355
425
  """
356
426
  payload = {
357
427
  "model": self.model,
@@ -388,13 +458,31 @@ class UnifiedLLMClient:
388
458
  if thinking_budget > 0:
389
459
  payload["reasoning_effort"] = "high"
390
460
  else:
391
- payload["reasoning_effort"] = "minimal"
461
+ # Off-equivalent value depends on the model family —
462
+ # see `_OPENAI_REASONING_EFFORT_FLOORS`. A previously-
463
+ # discovered fallback (from a 400 retry in complete())
464
+ # wins if cached on the client.
465
+ payload["reasoning_effort"] = (
466
+ getattr(self, "_reasoning_effort_override", None)
467
+ or _openai_reasoning_effort_floor(self.model)
468
+ )
392
469
  elif creativity is not None:
393
470
  payload["temperature"] = creativity
394
471
 
395
- # HuggingFace: disable thinking for models that reason by default (e.g., Qwen3)
396
- # when thinking_budget is explicitly set to 0
397
- if self.provider in ("huggingface", "huggingface-together") and thinking_budget is not None and thinking_budget == 0:
472
+ # HuggingFace: disable thinking on model families whose chat
473
+ # template honors `enable_thinking` (Qwen3-family). Other HF-routed
474
+ # models don't need the kwarg, and strict-validator backends
475
+ # (Fireworks, Groq) reject the unknown field outright — sending it
476
+ # to a non-Qwen model just buys a wasted retry. See
477
+ # `_hf_model_needs_enable_thinking_off()`. The runtime fallback in
478
+ # `complete()` still strips on 400 if a router rejects the kwarg
479
+ # even for a model we expected to support it.
480
+ if (
481
+ self.provider in ("huggingface", "huggingface-together")
482
+ and thinking_budget is not None
483
+ and thinking_budget == 0
484
+ and _hf_model_needs_enable_thinking_off(self.model)
485
+ ):
398
486
  payload["chat_template_kwargs"] = {"enable_thinking": False}
399
487
 
400
488
  return payload
@@ -637,7 +725,13 @@ class UnifiedLLMClient:
637
725
  creativity: Temperature setting (None for default)
638
726
  thinking_budget: Controls reasoning behavior per provider:
639
727
  - Google: Token budget for extended thinking (0 to disable, >0 to enable)
640
- - OpenAI: Maps to reasoning_effort (0 → "minimal", >0 → "high")
728
+ - OpenAI: Maps to reasoning_effort. `thinking_budget=0`
729
+ picks the model's off-equivalent value from
730
+ `_OPENAI_REASONING_EFFORT_FLOORS` ("none" for gpt-5.4+,
731
+ "minimal" for older o-series / gpt-5.0-5.3). If the
732
+ picked value is rejected at runtime, the client falls
733
+ back to "low" (universally accepted) and caches the
734
+ override. `thinking_budget>0` maps to "high".
641
735
  - Anthropic: Enables extended thinking (0 to disable, >0 to enable with min 1024)
642
736
  force_json: If True and no json_schema, still request JSON output.
643
737
  Set to False for text-only responses (e.g., CoVe intermediate steps)
@@ -693,15 +787,23 @@ class UnifiedLLMClient:
693
787
  payload.pop("response_format")
694
788
  continue # Retry immediately without response_format
695
789
 
696
- # HF: some routers (notably Groq behind HF Inference
697
- # Providers, which serves Llama-3.x and gpt-oss) reject
698
- # `chat_template_kwargs` outright with
699
- # "property 'chat_template_kwargs' is unsupported".
790
+ # HF: some routers reject `chat_template_kwargs` outright.
791
+ # The wording varies per router:
792
+ # Groq: "property 'chat_template_kwargs' is unsupported"
793
+ # Fireworks: "Extra inputs are not permitted, field:
794
+ # 'chat_template_kwargs'"
700
795
  # The kwarg is only there to disable thinking on Qwen3-
701
796
  # family models when thinking_budget=0 — dropping it on
702
797
  # a router that doesn't honor it is harmless. Strip and
703
798
  # retry, mirror the response_format pattern above.
704
- if "chat_template_kwargs" in error_text and "unsupported" in error_text:
799
+ _ctk_rejected = (
800
+ "chat_template_kwargs" in error_text
801
+ and any(phrase in error_text for phrase in (
802
+ "unsupported", "not permitted", "not allowed",
803
+ "extra inputs", "extra fields", "unknown field",
804
+ ))
805
+ )
806
+ if _ctk_rejected:
705
807
  if "chat_template_kwargs" in payload:
706
808
  if not getattr(self, '_warned_no_chat_template_kwargs', False):
707
809
  print(f"\n[CatLLM] Model '{self.model}' does not accept chat_template_kwargs.")
@@ -710,6 +812,31 @@ class UnifiedLLMClient:
710
812
  payload.pop("chat_template_kwargs")
711
813
  continue # Retry immediately without chat_template_kwargs
712
814
 
815
+ # OpenAI reasoning_effort enum varies across model
816
+ # families — gpt-5.4+ deprecated "minimal" in favor of
817
+ # "none"; older models reject "none". If the model
818
+ # rejects our chosen value with 400 unsupported_value,
819
+ # fall back to "low" (universally accepted across all
820
+ # OpenAI reasoning-effort-supporting models) and cache
821
+ # the override so subsequent calls skip the doomed
822
+ # value. If "low" itself is rejected, drop reasoning_effort
823
+ # entirely.
824
+ if "reasoning_effort" in error_text and (
825
+ "unsupported" in error_text or "invalid" in error_text
826
+ ):
827
+ current = payload.get("reasoning_effort")
828
+ if current not in (None, "low"):
829
+ if not getattr(self, '_warned_reasoning_effort_fallback', False):
830
+ print(f"\n[CatLLM] Model '{self.model}' rejected reasoning_effort='{current}'.")
831
+ print(f" Falling back to 'low' and caching for subsequent calls on this client.\n")
832
+ self._warned_reasoning_effort_fallback = True
833
+ self._reasoning_effort_override = "low"
834
+ payload["reasoning_effort"] = "low"
835
+ continue
836
+ elif current == "low" and "reasoning_effort" in payload:
837
+ payload.pop("reasoning_effort")
838
+ continue
839
+
713
840
  # HuggingFace: try other routers when the current one
714
841
  # rejects the model with a "wrong router" 400.
715
842
  if self._is_hf_wrong_router_400(response.text):
@@ -39,16 +39,63 @@ def _anthropic_supports_pdf(model_name):
39
39
  return False
40
40
 
41
41
 
42
+ def _is_likely_pdf(path) -> bool:
43
+ """True if the first 1024 bytes of `path` contain the PDF magic bytes
44
+ (`%PDF-`). PyMuPDF is permissive and will happily "open" an HTML
45
+ file saved with .pdf extension, render a junk page, and let the
46
+ downstream VLM produce a "successful" classification dict from
47
+ blank content. The cheap fix is to refuse files that don't have
48
+ the canonical PDF header before they reach PyMuPDF.
49
+
50
+ Scanning 1024 bytes (rather than checking offset 0 strictly) matches
51
+ what most PDF parsers do — the spec technically allows leading bytes
52
+ before the header (e.g. MIME-wrapped PDFs).
53
+ """
54
+ try:
55
+ with open(path, "rb") as f:
56
+ head = f.read(1024)
57
+ return b"%PDF-" in head
58
+ except OSError:
59
+ return False
60
+
61
+
42
62
  def _load_pdf_files(pdf_input):
43
- """Load PDF files from directory path, single file path, or return list as-is."""
63
+ """Load PDF files from directory path, single file path, or return list as-is.
64
+
65
+ Files are validated against the PDF magic-byte header before being
66
+ returned — a single bogus path raises `ValueError`; bogus files
67
+ found during a directory glob are skipped with a warning. This
68
+ prevents PyMuPDF from silently rendering non-PDF content into
69
+ near-blank pages that then get classified as "success" downstream.
70
+ """
44
71
  import os
45
72
  import glob
46
73
 
47
74
  if isinstance(pdf_input, list):
48
- pdf_files = pdf_input
49
- print(f"Provided a list of {len(pdf_input)} PDFs.")
75
+ pdf_files = []
76
+ for path in pdf_input:
77
+ if not os.path.isfile(path):
78
+ raise FileNotFoundError(f"PDF input not found: {path}")
79
+ if not _is_likely_pdf(path):
80
+ raise ValueError(
81
+ f"File '{path}' does not have a PDF header "
82
+ f"(first 1024 bytes don't contain b'%PDF-'). "
83
+ f"PyMuPDF would happily render it as a junk page "
84
+ f"and the VLM would classify the result as 'success' — "
85
+ f"refusing instead. Check the file is a real PDF."
86
+ )
87
+ pdf_files.append(path)
88
+ print(f"Provided a list of {len(pdf_files)} PDFs.")
50
89
  elif os.path.isfile(pdf_input):
51
90
  # Single file path
91
+ if not _is_likely_pdf(pdf_input):
92
+ raise ValueError(
93
+ f"File '{pdf_input}' does not have a PDF header "
94
+ f"(first 1024 bytes don't contain b'%PDF-'). "
95
+ f"PyMuPDF would happily render it as a junk page "
96
+ f"and the VLM would classify the result as 'success' — "
97
+ f"refusing instead. Check the file is a real PDF."
98
+ )
52
99
  pdf_files = [pdf_input]
53
100
  print(f"Provided 1 PDF file.")
54
101
  elif os.path.isdir(pdf_input):
@@ -62,7 +109,19 @@ def _load_pdf_files(pdf_input):
62
109
  if f.lower() not in seen:
63
110
  seen.add(f.lower())
64
111
  unique_files.append(f)
65
- pdf_files = unique_files
112
+ # Filter out files that don't have the PDF header (e.g. a webpage
113
+ # saved with .pdf extension). Warn per skipped file so the user
114
+ # knows what didn't make it into the run.
115
+ validated = []
116
+ for f in unique_files:
117
+ if _is_likely_pdf(f):
118
+ validated.append(f)
119
+ else:
120
+ print(
121
+ f"[CatLLM] Warning: skipping '{f}' — does not have a "
122
+ f"PDF header (first 1024 bytes don't contain b'%PDF-')."
123
+ )
124
+ pdf_files = validated
66
125
  print(f"Found {len(pdf_files)} PDFs in directory.")
67
126
  else:
68
127
  raise FileNotFoundError(f"PDF input not found: {pdf_input}")
File without changes
File without changes
File without changes