PyPI - cat-stack - Versions diffs - 1.6.0__tar.gz → 1.6.2__tar.gz - Mend

cat-stack 1.6.0 (tar.gz) → 1.6.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

{cat_stack-1.6.0 → cat_stack-1.6.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cat-stack
-Version: 1.6.0
+Version: 1.6.2
 Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
 Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
 Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
@@ -177,7 +177,11 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
 - **Multi-model ensemble** with consensus voting and agreement scores
 - **Batch API support** for OpenAI, Anthropic, Google, Mistral, and xAI
 - **Prompt strategies**: Chain-of-Thought, Chain-of-Verification, step-back prompting, few-shot examples
-- **Text, image, and PDF** input auto-detection
+- **Text, image, and PDF** input auto-detection (PDF inputs are
+  validated against the `%PDF-` magic-byte header before reaching
+  PyMuPDF, so a webpage saved with a `.pdf` extension raises a clear
+  `ValueError` — or is skipped with a warning during a directory
+  scan — instead of a blank rendered page being silently classified
+  as `success`)
 - **Embedding similarity** tiebreaker for ensemble consensus ties
 - **Pilot test** — validate classifications on a small sample before committing to the full run

{cat_stack-1.6.0 → cat_stack-1.6.2}/README.md RENAMED Viewed

@@ -141,7 +141,11 @@ All providers use the same `(model_name, provider, api_key)` tuple format. Provi
 - **Multi-model ensemble** with consensus voting and agreement scores
 - **Batch API support** for OpenAI, Anthropic, Google, Mistral, and xAI
 - **Prompt strategies**: Chain-of-Thought, Chain-of-Verification, step-back prompting, few-shot examples
-- **Text, image, and PDF** input auto-detection
+- **Text, image, and PDF** input auto-detection (PDF inputs are
+  validated against the `%PDF-` magic-byte header before reaching
+  PyMuPDF, so a webpage saved with a `.pdf` extension raises a clear
+  `ValueError` — or is skipped with a warning during a directory
+  scan — instead of a blank rendered page being silently classified
+  as `success`)
 - **Embedding similarity** tiebreaker for ensemble consensus ties
 - **Pilot test** — validate classifications on a small sample before committing to the full run

{cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/__about__.py RENAMED Viewed

@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
-__version__ = "1.6.0"
+__version__ = "1.6.2"
 __author__ = "Chris Soria"
 __email__ = "chrissoria@berkeley.edu"
 __title__ = "cat-stack"

{cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/_providers.py RENAMED Viewed

@@ -19,6 +19,66 @@ import requests
 # short enough that batch ensembles don't stall for half an hour."
 _MAX_TOTAL_WAIT_SECONDS = 300.0
+# ---------------------------------------------------------------------------
+# OpenAI reasoning_effort: per-model-family off-equivalent value.
+# ---------------------------------------------------------------------------
+#
+# Different OpenAI model generations expose different `reasoning_effort`
+# enum values. The "off" value (what `thinking_budget=0` maps to) is not
+# stable across families:
+#
+#   o1 / o3 / o4, gpt-5.0..gpt-5.3   → "minimal" (older floor)
+#   gpt-5.4 / gpt-5.5 / gpt-5.6      → "none"    (new strict-off; "minimal" deprecated)
+#
+# A model sent the wrong floor returns a 400 `unsupported_value`. The
+# table below is consulted in `_openai_reasoning_effort_floor()` to pick
+# the right value up-front. For unknown future families,
+# `UnifiedLLMClient.complete()` catches the 400 and falls back to "low"
+# (universally accepted across all reasoning_effort-supporting models).
+#
+# Entries are checked in table order and the first matching prefix wins,
+# so longer prefixes ("gpt-5.4") must be listed before shorter ones
+# ("gpt-5") — keep that invariant when extending.
+_OPENAI_REASONING_EFFORT_FLOORS = (
+    ("gpt-5.4", "none"),
+    ("gpt-5.5", "none"),
+    ("gpt-5.6", "none"),
+    ("gpt-5",   "minimal"),  # covers 5.0, 5.1, 5.2, 5.3
+    ("o1",      "minimal"),
+    ("o3",      "minimal"),
+    ("o4",      "minimal"),
+)
+def _openai_reasoning_effort_floor(model: str) -> str:
+    """Return the off-equivalent reasoning_effort value for a reasoning-
+    capable OpenAI model, based on its name prefix. Defaults to "minimal"
+    for models not covered by the table — the safest historical value."""
+    for prefix, floor in _OPENAI_REASONING_EFFORT_FLOORS:
+        if model.startswith(prefix):
+            return floor
+    return "minimal"
+# ---------------------------------------------------------------------------
+# HuggingFace `chat_template_kwargs={"enable_thinking": False}` is the knob
+# to suppress Qwen3-family `<think>` blocks. Other model families don't
+# expose an `enable_thinking` template variable, and strict HF backends
+# (Fireworks, Groq) reject the unknown field with 400 — forcing a wasted
+# retry. Restrict injection to families that actually honor the flag.
+#
+# The runtime fallback in `complete()` (strip-on-400) stays as a safety
+# net for unexpected cases — e.g. if a Qwen variant lands on a router
+# whose validator doesn't accept the field.
+# ---------------------------------------------------------------------------
+_HF_NEEDS_ENABLE_THINKING_OFF = (
+    "Qwen/Qwen3",   # covers Qwen3, Qwen3.5, Qwen3.6, …
+)
+def _hf_model_needs_enable_thinking_off(model: str) -> bool:
+    return any(model.startswith(p) for p in _HF_NEEDS_ENABLE_THINKING_OFF)
 __all__ = [
     # Main client
     "UnifiedLLMClient",
@@ -350,8 +410,18 @@ class UnifiedLLMClient:
         Args:
             force_json: If False and no json_schema, don't set response_format (for text responses)
-            thinking_budget: For OpenAI models, maps to reasoning_effort:
-                             0 or None → "minimal", >0 → "high"
+            thinking_budget: For OpenAI reasoning-capable models, maps to
+                             reasoning_effort. `thinking_budget=0` picks the
+                             model family's off-equivalent value from
+                             `_OPENAI_REASONING_EFFORT_FLOORS`
+                             ("none" for gpt-5.4 through gpt-5.6, "minimal"
+                             for the o-series and every other gpt-5.x
+                             name). `thinking_budget>0` maps to
+                             "high". If the chosen value is rejected at
+                             runtime with 400 `unsupported_value`,
+                             `complete()` retries with "low" (universally
+                             accepted) and caches the override on the
+                             client so subsequent calls skip the bad
+                             value.
         """
         payload = {
             "model": self.model,
@@ -388,13 +458,31 @@ class UnifiedLLMClient:
                 if thinking_budget > 0:
                     payload["reasoning_effort"] = "high"
                 else:
-                    payload["reasoning_effort"] = "minimal"
+                    # Off-equivalent value depends on the model family —
+                    # see `_OPENAI_REASONING_EFFORT_FLOORS`. A previously-
+                    # discovered fallback (from a 400 retry in complete())
+                    # wins if cached on the client.
+                    payload["reasoning_effort"] = (
+                        getattr(self, "_reasoning_effort_override", None)
+                        or _openai_reasoning_effort_floor(self.model)
+                    )
         elif creativity is not None:
             payload["temperature"] = creativity
-        # HuggingFace: disable thinking for models that reason by default (e.g., Qwen3)
-        # when thinking_budget is explicitly set to 0
-        if self.provider in ("huggingface", "huggingface-together") and thinking_budget is not None and thinking_budget == 0:
+        # HuggingFace: disable thinking on model families whose chat
+        # template honors `enable_thinking` (Qwen3-family). Other HF-routed
+        # models don't need the kwarg, and strict-validator backends
+        # (Fireworks, Groq) reject the unknown field outright — sending it
+        # to a non-Qwen model just buys a wasted retry. See
+        # `_hf_model_needs_enable_thinking_off()`. The runtime fallback in
+        # `complete()` still strips on 400 if a router rejects the kwarg
+        # even for a model we expected to support it.
+        if (
+            self.provider in ("huggingface", "huggingface-together")
+            and thinking_budget is not None
+            and thinking_budget == 0
+            and _hf_model_needs_enable_thinking_off(self.model)
+        ):
             payload["chat_template_kwargs"] = {"enable_thinking": False}
         return payload
@@ -637,7 +725,13 @@ class UnifiedLLMClient:
             creativity: Temperature setting (None for default)
             thinking_budget: Controls reasoning behavior per provider:
                 - Google: Token budget for extended thinking (0 to disable, >0 to enable)
-                - OpenAI: Maps to reasoning_effort (0 → "minimal", >0 → "high")
+                - OpenAI: Maps to reasoning_effort. `thinking_budget=0`
+                  picks the model's off-equivalent value from
+                  `_OPENAI_REASONING_EFFORT_FLOORS` ("none" for gpt-5.4
+                  through gpt-5.6, "minimal" otherwise). If the
+                  picked value is rejected at runtime, the client falls
+                  back to "low" (universally accepted) and caches the
+                  override. `thinking_budget>0` maps to "high".
                 - Anthropic: Enables extended thinking (0 to disable, >0 to enable with min 1024)
             force_json: If True and no json_schema, still request JSON output.
                        Set to False for text-only responses (e.g., CoVe intermediate steps)
@@ -693,15 +787,23 @@ class UnifiedLLMClient:
                             payload.pop("response_format")
                             continue  # Retry immediately without response_format
-                    # HF: some routers (notably Groq behind HF Inference
-                    # Providers, which serves Llama-3.x and gpt-oss) reject
-                    # `chat_template_kwargs` outright with
-                    #   "property 'chat_template_kwargs' is unsupported".
+                    # HF: some routers reject `chat_template_kwargs` outright.
+                    # The wording varies per router:
+                    #   Groq:      "property 'chat_template_kwargs' is unsupported"
+                    #   Fireworks: "Extra inputs are not permitted, field:
+                    #               'chat_template_kwargs'"
                     # The kwarg is only there to disable thinking on Qwen3-
                     # family models when thinking_budget=0 — dropping it on
                     # a router that doesn't honor it is harmless. Strip and
                     # retry, mirror the response_format pattern above.
-                    if "chat_template_kwargs" in error_text and "unsupported" in error_text:
+                    _ctk_rejected = (
+                        "chat_template_kwargs" in error_text
+                        and any(phrase in error_text for phrase in (
+                            "unsupported", "not permitted", "not allowed",
+                            "extra inputs", "extra fields", "unknown field",
+                        ))
+                    )
+                    if _ctk_rejected:
                         if "chat_template_kwargs" in payload:
                             if not getattr(self, '_warned_no_chat_template_kwargs', False):
                                 print(f"\n[CatLLM] Model '{self.model}' does not accept chat_template_kwargs.")
@@ -710,6 +812,31 @@ class UnifiedLLMClient:
                             payload.pop("chat_template_kwargs")
                             continue  # Retry immediately without chat_template_kwargs
+                    # OpenAI reasoning_effort enum varies across model
+                    # families — gpt-5.4+ deprecated "minimal" in favor of
+                    # "none"; older models reject "none". If the model
+                    # rejects our chosen value with 400 unsupported_value,
+                    # fall back to "low" (universally accepted across all
+                    # OpenAI reasoning-effort-supporting models) and cache
+                    # the override so subsequent calls skip the doomed
+                    # value. If "low" itself is rejected, drop reasoning_effort
+                    # entirely.
+                    if "reasoning_effort" in error_text and (
+                        "unsupported" in error_text or "invalid" in error_text
+                    ):
+                        current = payload.get("reasoning_effort")
+                        if current not in (None, "low"):
+                            if not getattr(self, '_warned_reasoning_effort_fallback', False):
+                                print(f"\n[CatLLM] Model '{self.model}' rejected reasoning_effort='{current}'.")
+                                print(f"  Falling back to 'low' and caching for subsequent calls on this client.\n")
+                                self._warned_reasoning_effort_fallback = True
+                            self._reasoning_effort_override = "low"
+                            payload["reasoning_effort"] = "low"
+                            continue
+                        elif current == "low" and "reasoning_effort" in payload:
+                            payload.pop("reasoning_effort")
+                            continue
                     # HuggingFace: try other routers when the current one
                     # rejects the model with a "wrong router" 400.
                     if self._is_hf_wrong_router_400(response.text):

{cat_stack-1.6.0 → cat_stack-1.6.2}/src/catstack/pdf_functions.py RENAMED Viewed

@@ -39,16 +39,63 @@ def _anthropic_supports_pdf(model_name):
     return False
+def _is_likely_pdf(path) -> bool:
+    """True if the first 1024 bytes of `path` contain the PDF magic bytes
+    (`%PDF-`). PyMuPDF is permissive and will happily "open" an HTML
+    file saved with .pdf extension, render a junk page, and let the
+    downstream VLM produce a "successful" classification dict from
+    blank content. The cheap fix is to refuse files that don't have
+    the canonical PDF header before they reach PyMuPDF.
+    Scanning 1024 bytes (rather than checking offset 0 strictly) matches
+    what most PDF parsers do — the spec technically allows leading bytes
+    before the header (e.g. MIME-wrapped PDFs).
+    """
+    try:
+        with open(path, "rb") as f:
+            head = f.read(1024)
+        return b"%PDF-" in head
+    except OSError:
+        return False
 def _load_pdf_files(pdf_input):
-    """Load PDF files from directory path, single file path, or return list as-is."""
+    """Load PDF files from directory path, single file path, or return list as-is.
+    Files are validated against the PDF magic-byte header before being
+    returned — a bogus single path or list entry raises `ValueError`
+    (a missing list entry raises `FileNotFoundError`); bogus files
+    found during a directory glob are skipped with a warning. This
+    prevents PyMuPDF from silently rendering non-PDF content into
+    near-blank pages that then get classified as "success" downstream.
+    """
     import os
     import glob
     if isinstance(pdf_input, list):
-        pdf_files = pdf_input
-        print(f"Provided a list of {len(pdf_input)} PDFs.")
+        pdf_files = []
+        for path in pdf_input:
+            if not os.path.isfile(path):
+                raise FileNotFoundError(f"PDF input not found: {path}")
+            if not _is_likely_pdf(path):
+                raise ValueError(
+                    f"File '{path}' does not have a PDF header "
+                    f"(first 1024 bytes don't contain b'%PDF-'). "
+                    f"PyMuPDF would happily render it as a junk page "
+                    f"and the VLM would classify the result as 'success' — "
+                    f"refusing instead. Check the file is a real PDF."
+                )
+            pdf_files.append(path)
+        print(f"Provided a list of {len(pdf_files)} PDFs.")
     elif os.path.isfile(pdf_input):
         # Single file path
+        if not _is_likely_pdf(pdf_input):
+            raise ValueError(
+                f"File '{pdf_input}' does not have a PDF header "
+                f"(first 1024 bytes don't contain b'%PDF-'). "
+                f"PyMuPDF would happily render it as a junk page "
+                f"and the VLM would classify the result as 'success' — "
+                f"refusing instead. Check the file is a real PDF."
+            )
         pdf_files = [pdf_input]
         print(f"Provided 1 PDF file.")
     elif os.path.isdir(pdf_input):
@@ -62,7 +109,19 @@ def _load_pdf_files(pdf_input):
             if f.lower() not in seen:
                 seen.add(f.lower())
                 unique_files.append(f)
-        pdf_files = unique_files
+        # Filter out files that don't have the PDF header (e.g. a webpage
+        # saved with .pdf extension). Warn per skipped file so the user
+        # knows what didn't make it into the run.
+        validated = []
+        for f in unique_files:
+            if _is_likely_pdf(f):
+                validated.append(f)
+            else:
+                print(
+                    f"[CatLLM] Warning: skipping '{f}' — does not have a "
+                    f"PDF header (first 1024 bytes don't contain b'%PDF-')."
+                )
+        pdf_files = validated
         print(f"Found {len(pdf_files)} PDFs in directory.")
     else:
         raise FileNotFoundError(f"PDF input not found: {pdf_input}")