PyPI - cat-stack - Versions diffs - 1.0.22__tar.gz → 1.1.0__tar.gz - Mend

cat-stack 1.0.22 (tar.gz) → 1.1.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

{cat_stack-1.0.22 → cat_stack-1.1.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cat-stack
-Version: 1.0.22
+Version: 1.1.0
 Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
 Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
 Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/__about__.py RENAMED Viewed

@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
-__version__ = "1.0.22"
+__version__ = "1.1.0"
 __author__ = "Chris Soria"
 __email__ = "chrissoria@berkeley.edu"
 __title__ = "cat-stack"

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/classify.py RENAMED Viewed

@@ -92,6 +92,7 @@ def classify(
     add_other = "prompt",
     check_verbosity: bool = True,
     json_formatter: Optional[bool] = None,
+    two_step_classify: Optional[bool] = None,
     embeddings: bool = False,
     category_descriptions: dict = None,
     embedding_tiebreaker: bool = False,
@@ -133,7 +134,11 @@ def classify(
             - "image" (default): Render pages as images
             - "text": Extract text only
             - "both": Send both image and extracted text
-        creativity (float): Temperature setting. None uses model default.
+        creativity (float): Temperature setting. None uses model default,
+            except for Ollama where it defaults to 0.0 (classification is not
+            creative generation; deterministic output reproduces across runs
+            and avoids high-entropy junk that throws off small local models).
+            Pass an explicit value to override.
         safety (bool): If True, saves progress after each item.
         chain_of_verification (bool): Enable Chain of Verification for accuracy.
         chain_of_thought (bool): Enable step-by-step reasoning. Default False.
@@ -202,6 +207,19 @@ def classify(
             produces invalid output — zero cost on the happy path. On first
             use, the model (~1GB) is downloaded from HuggingFace Hub.
             Requires: pip install cat-llm[formatter]. Default False.
+            Auto-enabled when two_step_classify is True (or when any model in
+            `models` uses the Ollama provider).
+        two_step_classify (bool): Split classification into two LLM calls:
+            (1) a plain-text list of the categories that apply, then
+            (2) conversion of that list to JSON.  More reliable for weaker
+            models — local Ollama models, but also lower-tier API models
+            (gpt-4o-mini, claude-haiku, gemini-flash) that struggle to
+            produce strict per-category JSON in a single shot.
+            When set to True (and json_formatter is left as None),
+            json_formatter is auto-enabled; the fine-tuned formatter is
+            applied to the raw step-1 text only as a fallback (step 2
+            errored, or returned all zeros although step 1 named categories).
+            Default None: auto-enable for Ollama models, disable otherwise.
+            Set True to force it on any provider; False to disable for Ollama.
+            Per-model override is also supported via the 4-tuple options dict:
+                ("gpt-4o-mini", "openai", key, {"two_step_classify": True})
         embeddings (bool): If True, add embedding-based similarity scores
             alongside binary 0/1 classifications. Uses a local sentence-
             transformer model (BAAI/bge-small-en-v1.5, ~130MB) to compute
@@ -552,14 +570,31 @@ def classify(
                     return True
         return False
+    # Local Ollama models benefit enormously from temperature=0 on classification:
+    # in benchmarks, qwen2.5:7b accuracy jumped from 78% to 85% and produced
+    # bit-identical labels across runs (no more "{Negative: '.$/1234567890...'}"
+    # high-entropy junk).  Classification is not creative generation; the user
+    # can still override by passing creativity= explicitly.
+    if creativity is None and _uses_ollama_provider():
+        creativity = 0.0
     if json_formatter is None:
-        json_formatter = _uses_ollama_provider()
-        if json_formatter:
+        if two_step_classify is True:
+            json_formatter = True
             print(
-                "\n[CatLLM] Ollama detected — auto-enabling JSON formatter fallback\n"
-                "  (small local models more often emit malformed JSON).\n"
+                "\n[CatLLM] two_step_classify=True — auto-enabling JSON formatter\n"
+                "  (the formatter receives the step-1 reasoning text and is what\n"
+                "  makes the two-step path actually more accurate than one-shot).\n"
                 "  Pass json_formatter=False to opt out."
             )
+        else:
+            json_formatter = _uses_ollama_provider()
+            if json_formatter:
+                print(
+                    "\n[CatLLM] Ollama detected — auto-enabling JSON formatter fallback\n"
+                    "  (small local models more often emit malformed JSON).\n"
+                    "  Pass json_formatter=False to opt out."
+                )
     # The formatter MODEL is loaded lazily on the first parse failure (saves
     # ~1 GB RAM + load time when no rows actually need rescuing). The dep
@@ -832,6 +867,7 @@ def classify(
         save_directory=save_directory,
         progress_callback=progress_callback,
         formatter_state=_formatter_state,
+        two_step_classify=two_step_classify,
         multi_label=multi_label,
         categories_per_call=categories_per_call,
         embedding_tiebreaker_state=_embedding_tiebreaker_state,

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/text_functions.py RENAMED Viewed

@@ -219,7 +219,7 @@ def ollama_two_step_classify(
     survey_question: str = "",
     creativity: float = None,
     max_retries: int = 5,
-) -> tuple[str, str | None]:
+) -> tuple[str, str, str | None]:
     """
     Two-step classification for Ollama models.
@@ -239,35 +239,42 @@ def ollama_two_step_classify(
         max_retries: Number of retry attempts for JSON validation
     Returns:
-        tuple: (json_string, error_message or None)
+        tuple: (json_string, step1_raw_reply, error_message or None)
+              step1_raw_reply is the unformatted step-1 output; callers can
+              pass it to the fine-tuned formatter even when step-2 produced
+              syntactically valid (but semantically empty) JSON.
     """
     num_categories = len(categories)
     survey_context = f"Context: {survey_question}." if survey_question else ""
     # ==========================================================================
-    # Step 1: Classification (natural language - focus on accuracy)
+    # Step 1: Classification (simple list of applicable categories)
     # ==========================================================================
+    # Weak models (local Ollama, lower-tier API models) can't reliably produce
+    # per-category YES/NO output OR strict JSON in one shot.  Ask for the
+    # simplest possible output — just the names of the applicable categories,
+    # one per line — and let the fine-tuned formatter (or step 2) slot those
+    # names into the indexed JSON schema.
     step1_messages = [
         {
             "role": "system",
-            "content": "You are an expert at categorizing text responses. Focus on accurate classification."
+            "content": "You are an expert at categorizing text. You read a response and pick the categories that apply."
         },
         {
             "role": "user",
             "content": f"""{survey_context}
-Analyze this text response and determine which categories apply:
+Read this text response:
-Response: "{response_text}"
+"{response_text}"
+Decide which of these categories apply to the response:
-Categories:
 {categories_str}
-For each category, explain briefly whether it applies (YES) or not (NO) to this response.
-Format your answer as:
-1. [Category name]: YES/NO - [brief reason]
-2. [Category name]: YES/NO - [brief reason]
-...and so on for all categories."""
+Output ONLY the names of the categories that apply, one per line.
+Write nothing else — no numbering, no reasoning, no JSON, no markdown.
+If none apply, write the single word: None"""
         }
     ]
@@ -276,33 +283,48 @@ Format your answer as:
         json_schema=None,  # No JSON requirement for step 1
         creativity=creativity,
     )
+    # Preserve the original step-1 text; the retry loop below overwrites
+    # step1_reply with error context, but callers need the raw output so the
+    # fine-tuned formatter can extract the true classification signal from it
+    # even when step-2 later produces valid-but-all-zero JSON.
+    original_step1_reply = step1_reply
     if step1_error:
-        return '{"1":"e"}', f"Step 1 failed: {step1_error}"
+        return '{"1":"e"}', "", f"Step 1 failed: {step1_error}"
     # ==========================================================================
     # Step 2: JSON Formatting with validation and retry
     # ==========================================================================
     example_json = json.dumps({str(i): "0" for i in range(1, num_categories + 1)})
+    # Numbered category list for step 2 — the formatter needs to map each
+    # name in step1_reply back to its position in the original list.
+    numbered_categories = "\n".join(
+        f"{i + 1}. {c}" for i, c in enumerate(categories)
+    )
     for attempt in range(max_retries):
         step2_messages = [
             {
                 "role": "system",
-                "content": "You convert classification results to JSON. Output ONLY valid JSON, nothing else."
+                "content": "You convert a list of category names to a JSON object marking which categories were selected. Output ONLY valid JSON, nothing else."
             },
             {
                 "role": "user",
-                "content": f"""Convert this classification to JSON format.
+                "content": f"""Categories (numbered 1 to {num_categories}):
+{numbered_categories}
-Classification results:
+Selected categories (the names that were chosen — may be a subset, all, or none):
 {step1_reply}
+Output a JSON object where each key is a category number ("1" through "{num_categories}")
+and each value is "1" if that category appears in the selected list, "0" if not.
 Rules:
 - Output ONLY a JSON object, no other text
-- Use category numbers as keys (1, 2, 3, etc.)
-- Use "1" if the category was marked YES, "0" if NO
-- Include ALL {num_categories} categories
+- Include ALL {num_categories} categories as keys
+- Match by category name (allow partial / case-insensitive matches)
+- If the selected list says "None" or is empty, all values are "0"
 Example format:
 {example_json}
@@ -320,14 +342,14 @@ Your JSON output:"""
         if step2_error:
             if attempt < max_retries - 1:
                 continue
-            return '{"1":"e"}', f"Step 2 failed: {step2_error}"
+            return '{"1":"e"}', original_step1_reply, f"Step 2 failed: {step2_error}"
         # Extract and validate JSON
         extracted = extract_json(step2_reply)
         is_valid, normalized = validate_classification_json(extracted, num_categories)
         if is_valid:
-            return json.dumps(normalized), None
+            return json.dumps(normalized), original_step1_reply, None
         # If invalid, try again with more explicit instructions
         if attempt < max_retries - 1:
@@ -340,7 +362,7 @@ Please be more careful to output EXACTLY {num_categories} categories numbered 1
     # All retries exhausted - try to salvage what we can
     extracted = extract_json(step2_reply) if step2_reply else '{"1":"e"}'
-    return extracted, f"JSON validation failed after {max_retries} attempts"
+    return extracted, original_step1_reply, f"JSON validation failed after {max_retries} attempts"
 # =============================================================================

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/text_functions_ensemble.py RENAMED Viewed

@@ -575,7 +575,11 @@ def _format_creativity_suffix(creativity) -> str:
     return f"_t{int(round(creativity * 100))}"
-def prepare_model_configs(models: list, auto_download: bool = False) -> list:
+def prepare_model_configs(
+    models: list,
+    auto_download: bool = False,
+    two_step_classify: Optional[bool] = None,
+) -> list:
     """
     Validate and prepare model configurations.
@@ -583,8 +587,14 @@ def prepare_model_configs(models: list, auto_download: bool = False) -> list:
         models: List of tuples. Each tuple can be:
             - (model, provider, api_key) — 3 elements
             - (model, provider, api_key, options) — 4 elements, where options is a
-              dict with per-model overrides (e.g. {"creativity": 0.5})
+              dict with per-model overrides (e.g. {"creativity": 0.5,
+              "two_step_classify": True})
         auto_download: If True, automatically download missing Ollama models
+        two_step_classify: Global override for the two-step classify mode.
+            None (default) → auto-enable for Ollama, off for everything else.
+            True → enable for all models (useful for weaker API models that
+            also struggle with strict JSON). False → never use it.
+            Per-model overrides via the options dict take precedence.
     Returns:
         List of config dicts with validated settings
@@ -690,6 +700,18 @@ def prepare_model_configs(models: list, auto_download: bool = False) -> list:
         # Per-model creativity override (None means use global)
         per_model_creativity = options.get("creativity", None) if options else None
+        # Resolve two-step setting.  Precedence:
+        #   1. per-model option override  (options["two_step_classify"])
+        #   2. global parameter override   (two_step_classify=)
+        #   3. auto-detect: True iff provider is Ollama
+        per_model_two_step = options.get("two_step_classify", None) if options else None
+        if per_model_two_step is not None:
+            effective_two_step = bool(per_model_two_step)
+        elif two_step_classify is not None:
+            effective_two_step = bool(two_step_classify)
+        else:
+            effective_two_step = (detected_provider == "ollama")
         # Build sanitized column name
         base_name = sanitize_model_name(model)
         if is_ensemble:
@@ -699,7 +721,7 @@ def prepare_model_configs(models: list, auto_download: bool = False) -> list:
             "model": model,
             "provider": detected_provider,
             "api_key": api_key,
-            "use_two_step": (detected_provider == "ollama"),
+            "use_two_step": effective_two_step,
             "sanitized_name": base_name,
             "creativity": per_model_creativity,
         })
@@ -2274,6 +2296,8 @@ def classify_ensemble(
     auto_download: bool = False,
     # JSON formatter fallback
     formatter_state: dict = None,
+    # Two-step classify (text-first then format). None = auto-detect for Ollama.
+    two_step_classify: Optional[bool] = None,
     # Label mode
     multi_label: bool = True,
     # Chunked classification
@@ -2483,7 +2507,11 @@ def classify_ensemble(
     # Prepare model configurations
     print(f"Validating {len(models)} model configuration(s)...")
-    model_configs = prepare_model_configs(models, auto_download=auto_download)
+    model_configs = prepare_model_configs(
+        models,
+        auto_download=auto_download,
+        two_step_classify=two_step_classify,
+    )
     # Print model info
     print(f"\nModels to use:")
@@ -2934,8 +2962,8 @@ Categorize text responses {cove_categorize}:
             else:
                 response_text = item
-                if cfg["use_two_step"]:  # Ollama
-                    json_result, error = ollama_two_step_classify(
+                if cfg["use_two_step"]:  # Ollama (or two_step_classify=True)
+                    json_result, step1_raw, error = ollama_two_step_classify(
                         client=client,
                         response_text=response_text,
                         categories=categories,
@@ -2944,8 +2972,28 @@ Categorize text responses {cove_categorize}:
                         creativity=effective_creativity,
                         max_retries=max_retries,
                     )
-                    if not error:
-                        json_result = _try_formatter_fallback(json_result, json_result)
+                    # Normal path: step 2 (the LLM acting as formatter) usually maps the
+                    # step-1 list correctly. Only fall back to the fine-tuned
+                    # formatter when step 2 returned all-zeros AND step 1 said
+                    # something non-empty — that combination signals step 2
+                    # silently lost the classification signal (the original bug).
+                    # Overriding a confident, non-zero step-2 result with the
+                    # formatter's interpretation of messy step-1 text loses
+                    # accuracy in the common case.
+                    def _is_all_zero(js):
+                        try:
+                            d = json.loads(js)
+                            return all(str(v) == "0" for v in d.values())
+                        except Exception:
+                            return False
+                    step1_meaningful = step1_raw and step1_raw.strip().lower() not in ("", "none")
+                    if step1_meaningful and _is_all_zero(json_result):
+                        fmt_result = _try_formatter_fallback('{"1":"e"}', step1_raw)
+                        if fmt_result != '{"1":"e"}':
+                            json_result = fmt_result
+                    elif error or not json_result:
+                        json_result = _try_formatter_fallback(json_result or '{"1":"e"}', step1_raw or "")
                     # CoVe not supported for Ollama two-step (already has verification)
                 else:
                     messages = build_text_classification_prompt(

{cat_stack-1.0.22 → cat_stack-1.1.0}/.gitignore RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/LICENSE RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/README.md RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/pyproject.toml RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/cat_stack/__init__.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/__init__.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/_batch.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/_category_analysis.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/_chunked.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/_embeddings.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/_formatter.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/_pilot_test.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/_prompts.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/_providers.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/_review_ui.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/_tiebreaker.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/_utils.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/_web_fetch.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/calls/CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/calls/__init__.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/calls/all_calls.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/calls/image_CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/calls/image_stepback.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/calls/pdf_CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/calls/pdf_stepback.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/calls/stepback.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/calls/top_n.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/explore.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/extract.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/image_functions.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/images/circle.png RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/images/cube.png RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/images/diamond.png RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/images/overlapping_pentagons.png RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/images/rectangles.png RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/model_reference_list.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/pdf_functions.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/prompt_tune.py RENAMED Viewed

File without changes

{cat_stack-1.0.22 → cat_stack-1.1.0}/src/catstack/summarize.py RENAMED Viewed

File without changes

cat-stack 1.0.22__tar.gz → 1.1.0__tar.gz

cat-stack 1.0.22 (tar.gz) → 1.1.0 (tar.gz)