PyPI - cat-stack - Versions diffs - 1.4.0__tar.gz → 1.4.1__tar.gz - Mend

cat-stack 1.4.0__tar.gz → 1.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

{cat_stack-1.4.0 → cat_stack-1.4.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cat-stack
-Version: 1.4.0
+Version: 1.4.1
 Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
 Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
 Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/__about__.py RENAMED Viewed

@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
-__version__ = "1.4.0"
+__version__ = "1.4.1"
 __author__ = "Chris Soria"
 __email__ = "chrissoria@berkeley.edu"
 __title__ = "cat-stack"

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/classify.py RENAMED Viewed

@@ -84,7 +84,8 @@ def classify(
     parallel: bool = None,
     fail_strategy: str = "partial",
     max_retries: int = 5,
-    batch_retries: int = 2,
+    batch_retries: int = 1,
+    json_retries: int = 2,
     retry_delay: float = 1.0,
     row_delay: float = 0.0,
     pdf_dpi: int = 150,
@@ -183,7 +184,9 @@ def classify(
             (e.g., Ollama on limited hardware) or debugging.
         fail_strategy (str): How to handle failures - "partial" (default) or "strict".
         max_retries (int): Max retries per API call. Default 5.
-        batch_retries (int): Max retries for batch-level failures. Default 2.
+        batch_retries (int): Max retries for batch-level failures. Default 1.
+            Note: composes multiplicatively with json_retries — a row can hit
+            the LLM up to (1 + json_retries) * (1 + batch_retries) times.
         retry_delay (float): Delay between retries in seconds. Default 1.0.
         row_delay (float): Delay in seconds between processing each row. Useful
             when multiple models share the same API provider/key to avoid rate
@@ -407,6 +410,7 @@ def classify(
             fail_strategy=fail_strategy,
             max_retries=max_retries,
             batch_retries=batch_retries,
+            json_retries=json_retries,
             retry_delay=retry_delay,
             row_delay=row_delay,
             auto_download=auto_download,
@@ -849,6 +853,7 @@ def classify(
         fail_strategy=fail_strategy,
         max_retries=max_retries,
         batch_retries=batch_retries,
+        json_retries=json_retries,
         retry_delay=retry_delay,
         row_delay=row_delay,
         auto_download=auto_download,

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/summarize.py RENAMED Viewed

@@ -55,7 +55,7 @@ def summarize(
     # Robustness parameters
     safety: bool = False,
     max_retries: int = 5,
-    batch_retries: int = 2,
+    batch_retries: int = 1,
     retry_delay: float = 1.0,
     row_delay: float = 0.0,
     fail_strategy: str = "partial",
@@ -131,7 +131,7 @@ def summarize(
         auto_download (bool): Auto-download missing Ollama models. Default False.
         safety (bool): If True, saves progress after each item. Requires filename.
         max_retries (int): Max retries per API call. Default 5.
-        batch_retries (int): Max retries for batch-level failures. Default 2.
+        batch_retries (int): Max retries for batch-level failures. Default 1.
         retry_delay (float): Delay between retries in seconds. Default 1.0.
         row_delay (float): Delay in seconds between processing each row. Default 0.0.
         fail_strategy (str): How to handle failures - "partial" (default) or "strict".

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/text_functions_ensemble.py RENAMED Viewed

@@ -2277,7 +2277,8 @@ def classify_ensemble(
     fail_strategy: str = "partial",
     safety: bool = False,
     max_retries: int = 5,
-    batch_retries: int = 2,
+    batch_retries: int = 1,
+    json_retries: int = 2,
     retry_delay: float = 1.0,
     row_delay: float = 0.0,
     filename: str = None,
@@ -2368,8 +2369,10 @@ def classify_ensemble(
         max_retries: Maximum retry attempts for each API call (handles rate limits,
             server errors, timeouts). Default 5.
         batch_retries: Maximum retry passes for failed (row, model) pairs after
-            the batch completes. Default 2 means up to 3 total attempts. Set to 0
-            to disable batch-level retries.
+            the batch completes. Default 1 means up to 2 total attempts. Set to 0
+            to disable batch-level retries. Note: composes multiplicatively with
+            json_retries — a row can hit the LLM up to
+            (1 + json_retries) * (1 + batch_retries) times.
         retry_delay: Seconds to wait between batch retry passes.
         # Output parameters:
@@ -3009,35 +3012,59 @@ Categorize text responses {cove_categorize}:
                         multi_label=multi_label,
                         system_prompt=system_prompt,
                     )
-                    reply, error = client.complete(
-                        messages=messages,
-                        json_schema=json_schemas[cfg["model"]],
-                        creativity=effective_creativity,
-                        thinking_budget=thinking_budget if cfg["provider"] in ("google", "openai", "anthropic", "huggingface", "huggingface-together") else None,
-                        max_retries=max_retries,
-                    )
-                    if error:
-                        json_result = '{"1":"e"}'
-                    else:
+                    json_result = '{"1":"e"}'
+                    error = None
+                    _n_cats = len(categories)
+                    for _json_attempt in range(json_retries + 1):
+                        # On retries, nudge the model toward plain JSON output
+                        if _json_attempt > 0:
+                            _nudge = "\n\nRespond with ONLY valid JSON, no explanation or additional text."
+                            _last = messages[-1]
+                            _content = _last.get("content", "")
+                            _retry_messages = messages[:-1] + [{**_last, "content": _content + _nudge}]
+                        else:
+                            _retry_messages = messages
+                        reply, error = client.complete(
+                            messages=_retry_messages,
+                            json_schema=json_schemas[cfg["model"]],
+                            creativity=effective_creativity,
+                            thinking_budget=thinking_budget if cfg["provider"] in ("google", "openai", "anthropic", "huggingface", "huggingface-together") else None,
+                            max_retries=max_retries,
+                        )
+                        if error:
+                            json_result = '{"1":"e"}'
+                            break  # API-level failure already retried by max_retries
                         json_result = extract_json(reply)
-                        json_result = _try_formatter_fallback(json_result, reply)
+                        _json_valid, _ = validate_classification_json(json_result, _n_cats)
-                        # Run Chain of Verification if enabled
-                        if chain_of_verification and not error:
-                            step2, step3, step4 = build_cove_prompts(
-                                cove_original_task, response_text
-                            )
-                            json_result = run_chain_of_verification(
-                                client=client,
-                                initial_reply=json_result,
-                                step2_prompt=step2,
-                                step3_prompt=step3,
-                                step4_prompt=step4,
-                                json_schema=json_schemas[cfg["model"]],
-                                creativity=effective_creativity,
-                                max_retries=max_retries,
-                            )
-                            json_result = _try_formatter_fallback(json_result, json_result)
+                        if _json_valid:
+                            break
+                        # Final attempt: invoke formatter before giving up
+                        if _json_attempt == json_retries:
+                            json_result = _try_formatter_fallback(json_result, reply)
+                    # Run Chain of Verification if enabled
+                    if chain_of_verification and not error:
+                        step2, step3, step4 = build_cove_prompts(
+                            cove_original_task, response_text
+                        )
+                        json_result = run_chain_of_verification(
+                            client=client,
+                            initial_reply=json_result,
+                            step2_prompt=step2,
+                            step3_prompt=step3,
+                            step4_prompt=step4,
+                            json_schema=json_schemas[cfg["model"]],
+                            creativity=effective_creativity,
+                            max_retries=max_retries,
+                        )
+                        json_result = _try_formatter_fallback(json_result, json_result)
             return (cfg["sanitized_name"], json_result, error)
@@ -3760,7 +3787,7 @@ def summarize_ensemble(
     context_prompt: bool = False,
     step_back_prompt: bool = False,
     max_retries: int = 5,
-    batch_retries: int = 2,
+    batch_retries: int = 1,
     retry_delay: float = 1.0,
     row_delay: float = 0.0,
     fail_strategy: str = "partial",
@@ -3806,7 +3833,7 @@ def summarize_ensemble(
         context_prompt: Add expert context prefix
         step_back_prompt: Enable step-back prompting
         max_retries: Max retries per API call
-        batch_retries: Number of batch retry passes for failed items
+        batch_retries: Number of batch retry passes for failed items (default 1)
         retry_delay: Delay between retries in seconds
         row_delay: Delay in seconds between processing each row (default 0.0)
         fail_strategy: How to handle failures - "partial" (default) or "strict"

{cat_stack-1.4.0 → cat_stack-1.4.1}/.gitignore RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/LICENSE RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/README.md RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/pyproject.toml RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/cat_stack/__init__.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/__init__.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/_batch.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/_category_analysis.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/_chunked.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/_embeddings.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/_formatter.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/_pilot_test.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/_prompts.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/_providers.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/_review_ui.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/_tiebreaker.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/_utils.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/_web_fetch.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/_wrapper_helpers.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/calls/CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/calls/__init__.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/calls/all_calls.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/calls/image_CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/calls/image_stepback.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/calls/pdf_CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/calls/pdf_stepback.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/calls/stepback.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/calls/top_n.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/explore.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/extract.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/image_functions.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/images/circle.png RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/images/cube.png RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/images/diamond.png RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/images/overlapping_pentagons.png RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/images/rectangles.png RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/model_reference_list.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/pdf_functions.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/prompt_tune.py RENAMED Viewed

File without changes

{cat_stack-1.4.0 → cat_stack-1.4.1}/src/catstack/text_functions.py RENAMED Viewed

File without changes

cat-stack 1.4.0__tar.gz → 1.4.1__tar.gz

cat-stack 1.4.0__tar.gz → 1.4.1__tar.gz