PyPI - cat-stack - Versions diffs - 2.0.0b6__tar.gz → 2.0.1__tar.gz - Mend

cat-stack 2.0.0b6__tar.gz → 2.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cat-stack
-Version: 2.0.0b6
+Version: 2.0.1
 Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
 Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
 Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/__about__.py RENAMED Viewed

@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
-__version__ = "2.0.0b6"
+__version__ = "2.0.1"
 __author__ = "Chris Soria"
 __email__ = "chrissoria@berkeley.edu"
 __title__ = "cat-stack"

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/_utils.py RENAMED Viewed

@@ -9,6 +9,8 @@ import json
 import re
 __all__ = [
+    # Param resolution
+    "_resolve_description_context",
     # JSON utilities
     "build_json_schema",
     "validate_classification_json",
@@ -31,6 +33,42 @@ __all__ = [
 ]
+# =============================================================================
+# Param Resolution
+# =============================================================================
+def _resolve_description_context(description, survey_question, fn_name):
+    """Reconcile the canonical `description=` with its deprecated alias
+    `survey_question=` for entry points whose downstream prompt assembly
+    still keys the text-prompt "Context:" line (plus step-back and
+    categories="auto") off `survey_question`.
+    Returns the reconciled ``(description, survey_question)`` pair:
+    - only survey_question given -> DeprecationWarning; mirrored into
+      description.
+    - only description given -> mirrored into survey_question so the context
+      framing isn't silently lost (description-only callers include every
+      domain wrapper).
+    - both given -> kept distinct (e.g. cat-vader: survey_question= feed
+      question for the Context line, description= platform context).
+    """
+    import warnings
+    if survey_question:
+        warnings.warn(
+            f"`survey_question=` is deprecated in {fn_name}(); use "
+            "`description=` instead. The value will be mirrored to "
+            "`description` for now.",
+            DeprecationWarning,
+            stacklevel=3,
+        )
+        if not description:
+            description = survey_question
+    elif description:
+        survey_question = description
+    return description, survey_question
 # =============================================================================
 # Label Cleaning
 # =============================================================================

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/classify.py RENAMED Viewed

@@ -41,6 +41,86 @@ from .image_functions import image_multi_class
 from .pdf_functions import pdf_multi_class
+# Minimum estimated API calls (rows x batch-capable models) before the
+# batch-mode cost tip is worth printing. Below this, the absolute savings
+# are small and the async round-trip isn't worth suggesting.
+_BATCH_NUDGE_MIN_REQUESTS = 500
+def _maybe_print_batch_nudge(
+    input_data,
+    models,
+    categories_per_call,
+    chain_of_verification,
+    embedding_tiebreaker,
+    progress_callback,
+):
+    """Print a one-line cost tip when a synchronous run qualifies for
+    batch_mode=True. Checks the same eligibility rules the batch path
+    enforces, so the tip is only shown when opting in would actually work."""
+    # Options the batch path rejects or ignores -> no tip.
+    if (
+        categories_per_call is not None
+        or chain_of_verification
+        or embedding_tiebreaker
+        or progress_callback is not None
+    ):
+        return
+    try:
+        n_rows = len(input_data)
+    except TypeError:
+        return
+    if n_rows == 0:
+        return
+    # Batch mode is text-only.
+    from .text_functions_ensemble import _detect_input_type
+    if _detect_input_type(input_data) != "text":
+        return
+    # Count models on batch-capable providers (openai/anthropic/google/
+    # mistral/xai). `models` is already normalized to a list here; provider
+    # may still be "auto"/None in the spec, so resolve it the same way
+    # prepare_model_configs will.
+    from ._batch import UNSUPPORTED_BATCH_PROVIDERS
+    from ._providers import detect_provider
+    n_capable = 0
+    for m in models:
+        name, provider = None, None
+        if isinstance(m, (list, tuple)):
+            name = m[0] if len(m) >= 1 else None
+            provider = m[1] if len(m) >= 2 else None
+        elif isinstance(m, dict):
+            name = m.get("model")
+            provider = m.get("provider")
+        elif isinstance(m, str):
+            name = m
+        if not provider or provider == "auto":
+            if not name:
+                continue
+            try:
+                provider = detect_provider(name)
+            except Exception:
+                continue
+        if provider not in UNSUPPORTED_BATCH_PROVIDERS:
+            n_capable += 1
+    est_requests = n_rows * n_capable
+    if n_capable == 0 or est_requests < _BATCH_NUDGE_MIN_REQUESTS:
+        return
+    print(
+        f"\n[CatLLM] Tip: this run (~{est_requests:,} API calls across "
+        f"{n_capable} batch-capable model(s)) qualifies for batch_mode=True.\n"
+        "  The async batch API costs ~50% less with identical prompts and\n"
+        "  results, and gets higher rate limits. The trade-off is latency:\n"
+        "  the job completes asynchronously (typically minutes to a few\n"
+        "  hours; 24h worst case). Add batch_mode=True to opt in.\n"
+    )
 def classify(
     input_data,
     categories,
@@ -168,6 +248,8 @@ def classify(
             Providers without batch API (HuggingFace, Perplexity, Ollama) fall back to
             synchronous calls and are merged in with the batch results.
             Incompatible with: PDF/image input, progress_callback.
+            Large qualifying synchronous runs (>= ~500 estimated API calls)
+            print a one-line tip suggesting batch_mode=True.
         batch_poll_interval (float): Seconds between batch job status checks. Default 30.
         batch_timeout (float): Max seconds to wait for batch completion. Default 86400 (24h).
         models (list): For multi-model mode, list of (model, provider, api_key) tuples.
@@ -355,21 +437,13 @@ def classify(
         ...     consensus_threshold="unanimous",  # or "majority", "two-thirds", or 0.75
         ... )
     """
-    # `description` is the canonical content-neutral way to describe the
-    # data; `survey_question` is a soft-deprecated alias kept working for
-    # legacy callers (cat-survey, pre-rename notebooks, the ecosystem
-    # docs). Mirror it into `description` if `description` wasn't set, so
-    # downstream prompt assembly only needs to look in one place.
-    if survey_question:
-        warnings.warn(
-            "`survey_question=` is deprecated in classify(); use "
-            "`description=` instead. The value will be mirrored to "
-            "`description` for now.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        if not description:
-            description = survey_question
+    # Reconcile the canonical `description=` with the deprecated
+    # `survey_question=` (each is mirrored into the other when only one is
+    # given — see _resolve_description_context for the full rules).
+    from ._utils import _resolve_description_context
+    description, survey_question = _resolve_description_context(
+        description, survey_question, "classify"
+    )
     # Build models list
     if models is None:
@@ -620,6 +694,28 @@ def classify(
         print("\n\n".join(_strategy_warnings))
         print()
+    # =========================================================================
+    # Batch-mode cost nudge
+    # =========================================================================
+    # One-line tip when a large synchronous run would qualify for the async
+    # batch API (~50% cheaper, higher rate limits, identical prompts and
+    # results). Fires only when batch_mode=True would actually accept this
+    # run — text input, no batch-incompatible options, at least one
+    # batch-capable provider — so the tip is never a dead end. Informational
+    # only: must never affect or abort the run.
+    if not batch_mode:
+        try:
+            _maybe_print_batch_nudge(
+                input_data=input_data,
+                models=models,
+                categories_per_call=categories_per_call,
+                chain_of_verification=chain_of_verification,
+                embedding_tiebreaker=embedding_tiebreaker,
+                progress_callback=progress_callback,
+            )
+        except Exception:
+            pass
     # =========================================================================
     # JSON formatter fallback
     # =========================================================================

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/prompt_tune.py RENAMED Viewed

@@ -186,19 +186,15 @@ def prompt_tune(
         ...     system_prompt=result["system_prompt"],
         ... )
     """
-    # `description` is the canonical content-neutral way to describe the
-    # data; `survey_question` is a soft-deprecated alias kept working for
-    # legacy callers.
-    if survey_question:
-        warnings.warn(
-            "`survey_question=` is deprecated in prompt_tune(); use "
-            "`description=` instead. The value will be mirrored to "
-            "`description` for now.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        if not description:
-            description = survey_question
+    # Reconcile the canonical `description=` with the deprecated
+    # `survey_question=` (each is mirrored into the other when only one is
+    # given — see _resolve_description_context for the full rules). Without
+    # the description->survey_question direction, description-only callers
+    # ran the whole tuning loop with no "Context:" line in the prompts.
+    from ._utils import _resolve_description_context
+    description, survey_question = _resolve_description_context(
+        description, survey_question, "prompt_tune"
+    )
     # Build models list
     if models is None:

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/.gitignore RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/LICENSE RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/README.md RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/pyproject.toml RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/cat_stack/__init__.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/__init__.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/_batch.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/_category_analysis.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/_chunked.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/_embeddings.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/_formatter.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/_pilot_test.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/_prompts.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/_providers.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/_review_ui.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/_tiebreaker.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/_web_fetch.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/_wrapper_helpers.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/calls/CoVe.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/calls/__init__.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/calls/image_CoVe.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/calls/image_stepback.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/calls/pdf_CoVe.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/calls/pdf_stepback.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/calls/stepback.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/calls/top_n.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/collapse_themes.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/explore.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/extract.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/image_functions.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/images/circle.png RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/images/cube.png RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/images/diamond.png RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/images/overlapping_pentagons.png RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/images/rectangles.png RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/model_reference_list.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/pdf_functions.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/summarize.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/text_functions.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.0.1}/src/catstack/text_functions_ensemble.py RENAMED Viewed

File without changes

cat-stack 2.0.0b6__tar.gz → 2.0.1__tar.gz

cat-stack 2.0.0b6__tar.gz → 2.0.1__tar.gz