PyPI - cat-stack - Versions diffs - 2.0.0b6__tar.gz → 2.1.0__tar.gz - Mend

cat-stack 2.0.0b6__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cat-stack
-Version: 2.0.0b6
+Version: 2.1.0
 Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
 Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
 Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
@@ -22,6 +22,8 @@ Requires-Python: >=3.8
 Requires-Dist: pandas
 Requires-Dist: requests
 Requires-Dist: tqdm
+Provides-Extra: agent
+Requires-Dist: cat-claws>=0.1.0; extra == 'agent'
 Provides-Extra: docx
 Requires-Dist: python-docx>=1.0.0; extra == 'docx'
 Provides-Extra: embeddings

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/pyproject.toml RENAMED Viewed

@@ -35,6 +35,7 @@ pdf = ["PyMuPDF>=1.23.0"]
 docx = ["python-docx>=1.0.0"]
 formatter = ["torch>=2.0.0", "transformers>=4.40.0", "accelerate>=0.27.0"]
 embeddings = ["sentence-transformers>=2.2.0"]
+agent = ["cat-claws>=0.1.0"]
 [project.urls]
 Documentation = "https://github.com/chrissoria/cat-stack#readme"

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/__about__.py RENAMED Viewed

@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
-__version__ = "2.0.0b6"
+__version__ = "2.1.0"
 __author__ = "Chris Soria"
 __email__ = "chrissoria@berkeley.edu"
 __title__ = "cat-stack"

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/_providers.py RENAMED Viewed

@@ -721,6 +721,11 @@ PROVIDER_CONFIG = {
         "auth_header": None,
         "auth_prefix": "",
     },
+    "claude-agent": {
+        "endpoint": None,  # Uses the cat-claws SDK adapter, not HTTP
+        "auth_header": None,
+        "auth_prefix": "",
+    },
 }
@@ -1195,7 +1200,7 @@ class UnifiedLLMClient:
                 except FileNotFoundError:
                     return None, (
                         "Claude CLI not found. Install it: "
-                        "https://docs.anthropic.com/en/docs/claude-code"
+                        "https://code.claude.com/docs"
                     )
             return None, "Max retries exceeded"
@@ -1206,6 +1211,55 @@ class UnifiedLLMClient:
             # contract that callers depend on.
             return None, f"Claude CLI subprocess failed: {e} (prompt may be too large for argv)"
+    def _call_claude_agent(
+        self,
+        messages: list,
+        thinking_budget: int = None,
+    ) -> tuple[str, str | None]:
+        """Route one completion through the cat-claws SDK adapter.
+        Like `_call_claude_cli`, this runs on the user's Claude subscription
+        (no API key) and returns the same (text, error) contract. cat-claws is
+        an optional dependency (the `[agent]` extra); a missing install
+        degrades to a clear install hint rather than an ImportError traceback.
+        The adapter is async. complete() is sync and may run inside ensemble
+        worker threads, so we drive one sealed call per invocation with
+        asyncio.run (a fresh loop per call) - never a shared/module-global
+        loop. Message flattening mirrors _call_claude_cli exactly.
+        """
+        try:
+            from catclaws._adapters import get_adapter
+        except ImportError:
+            return None, (
+                "cat-claws is not installed. Install it to use "
+                "model_source='claude-agent': pip install cat-stack[agent]"
+            )
+        import asyncio
+        system_parts = []
+        user_parts = []
+        for msg in messages:
+            if msg["role"] == "system":
+                system_parts.append(msg["content"])
+            elif msg["role"] in ("user", "assistant"):
+                user_parts.append(msg["content"])
+        system_prompt = "\n\n".join(system_parts) if system_parts else None
+        user_prompt = "\n\n".join(user_parts)
+        adapter = get_adapter("claude")
+        try:
+            return asyncio.run(
+                adapter.one_shot(
+                    user_prompt,
+                    system_prompt=system_prompt,
+                    model=self.model,
+                    thinking_budget=thinking_budget or 0,
+                )
+            )
+        except Exception as e:
+            return None, f"cat-claws call failed: {e}"
     def complete(
         self,
         messages: list,
@@ -1249,6 +1303,9 @@ class UnifiedLLMClient:
         if self.provider == "claude-code":
             return self._call_claude_cli(messages, max_retries=max_retries, initial_delay=initial_delay)
+        if self.provider == "claude-agent":
+            return self._call_claude_agent(messages, thinking_budget=thinking_budget)
         headers = self._get_headers()
         payload = self._build_payload(messages, json_schema, creativity, thinking_budget=thinking_budget, force_json=force_json)
@@ -1741,6 +1798,8 @@ def _detect_model_source(user_model, model_source):
     still use this name. Will be inlined in a future cleanup."""
     if model_source and model_source.lower() == "claude-code":
         return "claude-code"
+    if model_source and model_source.lower() == "claude-agent":
+        return "claude-agent"
     return detect_provider(user_model, provider=model_source)

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/_utils.py RENAMED Viewed

@@ -9,6 +9,8 @@ import json
 import re
 __all__ = [
+    # Param resolution
+    "_resolve_description_context",
     # JSON utilities
     "build_json_schema",
     "validate_classification_json",
@@ -31,6 +33,42 @@ __all__ = [
 ]
+# =============================================================================
+# Param Resolution
+# =============================================================================
+def _resolve_description_context(description, survey_question, fn_name):
+    """Reconcile the canonical `description=` with its deprecated alias
+    `survey_question=` for entry points whose downstream prompt assembly
+    still keys the text-prompt "Context:" line (plus step-back and
+    categories="auto") off `survey_question`.
+    Returns the reconciled ``(description, survey_question)`` pair:
+    - only survey_question given -> DeprecationWarning; mirrored into
+      description.
+    - only description given -> mirrored into survey_question so the context
+      framing isn't silently lost (description-only callers include every
+      domain wrapper).
+    - both given -> kept distinct (e.g. cat-vader: survey_question= feed
+      question for the Context line, description= platform context).
+    """
+    import warnings
+    if survey_question:
+        warnings.warn(
+            f"`survey_question=` is deprecated in {fn_name}(); use "
+            "`description=` instead. The value will be mirrored to "
+            "`description` for now.",
+            DeprecationWarning,
+            stacklevel=3,
+        )
+        if not description:
+            description = survey_question
+    elif description:
+        survey_question = description
+    return description, survey_question
 # =============================================================================
 # Label Cleaning
 # =============================================================================

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/classify.py RENAMED Viewed

@@ -41,6 +41,86 @@ from .image_functions import image_multi_class
 from .pdf_functions import pdf_multi_class
+# Minimum estimated API calls (rows x batch-capable models) before the
+# batch-mode cost tip is worth printing. Below this, the absolute savings
+# are small and the async round-trip isn't worth suggesting.
+_BATCH_NUDGE_MIN_REQUESTS = 500
+def _maybe_print_batch_nudge(
+    input_data,
+    models,
+    categories_per_call,
+    chain_of_verification,
+    embedding_tiebreaker,
+    progress_callback,
+):
+    """Print a one-line cost tip when a synchronous run qualifies for
+    batch_mode=True. Checks the same eligibility rules the batch path
+    enforces, so the tip is only shown when opting in would actually work."""
+    # Options the batch path rejects or ignores -> no tip.
+    if (
+        categories_per_call is not None
+        or chain_of_verification
+        or embedding_tiebreaker
+        or progress_callback is not None
+    ):
+        return
+    try:
+        n_rows = len(input_data)
+    except TypeError:
+        return
+    if n_rows == 0:
+        return
+    # Batch mode is text-only.
+    from .text_functions_ensemble import _detect_input_type
+    if _detect_input_type(input_data) != "text":
+        return
+    # Count models on batch-capable providers (openai/anthropic/google/
+    # mistral/xai). `models` is already normalized to a list here; provider
+    # may still be "auto"/None in the spec, so resolve it the same way
+    # prepare_model_configs will.
+    from ._batch import UNSUPPORTED_BATCH_PROVIDERS
+    from ._providers import detect_provider
+    n_capable = 0
+    for m in models:
+        name, provider = None, None
+        if isinstance(m, (list, tuple)):
+            name = m[0] if len(m) >= 1 else None
+            provider = m[1] if len(m) >= 2 else None
+        elif isinstance(m, dict):
+            name = m.get("model")
+            provider = m.get("provider")
+        elif isinstance(m, str):
+            name = m
+        if not provider or provider == "auto":
+            if not name:
+                continue
+            try:
+                provider = detect_provider(name)
+            except Exception:
+                continue
+        if provider not in UNSUPPORTED_BATCH_PROVIDERS:
+            n_capable += 1
+    est_requests = n_rows * n_capable
+    if n_capable == 0 or est_requests < _BATCH_NUDGE_MIN_REQUESTS:
+        return
+    print(
+        f"\n[CatLLM] Tip: this run (~{est_requests:,} API calls across "
+        f"{n_capable} batch-capable model(s)) qualifies for batch_mode=True.\n"
+        "  The async batch API costs ~50% less with identical prompts and\n"
+        "  results, and gets higher rate limits. The trade-off is latency:\n"
+        "  the job completes asynchronously (typically minutes to a few\n"
+        "  hours; 24h worst case). Add batch_mode=True to opt in.\n"
+    )
 def classify(
     input_data,
     categories,
@@ -168,6 +248,8 @@ def classify(
             Providers without batch API (HuggingFace, Perplexity, Ollama) fall back to
             synchronous calls and are merged in with the batch results.
             Incompatible with: PDF/image input, progress_callback.
+            Large qualifying synchronous runs (>= ~500 estimated API calls)
+            print a one-line tip suggesting batch_mode=True.
         batch_poll_interval (float): Seconds between batch job status checks. Default 30.
         batch_timeout (float): Max seconds to wait for batch completion. Default 86400 (24h).
         models (list): For multi-model mode, list of (model, provider, api_key) tuples.
@@ -355,21 +437,13 @@ def classify(
         ...     consensus_threshold="unanimous",  # or "majority", "two-thirds", or 0.75
         ... )
     """
-    # `description` is the canonical content-neutral way to describe the
-    # data; `survey_question` is a soft-deprecated alias kept working for
-    # legacy callers (cat-survey, pre-rename notebooks, the ecosystem
-    # docs). Mirror it into `description` if `description` wasn't set, so
-    # downstream prompt assembly only needs to look in one place.
-    if survey_question:
-        warnings.warn(
-            "`survey_question=` is deprecated in classify(); use "
-            "`description=` instead. The value will be mirrored to "
-            "`description` for now.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        if not description:
-            description = survey_question
+    # Reconcile the canonical `description=` with the deprecated
+    # `survey_question=` (each is mirrored into the other when only one is
+    # given — see _resolve_description_context for the full rules).
+    from ._utils import _resolve_description_context
+    description, survey_question = _resolve_description_context(
+        description, survey_question, "classify"
+    )
     # Build models list
     if models is None:
@@ -620,6 +694,28 @@ def classify(
         print("\n\n".join(_strategy_warnings))
         print()
+    # =========================================================================
+    # Batch-mode cost nudge
+    # =========================================================================
+    # One-line tip when a large synchronous run would qualify for the async
+    # batch API (~50% cheaper, higher rate limits, identical prompts and
+    # results). Fires only when batch_mode=True would actually accept this
+    # run — text input, no batch-incompatible options, at least one
+    # batch-capable provider — so the tip is never a dead end. Informational
+    # only: must never affect or abort the run.
+    if not batch_mode:
+        try:
+            _maybe_print_batch_nudge(
+                input_data=input_data,
+                models=models,
+                categories_per_call=categories_per_call,
+                chain_of_verification=chain_of_verification,
+                embedding_tiebreaker=embedding_tiebreaker,
+                progress_callback=progress_callback,
+            )
+        except Exception:
+            pass
     # =========================================================================
     # JSON formatter fallback
     # =========================================================================

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/prompt_tune.py RENAMED Viewed

@@ -186,19 +186,15 @@ def prompt_tune(
         ...     system_prompt=result["system_prompt"],
         ... )
     """
-    # `description` is the canonical content-neutral way to describe the
-    # data; `survey_question` is a soft-deprecated alias kept working for
-    # legacy callers.
-    if survey_question:
-        warnings.warn(
-            "`survey_question=` is deprecated in prompt_tune(); use "
-            "`description=` instead. The value will be mirrored to "
-            "`description` for now.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        if not description:
-            description = survey_question
+    # Reconcile the canonical `description=` with the deprecated
+    # `survey_question=` (each is mirrored into the other when only one is
+    # given — see _resolve_description_context for the full rules). Without
+    # the description->survey_question direction, description-only callers
+    # ran the whole tuning loop with no "Context:" line in the prompts.
+    from ._utils import _resolve_description_context
+    description, survey_question = _resolve_description_context(
+        description, survey_question, "prompt_tune"
+    )
     # Build models list
     if models is None:

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/text_functions.py RENAMED Viewed

@@ -410,7 +410,7 @@ def explore_corpus(
     provider = detect_provider(model, provider)
     # Validate api_key
-    if provider not in ("ollama", "claude-code") and not api_key:
+    if provider not in ("ollama", "claude-code", "claude-agent") and not api_key:
         raise ValueError(f"api_key is required for provider '{provider}'")
     print(f"Exploring categories for question: '{survey_question}'")
@@ -596,7 +596,7 @@ def explore_common_categories(
     provider = detect_provider(model, provider)
     # Validate api_key
-    if provider not in ("ollama", "claude-code") and not api_key:
+    if provider not in ("ollama", "claude-code", "claude-agent") and not api_key:
         raise ValueError(f"api_key is required for provider '{provider}'")
     # Ollama-specific checks
@@ -1062,7 +1062,7 @@ def multi_class(
     provider = detect_provider(model, provider)
     # Validate api_key requirement
-    if provider not in ("ollama", "claude-code") and not api_key:
+    if provider not in ("ollama", "claude-code", "claude-agent") and not api_key:
         raise ValueError(f"api_key is required for provider '{provider}'")
     # Handle categories="auto" - auto-detect categories from the data

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/text_functions_ensemble.py RENAMED Viewed

@@ -660,6 +660,18 @@ def prepare_model_configs(
                     "Install: https://docs.anthropic.com/en/docs/claude-code\n"
                     + "="*60
                 )
+        elif detected_provider == "claude-agent":
+            try:
+                import catclaws  # noqa: F401
+            except ImportError:
+                raise ConnectionError(
+                    "\n" + "="*60 + "\n"
+                    "  CAT-AGENT NOT INSTALLED\n"
+                    "="*60 + "\n\n"
+                    "The cat-claws package is required to use claude-agent as a provider.\n"
+                    "Install: pip install cat-stack[agent]\n"
+                    + "="*60
+                )
         else:
             # Validate API key exists for cloud providers
             if not api_key:
@@ -670,7 +682,7 @@ def prepare_model_configs(
         # Preflight probe: test the model with a minimal JSON call to catch
         # issues (model not found, structured output not supported) before
         # processing thousands of rows.
-        if detected_provider not in ("ollama", "claude-code"):
+        if detected_provider not in ("ollama", "claude-code", "claude-agent"):
             try:
                 probe_client = UnifiedLLMClient(
                     provider=detected_provider,

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/.gitignore RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/LICENSE RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/README.md RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/cat_stack/__init__.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/__init__.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/_batch.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/_category_analysis.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/_chunked.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/_embeddings.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/_formatter.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/_pilot_test.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/_prompts.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/_review_ui.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/_tiebreaker.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/_web_fetch.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/_wrapper_helpers.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/calls/CoVe.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/calls/__init__.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/calls/image_CoVe.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/calls/image_stepback.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/calls/pdf_CoVe.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/calls/pdf_stepback.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/calls/stepback.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/calls/top_n.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/collapse_themes.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/explore.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/extract.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/image_functions.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/images/circle.png RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/images/cube.png RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/images/diamond.png RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/images/overlapping_pentagons.png RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/images/rectangles.png RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/model_reference_list.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/pdf_functions.py RENAMED Viewed

File without changes

{cat_stack-2.0.0b6 → cat_stack-2.1.0}/src/catstack/summarize.py RENAMED Viewed

File without changes

cat-stack 2.0.0b6__tar.gz → 2.1.0__tar.gz

cat-stack 2.0.0b6__tar.gz → 2.1.0__tar.gz