PyPI - cat-stack - Versions diffs - 1.1.1__tar.gz → 1.3.0__tar.gz - Mend

cat-stack 1.1.1tar.gz → 1.3.0tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

{cat_stack-1.1.1 → cat_stack-1.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cat-stack
-Version: 1.1.1
+Version: 1.3.0
 Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
 Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
 Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/__about__.py RENAMED Viewed

@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
-__version__ = "1.1.1"
+__version__ = "1.3.0"
 __author__ = "Chris Soria"
 __email__ = "chrissoria@berkeley.edu"
 __title__ = "cat-stack"

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/__init__.py RENAMED Viewed

@@ -83,6 +83,17 @@ from .image_functions import (
     image_features,
 )
+# =============================================================================
+# Wrapper helpers (for thin language bindings: Stata, future Julia/CLI)
+# =============================================================================
+from ._wrapper_helpers import (
+    get_backend,
+    parse_kwargs_string,
+    parse_models_string,
+    short_label,
+    classify_labels,
+)
 # Define public API
 __all__ = [
     # Batch mode exceptions
@@ -127,4 +138,10 @@ __all__ = [
     "build_json_schema",
     "extract_json",
     "validate_classification_json",
+    # Wrapper helpers (for thin language bindings)
+    "get_backend",
+    "parse_kwargs_string",
+    "parse_models_string",
+    "short_label",
+    "classify_labels",
 ]

cat_stack-1.3.0/src/catstack/_wrapper_helpers.py ADDED Viewed

@@ -0,0 +1,330 @@
+"""
+Convenience helpers for thin language wrappers (Stata, future Julia/CLI).
+These functions exist so each language wrapper does not have to re-implement
+the same string-parsing and output-shaping logic.  They are thin layers
+over the main `classify()` / `extract()` / `explore()` / `summarize()` API
+— same kwargs, same behavior — plus a few parsers for the string formats
+that wrappers tend to accept from their host languages.
+R users typically pass native lists / tuples and do not need the string
+parsers, but `classify_labels()` is useful for getting one label per row
+without manually walking the DataFrame.
+These helpers are intentionally side-effect free and import-safe: nothing
+here imports a domain sub-package (cat-pol, cat-vader, etc.) until the user
+calls `get_backend("pol")`, so importing `catstack` does not require any
+domain package to be installed.
+"""
+from __future__ import annotations
+import ast
+import importlib
+import re
+from typing import Any, Dict, List, Optional, Tuple, Union
+# -----------------------------------------------------------------------------
+# Domain → module resolution
+# -----------------------------------------------------------------------------
+# Maps the user-facing short domain name to (python import name, pip package).
+# Pip names are always hyphenated ("cat-<domain>").  Import names normally
+# replace the hyphen with an underscore ("cat_pol"), but the historical
+# cat-vader, cat-ademic, and cat-web packages drop the separator entirely
+# ("catvader", "catademic", "catweb").  get_backend() imports the first field
+# and uses the second only in its error message.
+# This dict is the single source of truth across the ecosystem.
+_DOMAIN_PACKAGES: Dict[str, Tuple[str, str]] = {
+    "pol":    ("cat_pol",   "cat-pol"),
+    "vader":  ("catvader",  "cat-vader"),
+    "ademic": ("catademic", "cat-ademic"),
+    "survey": ("cat_survey", "cat-survey"),
+    "cog":    ("cat_cog",   "cat-cog"),
+    "web":    ("catweb",    "cat-web"),
+}
+def get_backend(domain: Optional[str] = None):
+    """Return the Python module to call for a given domain shortform.
+    None, an empty string, or a whitespace-only string returns the base
+    `catstack` module.  Known domain names ("pol", "vader", "ademic",
+    "survey", "cog", "web") return their respective sub-package module.
+    The lookup ignores surrounding whitespace and letter case; the value is
+    passed through `str()` before it is normalized.
+    Args:
+        domain: Short domain name, or None / "" for the base package.
+    Returns:
+        The imported module object.
+    Raises:
+        ValueError: if `domain` is set but not in the known list.
+        ImportError: if importing the domain package fails.  The error
+            message tells the user the exact `catllm setup, domain(X)`
+            command to fix it.  Any ImportError raised during the import is
+            reported this way (the handler does not distinguish a missing
+            package from a failing import inside it); the original
+            exception is chained as `__cause__`.
+    Example:
+        >>> get_backend("").__name__
+        'catstack'
+        >>> get_backend(None).__name__
+        'catstack'
+        >>> # get_backend("pol") returns the cat_pol module if installed
+    """
+    # Falsy or blank domain means "no domain": hand back the base package.
+    if not domain or not str(domain).strip():
+        import catstack  # local import to avoid bootstrap cycles
+        return catstack
+    # Normalize so " Pol " and "POL" both resolve to the "pol" entry.
+    key = str(domain).strip().lower()
+    if key not in _DOMAIN_PACKAGES:
+        valid = ", ".join(_DOMAIN_PACKAGES.keys())
+        raise ValueError(
+            f"Unknown domain: {domain!r}. Valid: {valid}."
+        )
+    module_name, pip_name = _DOMAIN_PACKAGES[key]
+    try:
+        return importlib.import_module(module_name)
+    except ImportError as e:
+        # Re-raise with an actionable message; `from e` keeps the original
+        # traceback available for debugging.
+        raise ImportError(
+            f"Domain package '{pip_name}' is not installed. "
+            f"Run: catllm setup, domain({key})"
+        ) from e
+# -----------------------------------------------------------------------------
+# String parsers (for wrappers whose host language passes options as strings)
+# -----------------------------------------------------------------------------
+def _strip_surrounding_quotes(s: str) -> str:
+    """Strip one pair of surrounding ' or " — Stata `string asis` artifact.
+    The input is whitespace-trimmed first.  If the first and last characters
+    are the same quote character, both are removed; inner quotes are left
+    untouched.  Only one layer is removed per call.
+    The check looks at the two end characters only, not at whether they
+    actually pair with each other: `'a' and 'b'` also starts and ends with
+    `'`, so it comes back as `a' and 'b`.
+    """
+    s = s.strip()
+    # len >= 2 keeps a lone quote character ("'" or '"') from being emptied.
+    if len(s) >= 2 and s[0] == s[-1] and s[0] in ('"', "'"):
+        return s[1:-1]
+    return s
+def parse_kwargs_string(s: Optional[str]) -> Dict[str, Any]:
+    """Parse a `"key=val, key=val"` string into a Python kwargs dict.
+    One pair of surrounding quotes around the whole string is stripped
+    first.  Each value is then run through `ast.literal_eval` so numbers,
+    booleans, strings, and lists all work naturally.  Values that don't
+    parse fall back to the raw (whitespace-trimmed) string — this includes
+    bare words, so `format=bullets` yields `'bullets'` and lowercase
+    `true` / `false` stay strings rather than becoming booleans.
+    Commas inside quotes / brackets are respected (no naive split).
+    Each piece is split at its first `=`.  Pieces without `=` or with an
+    empty key are skipped, and a repeated key keeps its last value.
+    Limitations visible in the scanner below: backslash-escaped quotes are
+    not recognized (a quoted run ends at the next matching quote
+    character), and bracket nesting is counted but not validated.
+    Returns an empty dict for empty / None input.
+    Example:
+        >>> parse_kwargs_string("max_retries=3, retry_delay=0.5")
+        {'max_retries': 3, 'retry_delay': 0.5}
+        >>> parse_kwargs_string("format='bullets', research_question='Why did you move?'")
+        {'format': 'bullets', 'research_question': 'Why did you move?'}
+    """
+    if not s:
+        return {}
+    s = _strip_surrounding_quotes(str(s))
+    if not s.strip():
+        return {}
+    # Walk character-by-character to split on commas at the top level only
+    # (not inside quotes or brackets).
+    pieces: List[str] = []
+    buf: List[str] = []
+    depth = 0
+    quote_char: Optional[str] = None
+    for ch in s:
+        if quote_char:
+            # Inside a quoted run: copy everything verbatim until the same
+            # quote character appears again.
+            buf.append(ch)
+            if ch == quote_char:
+                quote_char = None
+        elif ch in ('"', "'"):
+            quote_char = ch
+            buf.append(ch)
+        elif ch in "([{":
+            depth += 1
+            buf.append(ch)
+        elif ch in ")]}":
+            # An unmatched closer drives depth negative, which disables
+            # splitting for the rest of the string (depth is never 0 again
+            # unless a later opener restores it).
+            depth -= 1
+            buf.append(ch)
+        elif ch == "," and depth == 0:
+            pieces.append("".join(buf))
+            buf = []
+        else:
+            buf.append(ch)
+    # Flush the trailing piece (there is no comma after the last pair).
+    if buf:
+        pieces.append("".join(buf))
+    kwargs: Dict[str, Any] = {}
+    for p in pieces:
+        if "=" not in p:
+            continue
+        # partition splits at the first "=", so values may contain "=".
+        k, _, v = p.partition("=")
+        k = k.strip()
+        v = v.strip()
+        if not k:
+            continue
+        try:
+            kwargs[k] = ast.literal_eval(v)
+        except (ValueError, SyntaxError):
+            # Not a Python literal (bare word, empty value, ...): keep text.
+            kwargs[k] = v
+    return kwargs
+def parse_models_string(
+    s: Optional[str],
+    default_api_key: Optional[str] = None,
+) -> Optional[List[Tuple[str, ...]]]:
+    """Parse `"model provider key; model provider key"` into a list of tuples.
+    One pair of surrounding quotes around the whole string is stripped
+    first.  Entries are separated by `;` and each entry is whitespace-split:
+      * 3 or more fields: the first three become the tuple; any extra
+        fields are discarded.
+      * 2 fields: `default_api_key` fills the third position (useful when
+        the same API key powers multiple cloud models in an ensemble).  If
+        `default_api_key` is None the entry is dropped.
+      * 0 or 1 fields: the entry is dropped.
+    Dropped entries produce no warning or error.
+    Args:
+        s: The models string, or None / "" for "no models given".
+        default_api_key: Key to use for two-field entries.
+    Returns:
+        A list of 3-tuples, or None for empty / None input or when every
+        entry was dropped, so callers can do `if models: ...`.
+    Example:
+        >>> parse_models_string("gpt-4o openai sk-...; claude-haiku-4-5 anthropic sk-ant-...")
+        [('gpt-4o', 'openai', 'sk-...'), ('claude-haiku-4-5', 'anthropic', 'sk-ant-...')]
+        >>> parse_models_string("qwen2.5:7b ollama _")
+        [('qwen2.5:7b', 'ollama', '_')]
+    """
+    if not s or not str(s).strip():
+        return None
+    s = _strip_surrounding_quotes(str(s))
+    if not s.strip():
+        return None
+    out: List[Tuple[str, ...]] = []
+    for entry in s.split(";"):
+        parts = entry.strip().split()
+        if len(parts) >= 3:
+            out.append(tuple(parts[:3]))
+        elif len(parts) == 2 and default_api_key is not None:
+            out.append((parts[0], parts[1], default_api_key))
+        # Anything else is silently dropped as malformed: empty entries,
+        # 1-token entries, and 2-token entries when no default_api_key
+        # was supplied.
+    return out or None
+# -----------------------------------------------------------------------------
+# Output shaping
+# -----------------------------------------------------------------------------
+def short_label(s: Any) -> Any:
+    """Return the short label from a "Label: definition..." string.
+    Verbose category labels improve classification accuracy but are awkward
+    to display in a single output cell.  `short_label("Positive: The
+    respondent expresses approval.")` returns `"Positive"`.
+    The split happens at the first colon only, and the label is
+    whitespace-trimmed.
+    No-colon strings, empty strings, and non-string values are returned
+    unchanged.  So are strings with nothing but whitespace before the first
+    colon (e.g. `": definition"`), since there is no label to extract.
+    """
+    if isinstance(s, str) and ":" in s:
+        head = s.split(":", 1)[0].strip()
+        # Guard against an empty label; fall through to return s as-is.
+        if head:
+            return head
+    return s
+# Patterns used by classify_labels to find the per-category output columns.
+# Both capture the 1-based category number N as group 1 and are anchored, so
+# other columns that merely start with "category_" do not match.
+_CONSENSUS_COL_PAT = re.compile(r"^category_(\d+)_consensus$")
+_SINGLE_COL_PAT = re.compile(r"^category_(\d+)$")
+def classify_labels(
+    input_data,
+    categories,
+    *,
+    short_labels: bool = True,
+    multi_label_sep: str = "; ",
+    return_full: bool = False,
+    **kwargs,
+):
+    """Convenience wrapper around `classify()` returning one label per row.
+    The standard `classify()` returns a wide DataFrame with `category_1`,
+    `category_2`, ... (or `category_1_consensus`, ... in ensemble mode)
+    indicator columns.  `classify_labels()` collapses that to a `list[str]`
+    with one entry per DataFrame row, where each entry is the assigned
+    category name (joined by `multi_label_sep` if more than one category
+    applies, in ascending category-number order).
+    This is the function thin language wrappers should call when the host
+    language wants one labeled column per row (Stata, simple CLI tools).
+    Column selection: if any `category_N_consensus` column exists, only the
+    consensus columns are used and plain `category_N` columns are ignored.
+    Column number N maps to `categories[N - 1]`; columns whose number falls
+    outside the category list are ignored.  A cell counts as a match when
+    `int(value) == 1`; cells that cannot be converted with `int()` are
+    treated as "not matched".  A row with no matches yields `""`.
+    Args:
+        input_data: List of texts, paths, or otherwise — same as `classify()`.
+        categories: List of category names — same as `classify()`.
+        short_labels: If True (default), apply `short_label()` to each
+            assigned category — so `"Positive: definition..."` becomes
+            `"Positive"` in the output.  Pass False to keep the full text.
+        multi_label_sep: Separator used to join multiple matched categories
+            for a row.  Default `"; "`.  Has no effect when only one
+            category matches per row (the common case).
+        return_full: If True, return `(labels, df)` so callers also have
+            access to the underlying DataFrame.  Default False.
+        **kwargs: All other kwargs are forwarded to `classify()`.
+    Returns:
+        list[str] with one entry per row of the DataFrame returned by
+        `classify()` (expected to equal `len(input_data)` — this function
+        does not check it), or `(labels, df)` tuple if `return_full=True`.
+    Raises:
+        RuntimeError: if `classify()` returns a DataFrame that contains
+            neither `category_N` nor `category_N_consensus` columns —
+            indicates that cat-stack's output schema has changed
+            incompatibly.
+    Example:
+        >>> labels = classify_labels(
+        ...     ["Great service", "Awful experience"],
+        ...     ["Positive: approval", "Negative: criticism"],
+        ...     api_key="...", user_model="gpt-4o-mini",
+        ... )
+        >>> labels
+        ['Positive', 'Negative']
+    """
+    # Local import — `classify` lives in catstack.classify, but importing it
+    # at module load time would create a circular import (classify.py
+    # imports from this package indirectly).
+    from .classify import classify
+    df = classify(input_data=input_data, categories=categories, **kwargs)
+    cols = list(df.columns)
+    # Ensemble path first (more specific suffix)
+    # `indexed` holds (category number, column name) pairs.
+    indexed: List[Tuple[int, str]] = []
+    for c in cols:
+        m = _CONSENSUS_COL_PAT.match(c)
+        if m:
+            indexed.append((int(m.group(1)), c))
+    # Fall back to the single-model columns only when no consensus column
+    # was found at all.
+    if not indexed:
+        for c in cols:
+            m = _SINGLE_COL_PAT.match(c)
+            if m:
+                indexed.append((int(m.group(1)), c))
+    if not indexed:
+        raise RuntimeError(
+            "classify() returned no category_N or category_N_consensus "
+            "columns. The output schema may have changed; this version of "
+            "classify_labels cannot map the result back to user-provided "
+            "category names. Got columns: " + ", ".join(cols)
+        )
+    # Numeric sort so category_10 comes after category_9, not after category_1.
+    indexed.sort(key=lambda t: t[0])
+    # Pre-shorten the category list once if requested.
+    if short_labels:
+        display_cats = [short_label(c) for c in categories]
+    else:
+        display_cats = list(categories)
+    labels_per_row: List[str] = []
+    for _, row in df.iterrows():
+        matched: List[str] = []
+        for n, col in indexed:
+            try:
+                if int(row[col]) == 1:
+                    # Column numbers are 1-based; the category list is 0-based.
+                    cat_idx = n - 1
+                    if 0 <= cat_idx < len(display_cats):
+                        matched.append(str(display_cats[cat_idx]))
+            except (ValueError, TypeError, KeyError):
+                # Unconvertible or missing cell: treat as "not assigned".
+                continue
+        labels_per_row.append(multi_label_sep.join(matched))
+    if return_full:
+        return labels_per_row, df
+    return labels_per_row

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/prompt_tune.py RENAMED Viewed

@@ -18,7 +18,7 @@ Categories are never modified — only the system prompt changes.
 from typing import Union
 from ._category_analysis import has_other_category
-from ._pilot_test import collect_corrections
+from ._pilot_test import collect_corrections, compute_metrics
 from .text_functions_ensemble import classify_ensemble
 from ._providers import UnifiedLLMClient, detect_provider
@@ -290,16 +290,15 @@ def prompt_tune(
     corrections = result["corrections"]
     metrics = result["metrics"]
     total_flips = result["total_flips"]
-    baseline_target = _target_fn(metrics)
+    sample_indices = result["sample_indices"]
     # Save ground truth from user corrections for auto-scoring later iterations
-    sample_indices = result["sample_indices"]
     ground_truth = {
         i: c["corrected"] for i, c in zip(sample_indices, corrections)
     }
-    # Per-category metrics from baseline
     per_cat = _compute_per_category_metrics(corrections, categories)
+    baseline_target = _target_fn(metrics)
     # Print baseline summary
     _print_classification_summary("Baseline", metrics, per_cat, categories, total_flips)
@@ -334,6 +333,8 @@ def prompt_tune(
             print(f"  Category {cat_idx}/{len(cats_with_errors)}: {target_cat} ({cat_errors} errors)")
             print(f"  Up to {max_iterations} iteration(s)")
+            attempt_history = []
+            prev_score = baseline_target
             prev_instruction = cat_instructions.get(target_cat, "")
             for attempt in range(1, max_iterations + 1):
@@ -353,6 +354,7 @@ def prompt_tune(
                     meta_source=meta_source,
                     meta_key=meta_key,
                     max_retries=max_retries,
+                    attempt_history=attempt_history,
                 )
                 if not instruction:
@@ -379,7 +381,6 @@ def prompt_tune(
                 if result is None:
                     print("\n[CatLLM] Re-classification failed.")
-                    # Revert this category
                     if prev_instruction:
                         cat_instructions[target_cat] = prev_instruction
                     else:
@@ -395,6 +396,21 @@ def prompt_tune(
                 new_cat_errors = per_cat[target_cat]["fp"] + per_cat[target_cat]["fn"]
+                # Classify outcome
+                if target_score > prev_score + 0.001:
+                    outcome = "improved"
+                elif target_score < prev_score - 0.001:
+                    outcome = "regressed"
+                else:
+                    outcome = "no_change"
+                attempt_history.append({
+                    "instruction": instruction,
+                    "outcome": outcome,
+                    "score_before": prev_score,
+                    "score_after": target_score,
+                })
                 _print_classification_summary(
                     f"{target_cat} attempt {attempt}", metrics, per_cat, categories, total_flips,
                 )
@@ -412,21 +428,18 @@ def prompt_tune(
                     best_target = target_score
                     best_prompt = current_prompt
-                # Check improvement for this category
-                if new_cat_errors < cat_errors:
-                    print(f"    Improved: {target_cat} errors {cat_errors} -> {new_cat_errors}")
+                if outcome == "improved":
+                    print(f"    Improved: {target_cat} errors {cat_errors} → {new_cat_errors}")
+                    prev_score = target_score
                     prev_instruction = instruction
                     cat_errors = new_cat_errors
                     if new_cat_errors == 0:
                         print(f"    {target_cat}: all errors fixed!")
                         break
-                    # Continue trying if there are remaining errors and attempts left
-                elif new_cat_errors == cat_errors:
+                elif outcome == "no_change":
                     print(f"    No change for {target_cat} ({cat_errors} errors)")
-                    # Instruction didn't help — try again with a different one
                 else:
-                    print(f"    Regressed: {target_cat} errors {cat_errors} -> {new_cat_errors}")
-                    # Revert this attempt
+                    print(f"    Regressed: {target_cat} errors {cat_errors} → {new_cat_errors} — reverting")
                     if prev_instruction:
                         cat_instructions[target_cat] = prev_instruction
                     else:
@@ -654,6 +667,7 @@ def _generate_category_instruction(
     meta_source,
     meta_key,
     max_retries,
+    attempt_history=None,
 ):
     """
     Generate a targeted instruction for one category, given full error context.
@@ -735,6 +749,21 @@ def _generate_category_instruction(
     # Current instruction
     current_text = f'\nCURRENT INSTRUCTION FOR THIS CATEGORY:\n"{current_instruction}"\n' if current_instruction else ""
+    # History of previous attempts — capped at last 3 to avoid prompt bloat.
+    # Format is deliberately simple (no score numbers) so smaller models can follow it.
+    history_text = ""
+    if attempt_history:
+        recent = attempt_history[-3:]
+        history_lines = [
+            f'  - "{h["instruction"]}" [{h["outcome"]}]'
+            for h in recent
+        ]
+        history_text = (
+            "\nPREVIOUS INSTRUCTIONS TRIED FOR THIS CATEGORY (already tested — write something different):\n"
+            + "\n".join(history_lines)
+            + "\n"
+        )
     optimize_guidance = {
         "balanced": "",
         "precision": " Focus especially on reducing false positives.",
@@ -753,7 +782,7 @@ ALL ERRORS ACROSS ALL CATEGORIES (<<< marks errors involving your target):
 {all_error_lines and chr(10).join(all_error_lines) or "(no errors)"}
 {target_section}
-{current_text}
+{current_text}{history_text}
 Write a 1-2 sentence instruction for the category "{target_category}" that tells
 a classifier when to assign and when NOT to assign it. Use the full error context
 above to understand how this category relates to others, but only output guidance

{cat_stack-1.1.1 → cat_stack-1.3.0}/.gitignore RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/LICENSE RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/README.md RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/pyproject.toml RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/cat_stack/__init__.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/_batch.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/_category_analysis.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/_chunked.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/_embeddings.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/_formatter.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/_pilot_test.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/_prompts.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/_providers.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/_review_ui.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/_tiebreaker.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/_utils.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/_web_fetch.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/calls/CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/calls/__init__.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/calls/all_calls.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/calls/image_CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/calls/image_stepback.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/calls/pdf_CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/calls/pdf_stepback.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/calls/stepback.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/calls/top_n.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/classify.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/explore.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/extract.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/image_functions.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/images/circle.png RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/images/cube.png RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/images/diamond.png RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/images/overlapping_pentagons.png RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/images/rectangles.png RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/model_reference_list.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/pdf_functions.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/summarize.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/text_functions.py RENAMED Viewed

File without changes

{cat_stack-1.1.1 → cat_stack-1.3.0}/src/catstack/text_functions_ensemble.py RENAMED Viewed

File without changes

cat-stack 1.1.1__tar.gz → 1.3.0__tar.gz

cat-stack 1.1.1tar.gz → 1.3.0tar.gz