PyPI - cat-stack - Versions diffs - 1.2.0__tar.gz → 1.4.0__tar.gz - Mend

cat-stack 1.2.0__tar.gz → 1.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

{cat_stack-1.2.0 → cat_stack-1.4.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cat-stack
-Version: 1.2.0
+Version: 1.4.0
 Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
 Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
 Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/__about__.py RENAMED Viewed

@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
-__version__ = "1.2.0"
+__version__ = "1.4.0"
 __author__ = "Chris Soria"
 __email__ = "chrissoria@berkeley.edu"
 __title__ = "cat-stack"

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/__init__.py RENAMED Viewed

@@ -92,6 +92,7 @@ from ._wrapper_helpers import (
     parse_models_string,
     short_label,
     classify_labels,
+    classify_indicators,
 )
 # Define public API
@@ -144,4 +145,5 @@ __all__ = [
     "parse_models_string",
     "short_label",
     "classify_labels",
+    "classify_indicators",
 ]

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/_wrapper_helpers.py RENAMED Viewed

@@ -328,3 +328,101 @@ def classify_labels(
     if return_full:
         return labels_per_row, df
     return labels_per_row
+def classify_indicators(
+    input_data,
+    categories,
+    *,
+    short_labels: bool = True,
+    return_full: bool = False,
+    **kwargs,
+):
+    """Convenience wrapper around `classify()` returning per-category indicators.
+    Like `classify_labels`, but instead of collapsing the wide DataFrame to
+    one assigned label per row, it returns a dict mapping each category to
+    a list of 0/1 indicators of length `len(input_data)`.
+    This is the right shape for language wrappers that want one indicator
+    variable per category (Stata's wide mode, future R `as_indicators=TRUE`
+    mode) instead of a single label per row.
+    Args:
+        input_data: Same as `classify()`.
+        categories: Same as `classify()` — list of category strings.
+        short_labels: If True (default), use `short_label()` on each
+            category to produce dict keys (`"Positive: defn"` → `"Positive"`).
+            If False, the dict keys are the full category strings.
+        return_full: If True, return `(indicators_dict, df)` so callers also
+            have access to the underlying DataFrame.  Default False.
+        **kwargs: All other kwargs are forwarded to `classify()`.
+    Returns:
+        dict[str, list[int]]: keys are category labels (short or full),
+        values are 0/1 lists of length `len(input_data)`.  In ensemble mode
+        the indicators come from the `category_N_consensus` columns; in
+        single-model mode from `category_N`.
+        Or `(dict, df)` tuple if `return_full=True`.
+    Raises:
+        RuntimeError: if `classify()` returns a DataFrame that contains
+            neither `category_N` nor `category_N_consensus` columns
+            (centralized schema canary, same trigger as `classify_labels`).
+    Example:
+        >>> indicators = classify_indicators(
+        ...     ["I moved for the job and to be near family.",
+        ...      "Lower cost of living was the only reason."],
+        ...     ["Job: career", "Family: relationships", "Cost: affordability"],
+        ...     api_key="...", user_model="gpt-4o-mini",
+        ... )
+        >>> indicators
+        {'Job': [1, 0], 'Family': [1, 0], 'Cost': [0, 1]}
+    """
+    # Reuse classify_labels for the df + centralized schema canary.  We
+    # pass short_labels=False because we want the raw df; we apply our own
+    # short_label() to the dict keys below.
+    _labels, df = classify_labels(
+        input_data,
+        categories,
+        short_labels=False,
+        return_full=True,
+        **kwargs,
+    )
+    cols = list(df.columns)
+    indexed: List[Tuple[int, str]] = []
+    for c in cols:
+        m = _CONSENSUS_COL_PAT.match(c)
+        if m:
+            indexed.append((int(m.group(1)), c))
+    if not indexed:
+        for c in cols:
+            m = _SINGLE_COL_PAT.match(c)
+            if m:
+                indexed.append((int(m.group(1)), c))
+    # classify_labels already raised RuntimeError if neither family is
+    # present, so we know `indexed` is non-empty here.
+    indexed.sort(key=lambda t: t[0])
+    keys = [short_label(c) if short_labels else c for c in categories]
+    out: Dict[str, List[int]] = {}
+    for n, col in indexed:
+        cat_idx = n - 1
+        if not (0 <= cat_idx < len(keys)):
+            continue
+        key = str(keys[cat_idx])
+        series = df[col]
+        values: List[int] = []
+        for v in series:
+            try:
+                values.append(1 if int(v) == 1 else 0)
+            except (ValueError, TypeError):
+                values.append(0)
+        out[key] = values
+    if return_full:
+        return out, df
+    return out

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/prompt_tune.py RENAMED Viewed

@@ -749,17 +749,17 @@ def _generate_category_instruction(
     # Current instruction
     current_text = f'\nCURRENT INSTRUCTION FOR THIS CATEGORY:\n"{current_instruction}"\n' if current_instruction else ""
-    # History of previous attempts for this category so the meta-LLM doesn't repeat itself
+    # History of previous attempts — capped at last 3 to avoid prompt bloat.
+    # Format is deliberately simple (no score numbers) so smaller models can follow it.
     history_text = ""
     if attempt_history:
-        history_lines = []
-        for i, h in enumerate(attempt_history, 1):
-            history_lines.append(
-                f'  Attempt {i}: "{h["instruction"]}"'
-                f' → {h["outcome"]} (holdout score {h["score_before"]:.2f}→{h["score_after"]:.2f})'
-            )
+        recent = attempt_history[-3:]
+        history_lines = [
+            f'  - "{h["instruction"]}" [{h["outcome"]}]'
+            for h in recent
+        ]
         history_text = (
-            "\nPREVIOUS ATTEMPTS FOR THIS CATEGORY — do not repeat these:\n"
+            "\nPREVIOUS INSTRUCTIONS TRIED FOR THIS CATEGORY (already tested — write something different):\n"
             + "\n".join(history_lines)
             + "\n"
         )

{cat_stack-1.2.0 → cat_stack-1.4.0}/.gitignore RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/LICENSE RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/README.md RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/pyproject.toml RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/cat_stack/__init__.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/_batch.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/_category_analysis.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/_chunked.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/_embeddings.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/_formatter.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/_pilot_test.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/_prompts.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/_providers.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/_review_ui.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/_tiebreaker.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/_utils.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/_web_fetch.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/calls/CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/calls/__init__.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/calls/all_calls.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/calls/image_CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/calls/image_stepback.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/calls/pdf_CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/calls/pdf_stepback.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/calls/stepback.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/calls/top_n.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/classify.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/explore.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/extract.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/image_functions.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/images/circle.png RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/images/cube.png RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/images/diamond.png RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/images/overlapping_pentagons.png RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/images/rectangles.png RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/model_reference_list.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/pdf_functions.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/summarize.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/text_functions.py RENAMED Viewed

File without changes

{cat_stack-1.2.0 → cat_stack-1.4.0}/src/catstack/text_functions_ensemble.py RENAMED Viewed

File without changes

cat-stack 1.2.0__tar.gz → 1.4.0__tar.gz

cat-stack 1.2.0__tar.gz → 1.4.0__tar.gz