PyPI - cat-stack - Versions diffs - 1.0.7__tar.gz → 1.0.10__tar.gz - Mend

cat-stack 1.0.7__tar.gz → 1.0.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

{cat_stack-1.0.7 → cat_stack-1.0.10}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cat-stack
-Version: 1.0.7
+Version: 1.0.10
 Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
 Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
 Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
@@ -19,10 +19,8 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
 Requires-Python: >=3.8
-Requires-Dist: anthropic
-Requires-Dist: openai
 Requires-Dist: pandas
-Requires-Dist: perplexityai
+Requires-Dist: regex
 Requires-Dist: requests
 Requires-Dist: tqdm
 Provides-Extra: docx

{cat_stack-1.0.7 → cat_stack-1.0.10}/pyproject.toml RENAMED Viewed

@@ -28,9 +28,7 @@ dependencies = [
   "pandas",
   "tqdm",
   "requests",
-  "openai",
-  "anthropic",
-  "perplexityai"
+  "regex",
 ]
 [project.optional-dependencies]

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/__about__.py RENAMED Viewed

@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
-__version__ = "1.0.7"
+__version__ = "1.0.10"
 __author__ = "Chris Soria"
 __email__ = "chrissoria@berkeley.edu"
 __title__ = "cat-stack"

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/_providers.py RENAMED Viewed

@@ -187,18 +187,13 @@ class UnifiedLLMClient:
         self.provider = provider.lower()
         self.api_key = api_key
-        # Strip router suffix from model name and detect endpoint
-        clean_model, router = _parse_hf_model_suffix(model)
-        self.model = clean_model if self.provider == "huggingface" else model
+        # Keep full model name with router suffix — the generic HF router
+        # uses the suffix (e.g. :novita, :together) for routing.
+        self.model = model
-        # Auto-detect HuggingFace endpoint
+        # Auto-detect HuggingFace endpoint (but always use generic router)
         if self.provider == "huggingface":
-            detected_url = _detect_huggingface_endpoint(api_key, model)
-            if "together" in detected_url:
-                self.provider = "huggingface-together"
-            elif router and router in _HF_ROUTER_ENDPOINTS:
-                # Use the router-specific endpoint as a custom provider config
-                self._custom_endpoint = _HF_ROUTER_ENDPOINTS[router] + "/chat/completions"
+            _detect_huggingface_endpoint(api_key, model)
         if self.provider not in PROVIDER_CONFIG:
             raise ValueError(f"Unsupported provider: {provider}. "

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/_utils.py RENAMED Viewed

@@ -92,17 +92,37 @@ def validate_classification_json(json_str: str, num_categories: int) -> tuple[bo
         if not isinstance(parsed, dict):
             return False, None
+        # Build a mapping from numeric prefix to value, handling keys like
+        # "1", "1.", "1. Category name", etc.
+        numeric_map = {}
+        for key, val in parsed.items():
+            # Extract leading number from key
+            stripped = str(key).strip()
+            num_part = ""
+            for ch in stripped:
+                if ch.isdigit():
+                    num_part += ch
+                else:
+                    break
+            if num_part:
+                numeric_map[num_part] = val
         # Check that all expected keys are present and values are "0" or "1"
         for i in range(1, num_categories + 1):
             key = str(i)
-            if key not in parsed:
+            if key not in parsed and key not in numeric_map:
                 return False, None
-            val = str(parsed[key]).strip()
+            raw_val = parsed.get(key, numeric_map.get(key))
+            val = str(raw_val).strip()
             if val not in ("0", "1"):
                 return False, None
-        # Normalize values to strings
-        normalized = {str(i): str(parsed[str(i)]).strip() for i in range(1, num_categories + 1)}
+        # Normalize values to strings, preferring exact key match then numeric prefix
+        normalized = {}
+        for i in range(1, num_categories + 1):
+            key = str(i)
+            raw_val = parsed.get(key, numeric_map.get(key))
+            normalized[key] = str(raw_val).strip()
         return True, normalized
     except (json.JSONDecodeError, KeyError, TypeError):

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/summarize.py RENAMED Viewed

@@ -91,6 +91,8 @@ def summarize(
               Background, Key Provisions, Stakeholders/Impact, Implementation)
             - "detailed-report": Exhaustive report enumerating every provision,
               with an additional Details section for exceptions and cross-references
+            - "threads": Social media post for Threads (strict 500-character limit),
+              punchy and engaging, plain language, no hashtags/emojis
             - "alt-text": Factual visual description for blind/visually impaired
               users — no interpretation, only what is literally shown
         max_length (int): Maximum summary length in words
@@ -288,6 +290,22 @@ def summarize(
             ),
             "max_length": None,
         },
+        "threads": {
+            "instructions": (
+                "Write a social media post summarizing this content for Threads. "
+                "STRICT LIMIT: The entire output must be under 400 characters including spaces. "
+                "Structure:\n"
+                "- First line: A single standalone sentence that gives the high-level takeaway. "
+                "Start with who is acting and what they did, "
+                "e.g., 'The Senate just introduced a bill that...', 'House Republicans passed a measure to...', "
+                "'Congress is moving to...'. This sentence must make sense completely on its own.\n"
+                "- Then leave a blank line (two newlines).\n"
+                "- Then 2-3 short sentences with key supporting details — what it does, who it affects, why it matters.\n\n"
+                "No hashtags, no emojis, no bullet points — just clean, "
+                "compelling text. Use plain language."
+            ),
+            "max_length": 80,  # ~500 chars at ~6 chars/word
+        },
         # Keep "report" as alias for backward compat
         "report": {
             "instructions": (

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/text_functions.py RENAMED Viewed

@@ -173,17 +173,36 @@ def validate_classification_json(json_str: str, num_categories: int) -> tuple[bo
         if not isinstance(parsed, dict):
             return False, None
+        # Build a mapping from numeric prefix to value, handling keys like
+        # "1", "1.", "1. Category name", etc.
+        numeric_map = {}
+        for key, val in parsed.items():
+            stripped = str(key).strip()
+            num_part = ""
+            for ch in stripped:
+                if ch.isdigit():
+                    num_part += ch
+                else:
+                    break
+            if num_part:
+                numeric_map[num_part] = val
         # Check that all expected keys are present and values are "0" or "1"
         for i in range(1, num_categories + 1):
             key = str(i)
-            if key not in parsed:
+            if key not in parsed and key not in numeric_map:
                 return False, None
-            val = str(parsed[key]).strip()
+            raw_val = parsed.get(key, numeric_map.get(key))
+            val = str(raw_val).strip()
             if val not in ("0", "1"):
                 return False, None
-        # Normalize values to strings
-        normalized = {str(i): str(parsed[str(i)]).strip() for i in range(1, num_categories + 1)}
+        # Normalize values to strings, preferring exact key match then numeric prefix
+        normalized = {}
+        for i in range(1, num_categories + 1):
+            key = str(i)
+            raw_val = parsed.get(key, numeric_map.get(key))
+            normalized[key] = str(raw_val).strip()
         return True, normalized
     except (json.JSONDecodeError, KeyError, TypeError):

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/text_functions_ensemble.py RENAMED Viewed

@@ -688,6 +688,31 @@ def prepare_model_configs(models: list, auto_download: bool = False) -> list:
     return configs
+def _normalize_json_keys(parsed: dict, expected_keys: set) -> dict:
+    """Normalize JSON keys by extracting leading numeric prefix.
+    Handles keys like "1. Category name" -> "1", which some HuggingFace
+    models produce when using json_object mode (no strict schema enforcement).
+    """
+    normalized = {}
+    for k, v in parsed.items():
+        stripped = str(k).strip()
+        # Extract leading digits
+        num_part = ""
+        for ch in stripped:
+            if ch.isdigit():
+                num_part += ch
+            else:
+                break
+        norm_key = num_part if num_part else stripped
+        # Prefer exact match; only use normalized key if exact not already present
+        if stripped in expected_keys:
+            normalized[stripped] = v
+        elif norm_key in expected_keys and norm_key not in normalized:
+            normalized[norm_key] = v
+    return normalized
 def aggregate_results(
     model_results: dict,
     categories: list,
@@ -725,18 +750,19 @@ def aggregate_results(
         else:
             try:
                 parsed = json.loads(json_str)
+                normalized_parsed = _normalize_json_keys(parsed, expected_keys)
                 # Accept if at least one key is a valid numbered category
                 # with a 0/1 value. Models may only return present categories
                 # (e.g. {"3": "1"}) — missing keys default to 0 downstream.
-                # Strip out any keys with invalid values so they also
-                # default to 0 cleanly instead of hitting error paths.
                 valid_count = sum(
-                    1 for k, v in parsed.items()
+                    1 for k, v in normalized_parsed.items()
                     if k in expected_keys and str(v).strip() in ("0", "1")
                 )
                 if valid_count > 0:
                     cleaned = {
-                        k: str(v).strip() for k, v in parsed.items()
+                        k: str(v).strip() for k, v in normalized_parsed.items()
                         if k in expected_keys and str(v).strip() in ("0", "1")
                     }
                     successful[model_name] = cleaned
@@ -3213,9 +3239,9 @@ Categorize text responses {cove_categorize}:
                         # Check JSON parsing AND schema validation
                         try:
                             parsed = json.loads(json_str)
-                            # At least one valid numbered key with 0/1 value
+                            normalized = _normalize_json_keys(parsed, expected_keys)
                             valid_count = sum(
-                                1 for k, v in parsed.items()
+                                1 for k, v in normalized.items()
                                 if k in expected_keys and str(v).strip() in ("0", "1")
                             )
                             if valid_count == 0:
@@ -3252,8 +3278,9 @@ Categorize text responses {cove_categorize}:
                             # Verify JSON is valid and has correct schema
                             try:
                                 parsed = json.loads(json_result)
+                                normalized = _normalize_json_keys(parsed, expected_keys)
                                 valid_count = sum(
-                                    1 for k, v in parsed.items()
+                                    1 for k, v in normalized.items()
                                     if k in expected_keys and str(v).strip() in ("0", "1")
                                 )
                                 if valid_count > 0:
@@ -3267,8 +3294,9 @@ Categorize text responses {cove_categorize}:
                     if error is None:
                         try:
                             parsed = json.loads(json_result)
+                            normalized = _normalize_json_keys(parsed, expected_keys)
                             valid_count = sum(
-                                1 for k, v in parsed.items()
+                                1 for k, v in normalized.items()
                                 if k in expected_keys and str(v).strip() in ("0", "1")
                             )
                             if valid_count > 0:

{cat_stack-1.0.7 → cat_stack-1.0.10}/.gitignore RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/LICENSE RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/README.md RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/__init__.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/_batch.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/_category_analysis.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/_chunked.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/_embeddings.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/_formatter.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/_pilot_test.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/_review_ui.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/_tiebreaker.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/_web_fetch.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/calls/CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/calls/__init__.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/calls/all_calls.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/calls/image_CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/calls/image_stepback.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/calls/pdf_CoVe.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/calls/pdf_stepback.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/calls/stepback.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/calls/top_n.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/classify.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/explore.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/extract.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/image_functions.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/images/circle.png RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/images/cube.png RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/images/diamond.png RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/images/overlapping_pentagons.png RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/images/rectangles.png RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/model_reference_list.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/pdf_functions.py RENAMED Viewed

File without changes

{cat_stack-1.0.7 → cat_stack-1.0.10}/src/cat_stack/prompt_tune.py RENAMED Viewed

File without changes

cat-stack 1.0.7__tar.gz → 1.0.10__tar.gz

cat-stack 1.0.7__tar.gz → 1.0.10__tar.gz