cat-stack 0.3.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cat_stack-0.3.0 → cat_stack-0.4.0}/PKG-INFO +1 -1
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/__about__.py +1 -1
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/classify.py +46 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/.gitignore +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/LICENSE +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/README.md +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/pyproject.toml +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/__init__.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_batch.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_category_analysis.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_chunked.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_embeddings.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_formatter.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_pilot_test.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_providers.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_review_ui.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_tiebreaker.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_utils.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_web_fetch.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/CoVe.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/__init__.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/all_calls.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/image_CoVe.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/image_stepback.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/pdf_CoVe.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/pdf_stepback.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/stepback.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/top_n.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/explore.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/extract.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/image_functions.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/images/circle.png +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/images/cube.png +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/images/diamond.png +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/images/overlapping_pentagons.png +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/images/rectangles.png +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/model_reference_list.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/pdf_functions.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/prompt_tune.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/summarize.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/text_functions.py +0 -0
- {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/text_functions_ensemble.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cat-stack
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
|
|
5
5
|
Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
4
|
-
__version__ = "0.3.0"
|
|
4
|
+
__version__ = "0.4.0"
|
|
5
5
|
__author__ = "Chris Soria"
|
|
6
6
|
__email__ = "chrissoria@berkeley.edu"
|
|
7
7
|
__title__ = "cat-stack"
|
|
@@ -100,6 +100,10 @@ def classify(
|
|
|
100
100
|
categories_per_call: int = None,
|
|
101
101
|
pilot_test: Union[bool, int] = False,
|
|
102
102
|
system_prompt: str = "",
|
|
103
|
+
prompt_tune: Union[bool, int] = False,
|
|
104
|
+
tune_iterations: int = 3,
|
|
105
|
+
tune_ui: str = "browser",
|
|
106
|
+
tune_optimize: str = "balanced",
|
|
103
107
|
):
|
|
104
108
|
"""
|
|
105
109
|
Unified classification function for text, image, and PDF inputs.
|
|
@@ -242,6 +246,17 @@ def classify(
|
|
|
242
246
|
classification prompt. Use prompt_tune() to generate an optimized
|
|
243
247
|
instruction from labeled examples. Takes precedence over
|
|
244
248
|
context_prompt when provided. Default "".
|
|
249
|
+
prompt_tune (bool or int): Run automatic prompt optimization before the
|
|
250
|
+
full classification. Classifies a small sample, opens a browser UI
|
|
251
|
+
for corrections, then generates an optimized system_prompt.
|
|
252
|
+
- False (default): Skip prompt tuning.
|
|
253
|
+
- True: Tune on 10 random items.
|
|
254
|
+
- int: Tune on that many random items.
|
|
255
|
+
Overrides system_prompt if provided.
|
|
256
|
+
tune_iterations (int): Max optimization attempts per category. Default 3.
|
|
257
|
+
tune_ui (str): Review UI for prompt tuning — "browser" or "terminal".
|
|
258
|
+
tune_optimize (str): Metric to optimize — "balanced", "precision",
|
|
259
|
+
or "sensitivity". Default "balanced".
|
|
245
260
|
|
|
246
261
|
Returns:
|
|
247
262
|
pd.DataFrame: Results with classification columns.
|
|
@@ -400,6 +415,37 @@ def classify(
|
|
|
400
415
|
if pilot_result is None or not pilot_result["proceed"]:
|
|
401
416
|
return None
|
|
402
417
|
|
|
418
|
+
# =========================================================================
|
|
419
|
+
# Prompt tuning — optimize system_prompt before full classification
|
|
420
|
+
# =========================================================================
|
|
421
|
+
if prompt_tune and categories and categories != "auto":
|
|
422
|
+
from .prompt_tune import prompt_tune as _prompt_tune
|
|
423
|
+
|
|
424
|
+
tune_sample_size = prompt_tune if isinstance(prompt_tune, int) else 10
|
|
425
|
+
|
|
426
|
+
tune_result = _prompt_tune(
|
|
427
|
+
input_data=input_data,
|
|
428
|
+
categories=categories,
|
|
429
|
+
models=models,
|
|
430
|
+
description=description,
|
|
431
|
+
survey_question=survey_question,
|
|
432
|
+
sample_size=tune_sample_size,
|
|
433
|
+
max_iterations=tune_iterations,
|
|
434
|
+
multi_label=multi_label,
|
|
435
|
+
creativity=creativity,
|
|
436
|
+
use_json_schema=use_json_schema,
|
|
437
|
+
consensus_threshold=consensus_threshold,
|
|
438
|
+
max_retries=max_retries,
|
|
439
|
+
input_mode=input_mode,
|
|
440
|
+
ui=tune_ui,
|
|
441
|
+
optimize=tune_optimize,
|
|
442
|
+
add_other=False, # already handled above
|
|
443
|
+
)
|
|
444
|
+
|
|
445
|
+
if tune_result["system_prompt"]:
|
|
446
|
+
system_prompt = tune_result["system_prompt"]
|
|
447
|
+
print(f"\n[CatLLM] Using optimized prompt from prompt_tune.\n")
|
|
448
|
+
|
|
403
449
|
# =========================================================================
|
|
404
450
|
# Validate categories_per_call
|
|
405
451
|
# =========================================================================
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|