cat-stack 1.0.0__tar.gz → 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cat_stack-1.0.0 → cat_stack-1.0.2}/PKG-INFO +1 -1
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/__about__.py +1 -1
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/summarize.py +59 -2
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/text_functions_ensemble.py +16 -3
- {cat_stack-1.0.0 → cat_stack-1.0.2}/.gitignore +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/LICENSE +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/README.md +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/pyproject.toml +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/__init__.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_batch.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_category_analysis.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_chunked.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_embeddings.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_formatter.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_pilot_test.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_providers.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_review_ui.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_tiebreaker.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_utils.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_web_fetch.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/CoVe.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/__init__.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/all_calls.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/image_CoVe.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/image_stepback.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/pdf_CoVe.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/pdf_stepback.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/stepback.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/top_n.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/classify.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/explore.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/extract.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/image_functions.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/images/circle.png +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/images/cube.png +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/images/diamond.png +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/images/overlapping_pentagons.png +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/images/rectangles.png +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/model_reference_list.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/pdf_functions.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/prompt_tune.py +0 -0
- {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/text_functions.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cat-stack
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
|
|
5
5
|
Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
4
|
-
__version__ = "1.0.0"
|
|
4
|
+
__version__ = "1.0.2"
|
|
5
5
|
__author__ = "Chris Soria"
|
|
6
6
|
__email__ = "chrissoria@berkeley.edu"
|
|
7
7
|
__title__ = "cat-stack"
|
|
@@ -84,8 +84,13 @@ def summarize(
|
|
|
84
84
|
- "bullets": Bullet-point list of key points
|
|
85
85
|
- "one-liner": Single-sentence summary
|
|
86
86
|
- "structured": Labeled sections (What, Who, Why, Impact)
|
|
87
|
-
- "
|
|
88
|
-
|
|
87
|
+
- "few-paragraphs": 2-4 paragraph summary with context and details
|
|
88
|
+
- "single-page": Single-page summary, thorough but concise
|
|
89
|
+
- "few-pages": Thorough multi-page summary covering all significant points
|
|
90
|
+
- "report": Full-page structured report with headings (Overview,
|
|
91
|
+
Background, Key Provisions, Stakeholders/Impact, Implementation)
|
|
92
|
+
- "detailed-report": Exhaustive report enumerating every provision,
|
|
93
|
+
with an additional Details section for exceptions and cross-references
|
|
89
94
|
max_length (int): Maximum summary length in words
|
|
90
95
|
focus (str): What to focus on (e.g., "main arguments", "emotional content")
|
|
91
96
|
user_model (str): Model to use (default "gpt-4o")
|
|
@@ -217,6 +222,58 @@ def summarize(
|
|
|
217
222
|
),
|
|
218
223
|
"max_length": None,
|
|
219
224
|
},
|
|
225
|
+
"few-paragraphs": {
|
|
226
|
+
"instructions": (
|
|
227
|
+
"Write a summary of 2-4 paragraphs. The first paragraph should "
|
|
228
|
+
"state the main point. Subsequent paragraphs should cover key "
|
|
229
|
+
"details, context, and implications."
|
|
230
|
+
),
|
|
231
|
+
"max_length": 300,
|
|
232
|
+
},
|
|
233
|
+
"single-page": {
|
|
234
|
+
"instructions": (
|
|
235
|
+
"Write a single-page summary. Cover the main points, key details, "
|
|
236
|
+
"and implications in a well-organized format. Use paragraph breaks "
|
|
237
|
+
"between topics. Be thorough but fit everything on one page."
|
|
238
|
+
),
|
|
239
|
+
"max_length": 500,
|
|
240
|
+
},
|
|
241
|
+
"few-pages": {
|
|
242
|
+
"instructions": (
|
|
243
|
+
"Write a thorough multi-page summary. Cover all significant points "
|
|
244
|
+
"in detail. Use clear paragraph breaks between topics. Include "
|
|
245
|
+
"background context, specific provisions or arguments, affected "
|
|
246
|
+
"parties, and implications. Be comprehensive but well-organized."
|
|
247
|
+
),
|
|
248
|
+
"max_length": 1500,
|
|
249
|
+
},
|
|
250
|
+
"detailed-report": {
|
|
251
|
+
"instructions": (
|
|
252
|
+
"Write an exhaustive, detailed report covering every significant "
|
|
253
|
+
"aspect of this document. Use clear headings and be as thorough "
|
|
254
|
+
"as possible — do not omit details.\n\n"
|
|
255
|
+
"## Overview\n"
|
|
256
|
+
"An executive summary (2-3 sentences).\n\n"
|
|
257
|
+
"## Background and Context\n"
|
|
258
|
+
"What is the background? What problem or situation prompted this? "
|
|
259
|
+
"Include relevant history and prior actions.\n\n"
|
|
260
|
+
"## Key Provisions\n"
|
|
261
|
+
"Detail ALL main provisions, requirements, or arguments. "
|
|
262
|
+
"Be specific about numbers, dates, names, and conditions. "
|
|
263
|
+
"Do not summarize — enumerate each provision.\n\n"
|
|
264
|
+
"## Stakeholders and Impact\n"
|
|
265
|
+
"Who is affected? What are the expected consequences? "
|
|
266
|
+
"Include both intended effects and potential concerns.\n\n"
|
|
267
|
+
"## Implementation\n"
|
|
268
|
+
"How will this be implemented? What is the timeline? "
|
|
269
|
+
"Are there enforcement mechanisms or milestones?\n\n"
|
|
270
|
+
"## Additional Details\n"
|
|
271
|
+
"Any other noteworthy details, exceptions, amendments, "
|
|
272
|
+
"or cross-references not covered above."
|
|
273
|
+
),
|
|
274
|
+
"max_length": 3000,
|
|
275
|
+
},
|
|
276
|
+
# Keep "report" as alias for backward compat
|
|
220
277
|
"report": {
|
|
221
278
|
"instructions": (
|
|
222
279
|
"Write a comprehensive full-page report covering the following sections. "
|
|
@@ -3232,6 +3232,7 @@ def build_output_dataframes(
|
|
|
3232
3232
|
|
|
3233
3233
|
# Initialize data structures
|
|
3234
3234
|
combined_data = {
|
|
3235
|
+
"input_index": [],
|
|
3235
3236
|
"input_data": [],
|
|
3236
3237
|
"processing_status": [],
|
|
3237
3238
|
"failed_models": [],
|
|
@@ -3267,8 +3268,13 @@ def build_output_dataframes(
|
|
|
3267
3268
|
combined_data[f"category_{i}_resolved_by"] = []
|
|
3268
3269
|
|
|
3269
3270
|
# Populate data
|
|
3270
|
-
for result in all_results:
|
|
3271
|
-
combined_data["
|
|
3271
|
+
for idx, result in enumerate(all_results):
|
|
3272
|
+
combined_data["input_index"].append(idx)
|
|
3273
|
+
# Truncate input_data for readability
|
|
3274
|
+
raw = result["response"]
|
|
3275
|
+
clean = " ".join(str(raw).split()) # collapse whitespace/newlines
|
|
3276
|
+
preview = clean[:100] + "..." if len(clean) > 100 else clean
|
|
3277
|
+
combined_data["input_data"].append(preview)
|
|
3272
3278
|
aggregated = result["aggregated"]
|
|
3273
3279
|
|
|
3274
3280
|
# Add PDF metadata if present
|
|
@@ -4007,13 +4013,20 @@ def summarize_ensemble(
|
|
|
4007
4013
|
if is_pdf_mode and isinstance(item, tuple) and len(item) == 3:
|
|
4008
4014
|
pdf_path, page_index, page_label = item
|
|
4009
4015
|
row = {
|
|
4016
|
+
"input_index": entry["idx"],
|
|
4010
4017
|
"input_data": page_label,
|
|
4011
4018
|
"pdf_path": pdf_path,
|
|
4012
4019
|
"page_index": page_index,
|
|
4013
4020
|
}
|
|
4014
4021
|
original_text_for_synthesis = page_label # Use page label for synthesis context
|
|
4015
4022
|
else:
|
|
4016
|
-
|
|
4023
|
+
# Truncate input_data for readability; add input_index for joining
|
|
4024
|
+
clean = " ".join(str(item).split()) # collapse whitespace/newlines
|
|
4025
|
+
preview = clean[:100] + "..." if len(clean) > 100 else clean
|
|
4026
|
+
row = {
|
|
4027
|
+
"input_index": entry["idx"],
|
|
4028
|
+
"input_data": preview,
|
|
4029
|
+
}
|
|
4017
4030
|
original_text_for_synthesis = item
|
|
4018
4031
|
|
|
4019
4032
|
# Extract summaries from each model
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|