cat-stack 1.0.0__tar.gz → 1.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {cat_stack-1.0.0 → cat_stack-1.0.2}/PKG-INFO +1 -1
  2. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/__about__.py +1 -1
  3. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/summarize.py +59 -2
  4. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/text_functions_ensemble.py +16 -3
  5. {cat_stack-1.0.0 → cat_stack-1.0.2}/.gitignore +0 -0
  6. {cat_stack-1.0.0 → cat_stack-1.0.2}/LICENSE +0 -0
  7. {cat_stack-1.0.0 → cat_stack-1.0.2}/README.md +0 -0
  8. {cat_stack-1.0.0 → cat_stack-1.0.2}/pyproject.toml +0 -0
  9. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/__init__.py +0 -0
  10. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_batch.py +0 -0
  11. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_category_analysis.py +0 -0
  12. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_chunked.py +0 -0
  13. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_embeddings.py +0 -0
  14. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_formatter.py +0 -0
  15. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_pilot_test.py +0 -0
  16. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_providers.py +0 -0
  17. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_review_ui.py +0 -0
  18. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_tiebreaker.py +0 -0
  19. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_utils.py +0 -0
  20. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/_web_fetch.py +0 -0
  21. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/CoVe.py +0 -0
  22. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/__init__.py +0 -0
  23. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/all_calls.py +0 -0
  24. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/image_CoVe.py +0 -0
  25. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/image_stepback.py +0 -0
  26. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/pdf_CoVe.py +0 -0
  27. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/pdf_stepback.py +0 -0
  28. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/stepback.py +0 -0
  29. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/calls/top_n.py +0 -0
  30. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/classify.py +0 -0
  31. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/explore.py +0 -0
  32. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/extract.py +0 -0
  33. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/image_functions.py +0 -0
  34. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/images/circle.png +0 -0
  35. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/images/cube.png +0 -0
  36. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/images/diamond.png +0 -0
  37. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/images/overlapping_pentagons.png +0 -0
  38. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/images/rectangles.png +0 -0
  39. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/model_reference_list.py +0 -0
  40. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/pdf_functions.py +0 -0
  41. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/prompt_tune.py +0 -0
  42. {cat_stack-1.0.0 → cat_stack-1.0.2}/src/cat_stack/text_functions.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-stack
3
- Version: 1.0.0
3
+ Version: 1.0.2
4
4
  Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: GPL-3.0-or-later
4
- __version__ = "1.0.0"
4
+ __version__ = "1.0.2"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-stack"
@@ -84,8 +84,13 @@ def summarize(
84
84
  - "bullets": Bullet-point list of key points
85
85
  - "one-liner": Single-sentence summary
86
86
  - "structured": Labeled sections (What, Who, Why, Impact)
87
- - "report": Comprehensive full-page report with Overview, Background,
88
- Key Provisions, Stakeholders/Impact, and Implementation sections
87
+ - "few-paragraphs": 2-4 paragraph summary with context and details
88
+ - "single-page": Single-page summary, thorough but concise
89
+ - "few-pages": Thorough multi-page summary covering all significant points
90
+ - "report": Full-page structured report with headings (Overview,
91
+ Background, Key Provisions, Stakeholders/Impact, Implementation)
92
+ - "detailed-report": Exhaustive report enumerating every provision,
93
+ with an additional Details section for exceptions and cross-references
89
94
  max_length (int): Maximum summary length in words
90
95
  focus (str): What to focus on (e.g., "main arguments", "emotional content")
91
96
  user_model (str): Model to use (default "gpt-4o")
@@ -217,6 +222,58 @@ def summarize(
217
222
  ),
218
223
  "max_length": None,
219
224
  },
225
+ "few-paragraphs": {
226
+ "instructions": (
227
+ "Write a summary of 2-4 paragraphs. The first paragraph should "
228
+ "state the main point. Subsequent paragraphs should cover key "
229
+ "details, context, and implications."
230
+ ),
231
+ "max_length": 300,
232
+ },
233
+ "single-page": {
234
+ "instructions": (
235
+ "Write a single-page summary. Cover the main points, key details, "
236
+ "and implications in a well-organized format. Use paragraph breaks "
237
+ "between topics. Be thorough but fit everything on one page."
238
+ ),
239
+ "max_length": 500,
240
+ },
241
+ "few-pages": {
242
+ "instructions": (
243
+ "Write a thorough multi-page summary. Cover all significant points "
244
+ "in detail. Use clear paragraph breaks between topics. Include "
245
+ "background context, specific provisions or arguments, affected "
246
+ "parties, and implications. Be comprehensive but well-organized."
247
+ ),
248
+ "max_length": 1500,
249
+ },
250
+ "detailed-report": {
251
+ "instructions": (
252
+ "Write an exhaustive, detailed report covering every significant "
253
+ "aspect of this document. Use clear headings and be as thorough "
254
+ "as possible — do not omit details.\n\n"
255
+ "## Overview\n"
256
+ "An executive summary (2-3 sentences).\n\n"
257
+ "## Background and Context\n"
258
+ "What is the background? What problem or situation prompted this? "
259
+ "Include relevant history and prior actions.\n\n"
260
+ "## Key Provisions\n"
261
+ "Detail ALL main provisions, requirements, or arguments. "
262
+ "Be specific about numbers, dates, names, and conditions. "
263
+ "Do not summarize — enumerate each provision.\n\n"
264
+ "## Stakeholders and Impact\n"
265
+ "Who is affected? What are the expected consequences? "
266
+ "Include both intended effects and potential concerns.\n\n"
267
+ "## Implementation\n"
268
+ "How will this be implemented? What is the timeline? "
269
+ "Are there enforcement mechanisms or milestones?\n\n"
270
+ "## Additional Details\n"
271
+ "Any other noteworthy details, exceptions, amendments, "
272
+ "or cross-references not covered above."
273
+ ),
274
+ "max_length": 3000,
275
+ },
276
+ # Keep "report" as alias for backward compat
220
277
  "report": {
221
278
  "instructions": (
222
279
  "Write a comprehensive full-page report covering the following sections. "
@@ -3232,6 +3232,7 @@ def build_output_dataframes(
3232
3232
 
3233
3233
  # Initialize data structures
3234
3234
  combined_data = {
3235
+ "input_index": [],
3235
3236
  "input_data": [],
3236
3237
  "processing_status": [],
3237
3238
  "failed_models": [],
@@ -3267,8 +3268,13 @@ def build_output_dataframes(
3267
3268
  combined_data[f"category_{i}_resolved_by"] = []
3268
3269
 
3269
3270
  # Populate data
3270
- for result in all_results:
3271
- combined_data["input_data"].append(result["response"])
3271
+ for idx, result in enumerate(all_results):
3272
+ combined_data["input_index"].append(idx)
3273
+ # Truncate input_data for readability
3274
+ raw = result["response"]
3275
+ clean = " ".join(str(raw).split()) # collapse whitespace/newlines
3276
+ preview = clean[:100] + "..." if len(clean) > 100 else clean
3277
+ combined_data["input_data"].append(preview)
3272
3278
  aggregated = result["aggregated"]
3273
3279
 
3274
3280
  # Add PDF metadata if present
@@ -4007,13 +4013,20 @@ def summarize_ensemble(
4007
4013
  if is_pdf_mode and isinstance(item, tuple) and len(item) == 3:
4008
4014
  pdf_path, page_index, page_label = item
4009
4015
  row = {
4016
+ "input_index": entry["idx"],
4010
4017
  "input_data": page_label,
4011
4018
  "pdf_path": pdf_path,
4012
4019
  "page_index": page_index,
4013
4020
  }
4014
4021
  original_text_for_synthesis = page_label # Use page label for synthesis context
4015
4022
  else:
4016
- row = {"input_data": item}
4023
+ # Truncate input_data for readability; add input_index for joining
4024
+ clean = " ".join(str(item).split()) # collapse whitespace/newlines
4025
+ preview = clean[:100] + "..." if len(clean) > 100 else clean
4026
+ row = {
4027
+ "input_index": entry["idx"],
4028
+ "input_data": preview,
4029
+ }
4017
4030
  original_text_for_synthesis = item
4018
4031
 
4019
4032
  # Extract summaries from each model
File without changes
File without changes
File without changes
File without changes