cat-stack 0.3.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {cat_stack-0.3.0 → cat_stack-0.4.0}/PKG-INFO +1 -1
  2. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/__about__.py +1 -1
  3. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/classify.py +46 -0
  4. {cat_stack-0.3.0 → cat_stack-0.4.0}/.gitignore +0 -0
  5. {cat_stack-0.3.0 → cat_stack-0.4.0}/LICENSE +0 -0
  6. {cat_stack-0.3.0 → cat_stack-0.4.0}/README.md +0 -0
  7. {cat_stack-0.3.0 → cat_stack-0.4.0}/pyproject.toml +0 -0
  8. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/__init__.py +0 -0
  9. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_batch.py +0 -0
  10. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_category_analysis.py +0 -0
  11. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_chunked.py +0 -0
  12. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_embeddings.py +0 -0
  13. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_formatter.py +0 -0
  14. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_pilot_test.py +0 -0
  15. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_providers.py +0 -0
  16. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_review_ui.py +0 -0
  17. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_tiebreaker.py +0 -0
  18. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_utils.py +0 -0
  19. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/_web_fetch.py +0 -0
  20. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/CoVe.py +0 -0
  21. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/__init__.py +0 -0
  22. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/all_calls.py +0 -0
  23. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/image_CoVe.py +0 -0
  24. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/image_stepback.py +0 -0
  25. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/pdf_CoVe.py +0 -0
  26. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/pdf_stepback.py +0 -0
  27. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/stepback.py +0 -0
  28. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/calls/top_n.py +0 -0
  29. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/explore.py +0 -0
  30. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/extract.py +0 -0
  31. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/image_functions.py +0 -0
  32. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/images/circle.png +0 -0
  33. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/images/cube.png +0 -0
  34. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/images/diamond.png +0 -0
  35. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/images/overlapping_pentagons.png +0 -0
  36. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/images/rectangles.png +0 -0
  37. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/model_reference_list.py +0 -0
  38. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/pdf_functions.py +0 -0
  39. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/prompt_tune.py +0 -0
  40. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/summarize.py +0 -0
  41. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/text_functions.py +0 -0
  42. {cat_stack-0.3.0 → cat_stack-0.4.0}/src/cat_stack/text_functions_ensemble.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-stack
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Domain-agnostic text, image, PDF, and DOCX classification engine powered by LLMs
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-stack#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-stack/issues
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: GPL-3.0-or-later
4
- __version__ = "0.3.0"
4
+ __version__ = "0.4.0"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-stack"
@@ -100,6 +100,10 @@ def classify(
100
100
  categories_per_call: int = None,
101
101
  pilot_test: Union[bool, int] = False,
102
102
  system_prompt: str = "",
103
+ prompt_tune: Union[bool, int] = False,
104
+ tune_iterations: int = 3,
105
+ tune_ui: str = "browser",
106
+ tune_optimize: str = "balanced",
103
107
  ):
104
108
  """
105
109
  Unified classification function for text, image, and PDF inputs.
@@ -242,6 +246,17 @@ def classify(
242
246
  classification prompt. Use prompt_tune() to generate an optimized
243
247
  instruction from labeled examples. Takes precedence over
244
248
  context_prompt when provided. Default "".
249
+ prompt_tune (bool or int): Run automatic prompt optimization before the
250
+ full classification. Classifies a small sample, opens a review UI (see tune_ui)
251
+ for corrections, then generates an optimized system_prompt.
252
+ - False (default): Skip prompt tuning.
253
+ - True: Tune on 10 random items.
254
+ - int: Tune on that many random items.
255
+ When tuning produces a prompt, it replaces any system_prompt passed in.
256
+ tune_iterations (int): Max optimization attempts per category. Default 3.
257
+ tune_ui (str): Review UI for prompt tuning — "browser" or "terminal".
258
+ tune_optimize (str): Metric to optimize — "balanced", "precision",
259
+ or "sensitivity". Default "balanced".
245
260
 
246
261
  Returns:
247
262
  pd.DataFrame: Results with classification columns.
@@ -400,6 +415,37 @@ def classify(
400
415
  if pilot_result is None or not pilot_result["proceed"]:
401
416
  return None
402
417
 
418
+ # =========================================================================
419
+ # Prompt tuning — optimize system_prompt before full classification
420
+ # =========================================================================
421
+ if prompt_tune and categories and categories != "auto":
422
+ from .prompt_tune import prompt_tune as _prompt_tune
423
+
424
+ tune_sample_size = prompt_tune if isinstance(prompt_tune, int) else 10
425
+
426
+ tune_result = _prompt_tune(
427
+ input_data=input_data,
428
+ categories=categories,
429
+ models=models,
430
+ description=description,
431
+ survey_question=survey_question,
432
+ sample_size=tune_sample_size,
433
+ max_iterations=tune_iterations,
434
+ multi_label=multi_label,
435
+ creativity=creativity,
436
+ use_json_schema=use_json_schema,
437
+ consensus_threshold=consensus_threshold,
438
+ max_retries=max_retries,
439
+ input_mode=input_mode,
440
+ ui=tune_ui,
441
+ optimize=tune_optimize,
442
+ add_other=False, # already handled above
443
+ )
444
+
445
+ if tune_result["system_prompt"]:
446
+ system_prompt = tune_result["system_prompt"]
447
+ print(f"\n[CatLLM] Using optimized prompt from prompt_tune.\n")
448
+
403
449
  # =========================================================================
404
450
  # Validate categories_per_call
405
451
  # =========================================================================
File without changes
File without changes
File without changes
File without changes