cat-llm 0.0.72__py3-none-any.whl → 0.0.74__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-llm
3
- Version: 0.0.72
3
+ Version: 0.0.74
4
4
  Summary: A tool for categorizing text data and images using LLMs and vision models
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
@@ -189,16 +189,23 @@ Performs multi-label classification of text responses into user-defined categori
189
189
  Processes each text response individually, assigning one or more categories from the provided list. Supports flexible output formatting and optional saving of results to CSV for easy integration with data analysis workflows.
190
190
 
191
191
  **Parameters:**
192
- - `survey_question` (str): The survey question being analyzed
193
192
  - `survey_input` (list): List of text responses to classify
194
193
  - `categories` (list): List of predefined categories for classification
195
194
  - `api_key` (str): API key for the LLM service
196
- - `user_model` (str, default="gpt-4o"): Specific model to use
197
- - `creativity` (float, default=0): Temperature/randomness setting (0.0-1.0)
195
+ - `user_model` (str, default="gpt-5"): Specific model to use
196
+ - `user_prompt` (str, optional): Custom prompt template to override default prompting
197
+ - `survey_question` (str, default=""): The survey question being analyzed
198
+ - `example1` through `example6` (dict, optional): Few-shot learning examples (format: {"response": "...", "categories": [...]})
199
+ - `creativity` (float, optional): Temperature/randomness setting (0.0-1.0, varies by model)
198
200
  - `safety` (bool, default=False): Enable safety checks on responses and saves to CSV at each API call step
201
+ - `to_csv` (bool, default=False): Whether to save results to CSV
202
+ - `chain_of_verification` (bool, default=False): Enable Chain-of-Verification prompting technique for improved accuracy
203
+ - `chain_of_thought` (bool, default=True): Enable Chain-of-Thought prompting technique for improved accuracy
204
+ - `step_back_prompt` (bool, default=False): Enable step-back prompting to analyze higher-level context before classification
205
+ - `context_prompt` (bool, default=False): Add expert role and behavioral guidelines to the prompt
199
206
  - `filename` (str, default="categorized_data.csv"): Filename for CSV output
200
207
  - `save_directory` (str, optional): Directory path to save the CSV file
201
- - `model_source` (str, default="OpenAI"): Model provider ("OpenAI", "Anthropic", "Perplexity", "Mistral")
208
+ - `model_source` (str, default="auto"): Model provider ("auto", "OpenAI", "Anthropic", "Google", "Mistral", "Perplexity", "Huggingface")
202
209
 
203
210
  **Returns:**
204
211
  - `pandas.DataFrame`: DataFrame with classification results, columns formatted as specified
@@ -1,10 +1,10 @@
1
1
  catllm/CERAD_functions.py,sha256=q4HbP5e2Yu8NnZZ-2eX4sImyj6u3i8xWcq0pYU81iis,22676
2
- catllm/__about__.py,sha256=b1GxUdRsKq07s2Aa4Uh-4SoB3kpKG15Hf6HbSFsXn7s,430
2
+ catllm/__about__.py,sha256=E0enlOPQDj7XaMZv62lffULZGOUEAqpRIyZ12A6f3zk,430
3
3
  catllm/__init__.py,sha256=sf02zp7N0NW0mAQi7eQ4gliWR1EwoqvXkHN2HwwjcTE,372
4
4
  catllm/build_web_research.py,sha256=880dfE2bEQb-FrXP-42JoLLtyc9ox_sBULDr38xiTiQ,22655
5
5
  catllm/image_functions.py,sha256=8_FftRU285x1HT-AgNkaobefQVD-5q7ZY_t7JFdL3Sg,36177
6
6
  catllm/model_reference_list.py,sha256=37pWwMcgnf4biE3BVRluH5oz2P6ccdJJiCVNHodBH8k,2307
7
- catllm/text_functions.py,sha256=vVjHxgD0eRZrJOO4eStEawQ4suJ9hxFfvQ0zpu-cO4w,32551
7
+ catllm/text_functions.py,sha256=O6wfDh50Xtc0JvQtjWb9L9PgtBP6cjxWBw-PCNmbiaE,33371
8
8
  catllm/calls/CoVe.py,sha256=Y9OGJbaeJ3Odwira92cPXUlnm_ADFqvpOSFSNjFzMMU,10847
9
9
  catllm/calls/__init__.py,sha256=fWuMwLeSGa6zXJYd4s8IyNblsD62G-1NMUsOKrNIkoI,725
10
10
  catllm/calls/all_calls.py,sha256=E25KpZ_MakMDeCpNCOOM8kQvlfex6UMjnGN1wHkA4AI,14356
@@ -13,7 +13,7 @@ catllm/images/cube.png,sha256=nFec3e5bmRe4zrBCJ8QK-HcJLrG7u7dYdKhmdMfacfE,77275
13
13
  catllm/images/diamond.png,sha256=rJDZKtsnBGRO8FPA0iHuA8FvHFGi9PkI_DWSFdw6iv0,99568
14
14
  catllm/images/overlapping_pentagons.png,sha256=VO5plI6eoVRnjfqinn1nNzsCP2WQhuQy71V0EASouW4,71208
15
15
  catllm/images/rectangles.png,sha256=2XM16HO9EYWj2yHgN4bPXaCwPfl7iYQy0tQUGaJX9xg,40692
16
- cat_llm-0.0.72.dist-info/METADATA,sha256=ruiNx2OfWoN_eBlJtqD-noOLBaZOHpWpurftV5jhvvg,22424
17
- cat_llm-0.0.72.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
- cat_llm-0.0.72.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
19
- cat_llm-0.0.72.dist-info/RECORD,,
16
+ cat_llm-0.0.74.dist-info/METADATA,sha256=DYaL_OFgi9MuFpWLd1DHgqVi_osTwK1DJH-E5Q2kaa8,23214
17
+ cat_llm-0.0.74.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
+ cat_llm-0.0.74.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
19
+ cat_llm-0.0.74.dist-info/RECORD,,
catllm/__about__.py CHANGED
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.0.72"
4
+ __version__ = "0.0.74"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-llm"
catllm/text_functions.py CHANGED
@@ -260,6 +260,7 @@ def multi_class(
260
260
  safety = False,
261
261
  to_csv = False,
262
262
  chain_of_verification = False,
263
+ chain_of_thought = True,
263
264
  step_back_prompt = False,
264
265
  context_prompt = False,
265
266
  filename = "categorized_data.csv",
@@ -397,12 +398,27 @@ def multi_class(
397
398
  extracted_jsons.append(default_json)
398
399
  #print(f"Skipped NaN input.")
399
400
  else:
401
+ if chain_of_thought:
402
+ prompt = f"""{survey_question_context}
400
403
 
401
- prompt = f"""{survey_question_context} \
402
- Categorize this survey response "{response}" into the following categories that apply: \
403
- {categories_str}
404
- {examples_text}
405
- Provide your work in JSON format where the number belonging to each category is the key and a 1 if the category is present and a 0 if it is not present as key values."""
404
+ Categorize this survey response "{response}" into the following categories that apply:
405
+ {categories_str}
406
+
407
+ Let's think step by step:
408
+ 1. First, identify the main themes mentioned in the response
409
+ 2. Then, match each theme to the relevant categories
410
+ 3. Finally, assign 1 to matching categories and 0 to non-matching categories
411
+
412
+ {examples_text}
413
+
414
+ Provide your reasoning for each category, then provide your final answer in JSON format where the number belonging to each category is the key and a 1 if the category is present and a 0 if it is not present as key values."""
415
+ else:
416
+
417
+ prompt = f"""{survey_question_context} \
418
+ Categorize this survey response "{response}" into the following categories that apply: \
419
+ {categories_str}
420
+ {examples_text}
421
+ Provide your work in JSON format where the number belonging to each category is the key and a 1 if the category is present and a 0 if it is not present as key values."""
406
422
 
407
423
  if context_prompt:
408
424
  context = """You are an expert researcher in survey data categorization.
@@ -410,7 +426,6 @@ def multi_class(
410
426
  When uncertain, prioritize precision over recall."""
411
427
 
412
428
  prompt = context + prompt
413
- print(prompt)
414
429
 
415
430
  if chain_of_verification:
416
431
  step2_prompt = f"""You provided this initial categorization: