cat-llm 0.0.73__py3-none-any.whl → 0.0.74__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-llm
3
- Version: 0.0.73
3
+ Version: 0.0.74
4
4
  Summary: A tool for categorizing text data and images using LLMs and vision models
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
@@ -200,6 +200,7 @@ Processes each text response individually, assigning one or more categories from
200
200
  - `safety` (bool, default=False): Enable safety checks on responses and save to CSV at each API call step
201
201
  - `to_csv` (bool, default=False): Whether to save results to CSV
202
202
  - `chain_of_verification` (bool, default=False): Enable Chain-of-Verification prompting technique for improved accuracy
203
+ - `chain_of_thought` (bool, default=True): Enable Chain-of-Thought prompting technique for improved accuracy
203
204
  - `step_back_prompt` (bool, default=False): Enable step-back prompting to analyze higher-level context before classification
204
205
  - `context_prompt` (bool, default=False): Add expert role and behavioral guidelines to the prompt
205
206
  - `filename` (str, default="categorized_data.csv"): Filename for CSV output
@@ -1,10 +1,10 @@
1
1
  catllm/CERAD_functions.py,sha256=q4HbP5e2Yu8NnZZ-2eX4sImyj6u3i8xWcq0pYU81iis,22676
2
- catllm/__about__.py,sha256=QIxgPsG9zGrIHx80hZ1HkOlxVi_F52MocXySU5cHB7Q,430
2
+ catllm/__about__.py,sha256=E0enlOPQDj7XaMZv62lffULZGOUEAqpRIyZ12A6f3zk,430
3
3
  catllm/__init__.py,sha256=sf02zp7N0NW0mAQi7eQ4gliWR1EwoqvXkHN2HwwjcTE,372
4
4
  catllm/build_web_research.py,sha256=880dfE2bEQb-FrXP-42JoLLtyc9ox_sBULDr38xiTiQ,22655
5
5
  catllm/image_functions.py,sha256=8_FftRU285x1HT-AgNkaobefQVD-5q7ZY_t7JFdL3Sg,36177
6
6
  catllm/model_reference_list.py,sha256=37pWwMcgnf4biE3BVRluH5oz2P6ccdJJiCVNHodBH8k,2307
7
- catllm/text_functions.py,sha256=Yhb4ukFjNE9jHFdDC8Qa9rFhYAiyS6FTwAM7pG5TzR8,32521
7
+ catllm/text_functions.py,sha256=O6wfDh50Xtc0JvQtjWb9L9PgtBP6cjxWBw-PCNmbiaE,33371
8
8
  catllm/calls/CoVe.py,sha256=Y9OGJbaeJ3Odwira92cPXUlnm_ADFqvpOSFSNjFzMMU,10847
9
9
  catllm/calls/__init__.py,sha256=fWuMwLeSGa6zXJYd4s8IyNblsD62G-1NMUsOKrNIkoI,725
10
10
  catllm/calls/all_calls.py,sha256=E25KpZ_MakMDeCpNCOOM8kQvlfex6UMjnGN1wHkA4AI,14356
@@ -13,7 +13,7 @@ catllm/images/cube.png,sha256=nFec3e5bmRe4zrBCJ8QK-HcJLrG7u7dYdKhmdMfacfE,77275
13
13
  catllm/images/diamond.png,sha256=rJDZKtsnBGRO8FPA0iHuA8FvHFGi9PkI_DWSFdw6iv0,99568
14
14
  catllm/images/overlapping_pentagons.png,sha256=VO5plI6eoVRnjfqinn1nNzsCP2WQhuQy71V0EASouW4,71208
15
15
  catllm/images/rectangles.png,sha256=2XM16HO9EYWj2yHgN4bPXaCwPfl7iYQy0tQUGaJX9xg,40692
16
- cat_llm-0.0.73.dist-info/METADATA,sha256=bLNW2lUDfJ32VOX56JglxWRszev6oYXTNvGSn80CyWo,23104
17
- cat_llm-0.0.73.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
- cat_llm-0.0.73.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
19
- cat_llm-0.0.73.dist-info/RECORD,,
16
+ cat_llm-0.0.74.dist-info/METADATA,sha256=DYaL_OFgi9MuFpWLd1DHgqVi_osTwK1DJH-E5Q2kaa8,23214
17
+ cat_llm-0.0.74.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
+ cat_llm-0.0.74.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
19
+ cat_llm-0.0.74.dist-info/RECORD,,
catllm/__about__.py CHANGED
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.0.73"
4
+ __version__ = "0.0.74"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-llm"
catllm/text_functions.py CHANGED
@@ -260,6 +260,7 @@ def multi_class(
260
260
  safety = False,
261
261
  to_csv = False,
262
262
  chain_of_verification = False,
263
+ chain_of_thought = True,
263
264
  step_back_prompt = False,
264
265
  context_prompt = False,
265
266
  filename = "categorized_data.csv",
@@ -397,12 +398,27 @@ def multi_class(
397
398
  extracted_jsons.append(default_json)
398
399
  #print(f"Skipped NaN input.")
399
400
  else:
401
+ if chain_of_thought:
402
+ prompt = f"""{survey_question_context}
400
403
 
401
- prompt = f"""{survey_question_context} \
402
- Categorize this survey response "{response}" into the following categories that apply: \
403
- {categories_str}
404
- {examples_text}
405
- Provide your work in JSON format where the number belonging to each category is the key and a 1 if the category is present and a 0 if it is not present as key values."""
404
+ Categorize this survey response "{response}" into the following categories that apply:
405
+ {categories_str}
406
+
407
+ Let's think step by step:
408
+ 1. First, identify the main themes mentioned in the response
409
+ 2. Then, match each theme to the relevant categories
410
+ 3. Finally, assign 1 to matching categories and 0 to non-matching categories
411
+
412
+ {examples_text}
413
+
414
+ Provide your reasoning for each category, then provide your final answer in JSON format where the number belonging to each category is the key and a 1 if the category is present and a 0 if it is not present as key values."""
415
+ else:
416
+
417
+ prompt = f"""{survey_question_context} \
418
+ Categorize this survey response "{response}" into the following categories that apply: \
419
+ {categories_str}
420
+ {examples_text}
421
+ Provide your work in JSON format where the number belonging to each category is the key and a 1 if the category is present and a 0 if it is not present as key values."""
406
422
 
407
423
  if context_prompt:
408
424
  context = """You are an expert researcher in survey data categorization.