cat-llm 0.0.72__tar.gz → 0.0.74__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cat_llm-0.0.72 → cat_llm-0.0.74}/PKG-INFO +12 -5
- {cat_llm-0.0.72 → cat_llm-0.0.74}/README.md +11 -4
- {cat_llm-0.0.72 → cat_llm-0.0.74}/src/catllm/__about__.py +1 -1
- {cat_llm-0.0.72 → cat_llm-0.0.74}/src/catllm/text_functions.py +21 -6
- {cat_llm-0.0.72 → cat_llm-0.0.74}/.gitignore +0 -0
- {cat_llm-0.0.72 → cat_llm-0.0.74}/LICENSE +0 -0
- {cat_llm-0.0.72 → cat_llm-0.0.74}/pyproject.toml +0 -0
- {cat_llm-0.0.72 → cat_llm-0.0.74}/src/catllm/CERAD_functions.py +0 -0
- {cat_llm-0.0.72 → cat_llm-0.0.74}/src/catllm/__init__.py +0 -0
- {cat_llm-0.0.72 → cat_llm-0.0.74}/src/catllm/build_web_research.py +0 -0
- {cat_llm-0.0.72 → cat_llm-0.0.74}/src/catllm/calls/CoVe.py +0 -0
- {cat_llm-0.0.72 → cat_llm-0.0.74}/src/catllm/calls/__init__.py +0 -0
- {cat_llm-0.0.72 → cat_llm-0.0.74}/src/catllm/calls/all_calls.py +0 -0
- {cat_llm-0.0.72 → cat_llm-0.0.74}/src/catllm/image_functions.py +0 -0
- {cat_llm-0.0.72 → cat_llm-0.0.74}/src/catllm/images/circle.png +0 -0
- {cat_llm-0.0.72 → cat_llm-0.0.74}/src/catllm/images/cube.png +0 -0
- {cat_llm-0.0.72 → cat_llm-0.0.74}/src/catllm/images/diamond.png +0 -0
- {cat_llm-0.0.72 → cat_llm-0.0.74}/src/catllm/images/overlapping_pentagons.png +0 -0
- {cat_llm-0.0.72 → cat_llm-0.0.74}/src/catllm/images/rectangles.png +0 -0
- {cat_llm-0.0.72 → cat_llm-0.0.74}/src/catllm/model_reference_list.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cat-llm
-Version: 0.0.72
+Version: 0.0.74
 Summary: A tool for categorizing text data and images using LLMs and vision models
 Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
 Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
@@ -189,16 +189,23 @@ Performs multi-label classification of text responses into user-defined categories
 Processes each text response individually, assigning one or more categories from the provided list. Supports flexible output formatting and optional saving of results to CSV for easy integration with data analysis workflows.

 **Parameters:**
-- `survey_question` (str): The survey question being analyzed
 - `survey_input` (list): List of text responses to classify
 - `categories` (list): List of predefined categories for classification
 - `api_key` (str): API key for the LLM service
-- `user_model` (str, default="gpt-
-- `
+- `user_model` (str, default="gpt-5"): Specific model to use
+- `user_prompt` (str, optional): Custom prompt template to override default prompting
+- `survey_question` (str, default=""): The survey question being analyzed
+- `example1` through `example6` (dict, optional): Few-shot learning examples (format: {"response": "...", "categories": [...]})
+- `creativity` (float, optional): Temperature/randomness setting (0.0-1.0, varies by model)
 - `safety` (bool, default=False): Enable safety checks on responses and saves to CSV at each API call step
+- `to_csv` (bool, default=False): Whether to save results to CSV
+- `chain_of_verification` (bool, default=False): Enable Chain-of-Verification prompting technique for improved accuracy
+- `chain_of_thought` (bool, default=False): Enable Chain-of-Thought prompting technique for improved accuracy
+- `step_back_prompt` (bool, default=False): Enable step-back prompting to analyze higher-level context before classification
+- `context_prompt` (bool, default=False): Add expert role and behavioral guidelines to the prompt
 - `filename` (str, default="categorized_data.csv"): Filename for CSV output
 - `save_directory` (str, optional): Directory path to save the CSV file
-- `model_source` (str, default="
+- `model_source` (str, default="auto"): Model provider ("auto", "OpenAI", "Anthropic", "Google", "Mistral", "Perplexity", "Huggingface")

 **Returns:**
 - `pandas.DataFrame`: DataFrame with classification results, columns formatted as specified
README.md

@@ -160,16 +160,23 @@ Performs multi-label classification of text responses into user-defined categories
 Processes each text response individually, assigning one or more categories from the provided list. Supports flexible output formatting and optional saving of results to CSV for easy integration with data analysis workflows.

 **Parameters:**
-- `survey_question` (str): The survey question being analyzed
 - `survey_input` (list): List of text responses to classify
 - `categories` (list): List of predefined categories for classification
 - `api_key` (str): API key for the LLM service
-- `user_model` (str, default="gpt-
-- `
+- `user_model` (str, default="gpt-5"): Specific model to use
+- `user_prompt` (str, optional): Custom prompt template to override default prompting
+- `survey_question` (str, default=""): The survey question being analyzed
+- `example1` through `example6` (dict, optional): Few-shot learning examples (format: {"response": "...", "categories": [...]})
+- `creativity` (float, optional): Temperature/randomness setting (0.0-1.0, varies by model)
 - `safety` (bool, default=False): Enable safety checks on responses and saves to CSV at each API call step
+- `to_csv` (bool, default=False): Whether to save results to CSV
+- `chain_of_verification` (bool, default=False): Enable Chain-of-Verification prompting technique for improved accuracy
+- `chain_of_thought` (bool, default=False): Enable Chain-of-Thought prompting technique for improved accuracy
+- `step_back_prompt` (bool, default=False): Enable step-back prompting to analyze higher-level context before classification
+- `context_prompt` (bool, default=False): Add expert role and behavioral guidelines to the prompt
 - `filename` (str, default="categorized_data.csv"): Filename for CSV output
 - `save_directory` (str, optional): Directory path to save the CSV file
-- `model_source` (str, default="
+- `model_source` (str, default="auto"): Model provider ("auto", "OpenAI", "Anthropic", "Google", "Mistral", "Perplexity", "Huggingface")

 **Returns:**
 - `pandas.DataFrame`: DataFrame with classification results, columns formatted as specified
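The parameter list above maps directly onto keyword arguments of `multi_class`. A minimal usage sketch based on the documented names and defaults follows; the package-level import path, the API key placeholder, and the example data are assumptions for illustration, not taken from the package itself.

```python
# Hypothetical usage sketch of the documented multi_class signature.
# Only the keyword names and defaults come from the README above; the
# import path and sample data are assumed.
from catllm import multi_class  # assumed to be re-exported in __init__.py

responses = [
    "I walk to the park every morning and sometimes swim.",
    "Mostly I just watch TV.",
]
categories = ["physical exercise", "outdoor activity", "sedentary activity"]

df = multi_class(
    survey_input=responses,
    categories=categories,
    api_key="YOUR_API_KEY",          # placeholder
    user_model="gpt-5",              # documented default
    survey_question="What do you do in your free time?",
    chain_of_thought=True,           # new option in 0.0.74
    model_source="auto",             # documented default; or a named provider
    to_csv=True,
    filename="categorized_data.csv",
)
print(df.head())  # one 0/1 column per category, per the documented return value
```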
src/catllm/text_functions.py

@@ -260,6 +260,7 @@ def multi_class(
     safety = False,
     to_csv = False,
     chain_of_verification = False,
+    chain_of_thought = True,
     step_back_prompt = False,
     context_prompt = False,
     filename = "categorized_data.csv",
@@ -397,12 +398,27 @@ def multi_class(
         extracted_jsons.append(default_json)
         #print(f"Skipped NaN input.")
     else:
+        if chain_of_thought:
+            prompt = f"""{survey_question_context}

-        prompt = f"""{survey_question_context} \
-        Categorize this survey response "{response}" into the following categories that apply: \
-        {categories_str}
-        {examples_text}
-        Provide your work in JSON format where the number belonging to each category is the key and a 1 if the category is present and a 0 if it is not present as key values."""
+            Categorize this survey response "{response}" into the following categories that apply:
+            {categories_str}
+
+            Let's think step by step:
+            1. First, identify the main themes mentioned in the response
+            2. Then, match each theme to the relevant categories
+            3. Finally, assign 1 to matching categories and 0 to non-matching categories
+
+            {examples_text}
+
+            Provide your reasoning for each category, then provide your final answer in JSON format where the number belonging to each category is the key and a 1 if the category is present and a 0 if it is not present as key values."""
+        else:
+
+            prompt = f"""{survey_question_context} \
+            Categorize this survey response "{response}" into the following categories that apply: \
+            {categories_str}
+            {examples_text}
+            Provide your work in JSON format where the number belonging to each category is the key and a 1 if the category is present and a 0 if it is not present as key values."""

         if context_prompt:
             context = """You are an expert researcher in survey data categorization.
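For illustration, here is a sketch of what the new chain-of-thought branch would render for a single response. The variable values are invented placeholders for `survey_question_context`, `response`, `categories_str`, and `examples_text`, which the real function builds elsewhere; the numbered-category format is assumed because the prompt refers to each category by number.

```python
# Hypothetical rendering of the chain-of-thought prompt added above.
# All values below are made-up stand-ins; only the prompt template text
# comes from the diff.
survey_question_context = 'The survey asked: "What do you do in your free time?"'
response = "I walk to the park every morning and sometimes swim."
categories_str = "1. physical exercise\n2. outdoor activity\n3. sedentary activity"
examples_text = ""  # no few-shot examples supplied in this sketch

prompt = f"""{survey_question_context}

Categorize this survey response "{response}" into the following categories that apply:
{categories_str}

Let's think step by step:
1. First, identify the main themes mentioned in the response
2. Then, match each theme to the relevant categories
3. Finally, assign 1 to matching categories and 0 to non-matching categories

{examples_text}

Provide your reasoning for each category, then provide your final answer in JSON format where the number belonging to each category is the key and a 1 if the category is present and a 0 if it is not present as key values."""

print(prompt)
```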
@@ -410,7 +426,6 @@ def multi_class(
             When uncertain, prioritize precision over recall."""

             prompt = context + prompt
-            print(prompt)

         if chain_of_verification:
             step2_prompt = f"""You provided this initial categorization:
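Both prompt variants ask the model to answer with a JSON object keyed by category number with 0/1 values, and the function accumulates those answers in `extracted_jsons` before building the returned DataFrame. A rough post-processing sketch follows; the extraction regex, function name, and column naming are assumptions, not the package's actual implementation.

```python
import json
import re

import pandas as pd

# Hypothetical post-processing sketch. cat-llm's real extraction logic
# (which feeds extracted_jsons) is not shown in this diff; this only
# illustrates the JSON answer format the prompts request.
def parse_reply(reply: str, n_categories: int) -> dict:
    """Pull the last {...} block out of a model reply and map it to 0/1 flags."""
    matches = re.findall(r"\{[^{}]*\}", reply)
    parsed = json.loads(matches[-1]) if matches else {}
    # Default every category to 0, then overwrite with whatever the model returned.
    return {str(i): int(parsed.get(str(i), 0)) for i in range(1, n_categories + 1)}

replies = [
    'Reasoning... Final answer: {"1": 1, "2": 1, "3": 0}',
    '{"1": 0, "2": 0, "3": 1}',
]
rows = [parse_reply(r, n_categories=3) for r in replies]
df = pd.DataFrame(rows)  # one 0/1 column per category number
print(df)
```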