cat-llm 0.0.75__py3-none-any.whl → 0.0.77__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cat_llm-0.0.75.dist-info → cat_llm-0.0.77.dist-info}/METADATA +1 -1
- {cat_llm-0.0.75.dist-info → cat_llm-0.0.77.dist-info}/RECORD +6 -6
- catllm/__about__.py +1 -1
- catllm/text_functions.py +29 -12
- {cat_llm-0.0.75.dist-info → cat_llm-0.0.77.dist-info}/WHEEL +0 -0
- {cat_llm-0.0.75.dist-info → cat_llm-0.0.77.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cat-llm
|
3
|
-
Version: 0.0.75
|
3
|
+
Version: 0.0.77
|
4
4
|
Summary: A tool for categorizing text data and images using LLMs and vision models
|
5
5
|
Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
|
6
6
|
Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
|
@@ -1,10 +1,10 @@
|
|
1
1
|
catllm/CERAD_functions.py,sha256=q4HbP5e2Yu8NnZZ-2eX4sImyj6u3i8xWcq0pYU81iis,22676
|
2
|
-
catllm/__about__.py,sha256=
|
2
|
+
catllm/__about__.py,sha256=7ns6QhLryp-E7SvdG6rrirf5LNNgvqmDLZk-zvKaZRY,430
|
3
3
|
catllm/__init__.py,sha256=sf02zp7N0NW0mAQi7eQ4gliWR1EwoqvXkHN2HwwjcTE,372
|
4
4
|
catllm/build_web_research.py,sha256=880dfE2bEQb-FrXP-42JoLLtyc9ox_sBULDr38xiTiQ,22655
|
5
5
|
catllm/image_functions.py,sha256=8_FftRU285x1HT-AgNkaobefQVD-5q7ZY_t7JFdL3Sg,36177
|
6
6
|
catllm/model_reference_list.py,sha256=37pWwMcgnf4biE3BVRluH5oz2P6ccdJJiCVNHodBH8k,2307
|
7
|
-
catllm/text_functions.py,sha256=
|
7
|
+
catllm/text_functions.py,sha256=OHlWdErhUG6YPJKk-9vAR-jY3gj2zb5MOy3P0cJu_Fc,35714
|
8
8
|
catllm/calls/CoVe.py,sha256=Y9OGJbaeJ3Odwira92cPXUlnm_ADFqvpOSFSNjFzMMU,10847
|
9
9
|
catllm/calls/__init__.py,sha256=fWuMwLeSGa6zXJYd4s8IyNblsD62G-1NMUsOKrNIkoI,725
|
10
10
|
catllm/calls/all_calls.py,sha256=AeN1QocOvL3Z36lDkq6bO0LB3ruz6pXyedvdci0YCxQ,16627
|
@@ -13,7 +13,7 @@ catllm/images/cube.png,sha256=nFec3e5bmRe4zrBCJ8QK-HcJLrG7u7dYdKhmdMfacfE,77275
|
|
13
13
|
catllm/images/diamond.png,sha256=rJDZKtsnBGRO8FPA0iHuA8FvHFGi9PkI_DWSFdw6iv0,99568
|
14
14
|
catllm/images/overlapping_pentagons.png,sha256=VO5plI6eoVRnjfqinn1nNzsCP2WQhuQy71V0EASouW4,71208
|
15
15
|
catllm/images/rectangles.png,sha256=2XM16HO9EYWj2yHgN4bPXaCwPfl7iYQy0tQUGaJX9xg,40692
|
16
|
-
cat_llm-0.0.
|
17
|
-
cat_llm-0.0.
|
18
|
-
cat_llm-0.0.
|
19
|
-
cat_llm-0.0.
|
16
|
+
cat_llm-0.0.77.dist-info/METADATA,sha256=Z2QqNq_gV_9RDkbG1c7R40qILnkqIk640pqToPZxCDc,23214
|
17
|
+
cat_llm-0.0.77.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
18
|
+
cat_llm-0.0.77.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
|
19
|
+
cat_llm-0.0.77.dist-info/RECORD,,
|
catllm/__about__.py
CHANGED
catllm/text_functions.py
CHANGED
@@ -313,14 +313,7 @@ Number your categories from 1 through {cat_num} and be concise with the category
|
|
313
313
|
return top_categories_final
|
314
314
|
|
315
315
|
#multi-class text classification
|
316
|
-
#
|
317
|
-
# does context prompting, giving the model a background on the task at hand and the user's survey question
|
318
|
-
# system prompting, overall context and purpose for the language model
|
319
|
-
# role prompting, assings a spacific identity to the model
|
320
|
-
# also enables few shot prompting, allowing the user to input a few examples
|
321
|
-
# provides POSITIVE INSTRUCTIONS reather than limitations/restrictions
|
322
|
-
# GOAL: enable step-back prompting
|
323
|
-
# GOAL 2: enable self-consistency
|
316
|
+
# GOAL: enable self-consistency
|
324
317
|
def multi_class(
|
325
318
|
survey_input,
|
326
319
|
categories,
|
@@ -341,6 +334,10 @@ def multi_class(
|
|
341
334
|
chain_of_thought = True,
|
342
335
|
step_back_prompt = False,
|
343
336
|
context_prompt = False,
|
337
|
+
top_n = 12,
|
338
|
+
cat_num = 10,
|
339
|
+
divisions = 10,
|
340
|
+
research_question = None,
|
344
341
|
filename = "categorized_data.csv",
|
345
342
|
save_directory = None,
|
346
343
|
model_source = "auto"
|
@@ -351,6 +348,7 @@ def multi_class(
|
|
351
348
|
import regex
|
352
349
|
from tqdm import tqdm
|
353
350
|
|
351
|
+
#used in chain of verification
|
354
352
|
def remove_numbering(line):
|
355
353
|
line = line.strip()
|
356
354
|
|
@@ -399,16 +397,35 @@ def multi_class(
|
|
399
397
|
raise ValueError(f"❌ Could not auto-detect model source from '{user_model}'. Please specify model_source explicitly: OpenAI, Anthropic, Perplexity, Google, Huggingface, or Mistral")
|
400
398
|
else:
|
401
399
|
model_source = model_source.lower()
|
402
|
-
|
400
|
+
|
401
|
+
if categories == "auto":
|
402
|
+
if survey_question == "": # step back requires the survey question to function well
|
403
|
+
raise TypeError("survey_question is required when using step_back_prompt. Please provide the survey question you are analyzing.")
|
404
|
+
|
405
|
+
categories = explore_common_categories(
|
406
|
+
survey_question=survey_question,
|
407
|
+
survey_input=survey_input,
|
408
|
+
research_question=research_question,
|
409
|
+
api_key=api_key,
|
410
|
+
model_source=model_source,
|
411
|
+
user_model=user_model,
|
412
|
+
top_n=top_n,
|
413
|
+
cat_num=cat_num,
|
414
|
+
divisions=divisions
|
415
|
+
)
|
416
|
+
|
403
417
|
categories_str = "\n".join(f"{i + 1}. {cat}" for i, cat in enumerate(categories))
|
404
418
|
cat_num = len(categories)
|
405
419
|
category_dict = {str(i+1): "0" for i in range(cat_num)}
|
406
420
|
example_JSON = json.dumps(category_dict, indent=4)
|
407
421
|
|
408
|
-
# ensure number of categories is what user wants
|
409
422
|
print(f"\nThe categories you entered to be coded by {model_source} {user_model}:")
|
410
|
-
|
411
|
-
|
423
|
+
|
424
|
+
if categories != "auto":
|
425
|
+
# ensure number of categories is what user wants
|
426
|
+
|
427
|
+
for i, cat in enumerate(categories, 1):
|
428
|
+
print(f"{i}. {cat}")
|
412
429
|
|
413
430
|
link1 = []
|
414
431
|
extracted_jsons = []
|
File without changes
|
File without changes
|