cat-llm 0.0.75__tar.gz → 0.0.76__tar.gz
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {cat_llm-0.0.75 → cat_llm-0.0.76}/PKG-INFO +1 -1
- {cat_llm-0.0.75 → cat_llm-0.0.76}/src/catllm/__about__.py +1 -1
- {cat_llm-0.0.75 → cat_llm-0.0.76}/src/catllm/text_functions.py +26 -4
- {cat_llm-0.0.75 → cat_llm-0.0.76}/.gitignore +0 -0
- {cat_llm-0.0.75 → cat_llm-0.0.76}/LICENSE +0 -0
- {cat_llm-0.0.75 → cat_llm-0.0.76}/README.md +0 -0
- {cat_llm-0.0.75 → cat_llm-0.0.76}/pyproject.toml +0 -0
- {cat_llm-0.0.75 → cat_llm-0.0.76}/src/catllm/CERAD_functions.py +0 -0
- {cat_llm-0.0.75 → cat_llm-0.0.76}/src/catllm/__init__.py +0 -0
- {cat_llm-0.0.75 → cat_llm-0.0.76}/src/catllm/build_web_research.py +0 -0
- {cat_llm-0.0.75 → cat_llm-0.0.76}/src/catllm/calls/CoVe.py +0 -0
- {cat_llm-0.0.75 → cat_llm-0.0.76}/src/catllm/calls/__init__.py +0 -0
- {cat_llm-0.0.75 → cat_llm-0.0.76}/src/catllm/calls/all_calls.py +0 -0
- {cat_llm-0.0.75 → cat_llm-0.0.76}/src/catllm/image_functions.py +0 -0
- {cat_llm-0.0.75 → cat_llm-0.0.76}/src/catllm/images/circle.png +0 -0
- {cat_llm-0.0.75 → cat_llm-0.0.76}/src/catllm/images/cube.png +0 -0
- {cat_llm-0.0.75 → cat_llm-0.0.76}/src/catllm/images/diamond.png +0 -0
- {cat_llm-0.0.75 → cat_llm-0.0.76}/src/catllm/images/overlapping_pentagons.png +0 -0
- {cat_llm-0.0.75 → cat_llm-0.0.76}/src/catllm/images/rectangles.png +0 -0
- {cat_llm-0.0.75 → cat_llm-0.0.76}/src/catllm/model_reference_list.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cat-llm
|
3
|
-
Version: 0.0.75
|
3
|
+
Version: 0.0.76
|
4
4
|
Summary: A tool for categorizing text data and images using LLMs and vision models
|
5
5
|
Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
|
6
6
|
Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
|
@@ -341,6 +341,10 @@ def multi_class(
|
|
341
341
|
chain_of_thought = True,
|
342
342
|
step_back_prompt = False,
|
343
343
|
context_prompt = False,
|
344
|
+
top_n = 12,
|
345
|
+
cat_num = 10,
|
346
|
+
divisions = 10,
|
347
|
+
research_question = None,
|
344
348
|
filename = "categorized_data.csv",
|
345
349
|
save_directory = None,
|
346
350
|
model_source = "auto"
|
@@ -351,6 +355,7 @@ def multi_class(
|
|
351
355
|
import regex
|
352
356
|
from tqdm import tqdm
|
353
357
|
|
358
|
+
#used in chain of verification
|
354
359
|
def remove_numbering(line):
|
355
360
|
line = line.strip()
|
356
361
|
|
@@ -399,16 +404,33 @@ def multi_class(
|
|
399
404
|
raise ValueError(f"❌ Could not auto-detect model source from '{user_model}'. Please specify model_source explicitly: OpenAI, Anthropic, Perplexity, Google, Huggingface, or Mistral")
|
400
405
|
else:
|
401
406
|
model_source = model_source.lower()
|
402
|
-
|
407
|
+
|
408
|
+
if categories == "auto":
|
409
|
+
if survey_question == "": # step back requires the survey question to function well
|
410
|
+
raise TypeError("survey_question is required when using step_back_prompt. Please provide the survey question you are analyzing.")
|
411
|
+
|
412
|
+
categories = explore_common_categories(
|
413
|
+
survey_question=survey_question,
|
414
|
+
survey_input=survey_input,
|
415
|
+
research_question=research_question,
|
416
|
+
api_key=api_key,
|
417
|
+
top_n=top_n,
|
418
|
+
cat_num=cat_num,
|
419
|
+
divisions=divisions
|
420
|
+
)
|
421
|
+
|
403
422
|
categories_str = "\n".join(f"{i + 1}. {cat}" for i, cat in enumerate(categories))
|
404
423
|
cat_num = len(categories)
|
405
424
|
category_dict = {str(i+1): "0" for i in range(cat_num)}
|
406
425
|
example_JSON = json.dumps(category_dict, indent=4)
|
407
426
|
|
408
|
-
# ensure number of categories is what user wants
|
409
427
|
print(f"\nThe categories you entered to be coded by {model_source} {user_model}:")
|
410
|
-
|
411
|
-
|
428
|
+
|
429
|
+
if categories != "auto":
|
430
|
+
# ensure number of categories is what user wants
|
431
|
+
|
432
|
+
for i, cat in enumerate(categories, 1):
|
433
|
+
print(f"{i}. {cat}")
|
412
434
|
|
413
435
|
link1 = []
|
414
436
|
extracted_jsons = []
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|