cat-llm 0.0.75__py3-none-any.whl → 0.0.77__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-llm
3
- Version: 0.0.75
3
+ Version: 0.0.77
4
4
  Summary: A tool for categorizing text data and images using LLMs and vision models
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
@@ -1,10 +1,10 @@
1
1
  catllm/CERAD_functions.py,sha256=q4HbP5e2Yu8NnZZ-2eX4sImyj6u3i8xWcq0pYU81iis,22676
2
- catllm/__about__.py,sha256=v46F-9gCzROey89qHYByH-uJth4LZKkHhUfnitLnzzo,430
2
+ catllm/__about__.py,sha256=7ns6QhLryp-E7SvdG6rrirf5LNNgvqmDLZk-zvKaZRY,430
3
3
  catllm/__init__.py,sha256=sf02zp7N0NW0mAQi7eQ4gliWR1EwoqvXkHN2HwwjcTE,372
4
4
  catllm/build_web_research.py,sha256=880dfE2bEQb-FrXP-42JoLLtyc9ox_sBULDr38xiTiQ,22655
5
5
  catllm/image_functions.py,sha256=8_FftRU285x1HT-AgNkaobefQVD-5q7ZY_t7JFdL3Sg,36177
6
6
  catllm/model_reference_list.py,sha256=37pWwMcgnf4biE3BVRluH5oz2P6ccdJJiCVNHodBH8k,2307
7
- catllm/text_functions.py,sha256=Gj0BrhprOMBgLpuJByG3woqFclYFLNjbK_4jh72ZOo8,35355
7
+ catllm/text_functions.py,sha256=OHlWdErhUG6YPJKk-9vAR-jY3gj2zb5MOy3P0cJu_Fc,35714
8
8
  catllm/calls/CoVe.py,sha256=Y9OGJbaeJ3Odwira92cPXUlnm_ADFqvpOSFSNjFzMMU,10847
9
9
  catllm/calls/__init__.py,sha256=fWuMwLeSGa6zXJYd4s8IyNblsD62G-1NMUsOKrNIkoI,725
10
10
  catllm/calls/all_calls.py,sha256=AeN1QocOvL3Z36lDkq6bO0LB3ruz6pXyedvdci0YCxQ,16627
@@ -13,7 +13,7 @@ catllm/images/cube.png,sha256=nFec3e5bmRe4zrBCJ8QK-HcJLrG7u7dYdKhmdMfacfE,77275
13
13
  catllm/images/diamond.png,sha256=rJDZKtsnBGRO8FPA0iHuA8FvHFGi9PkI_DWSFdw6iv0,99568
14
14
  catllm/images/overlapping_pentagons.png,sha256=VO5plI6eoVRnjfqinn1nNzsCP2WQhuQy71V0EASouW4,71208
15
15
  catllm/images/rectangles.png,sha256=2XM16HO9EYWj2yHgN4bPXaCwPfl7iYQy0tQUGaJX9xg,40692
16
- cat_llm-0.0.75.dist-info/METADATA,sha256=f20lG5JACNATG6u4tv7DWxShs86UHU_JgWmE74S803I,23214
17
- cat_llm-0.0.75.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
- cat_llm-0.0.75.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
19
- cat_llm-0.0.75.dist-info/RECORD,,
16
+ cat_llm-0.0.77.dist-info/METADATA,sha256=Z2QqNq_gV_9RDkbG1c7R40qILnkqIk640pqToPZxCDc,23214
17
+ cat_llm-0.0.77.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
+ cat_llm-0.0.77.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
19
+ cat_llm-0.0.77.dist-info/RECORD,,
catllm/__about__.py CHANGED
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.0.75"
4
+ __version__ = "0.0.77"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-llm"
catllm/text_functions.py CHANGED
@@ -313,14 +313,7 @@ Number your categories from 1 through {cat_num} and be concise with the category
313
313
  return top_categories_final
314
314
 
315
315
  #multi-class text classification
316
- # what this function does:
317
- # does context prompting, giving the model a background on the task at hand and the user's survey question
318
- # system prompting, overall context and purpose for the language model
319
- # role prompting, assings a spacific identity to the model
320
- # also enables few shot prompting, allowing the user to input a few examples
321
- # provides POSITIVE INSTRUCTIONS reather than limitations/restrictions
322
- # GOAL: enable step-back prompting
323
- # GOAL 2: enable self-consistency
316
+ # GOAL: enable self-consistency
324
317
  def multi_class(
325
318
  survey_input,
326
319
  categories,
@@ -341,6 +334,10 @@ def multi_class(
341
334
  chain_of_thought = True,
342
335
  step_back_prompt = False,
343
336
  context_prompt = False,
337
+ top_n = 12,
338
+ cat_num = 10,
339
+ divisions = 10,
340
+ research_question = None,
344
341
  filename = "categorized_data.csv",
345
342
  save_directory = None,
346
343
  model_source = "auto"
@@ -351,6 +348,7 @@ def multi_class(
351
348
  import regex
352
349
  from tqdm import tqdm
353
350
 
351
+ #used in chain of verification
354
352
  def remove_numbering(line):
355
353
  line = line.strip()
356
354
 
@@ -399,16 +397,35 @@ def multi_class(
399
397
  raise ValueError(f"❌ Could not auto-detect model source from '{user_model}'. Please specify model_source explicitly: OpenAI, Anthropic, Perplexity, Google, Huggingface, or Mistral")
400
398
  else:
401
399
  model_source = model_source.lower()
402
-
400
+
401
+ if categories == "auto":
402
+ if survey_question == "": # step back requires the survey question to function well
403
+ raise TypeError("survey_question is required when using step_back_prompt. Please provide the survey question you are analyzing.")
404
+
405
+ categories = explore_common_categories(
406
+ survey_question=survey_question,
407
+ survey_input=survey_input,
408
+ research_question=research_question,
409
+ api_key=api_key,
410
+ model_source=model_source,
411
+ user_model=user_model,
412
+ top_n=top_n,
413
+ cat_num=cat_num,
414
+ divisions=divisions
415
+ )
416
+
403
417
  categories_str = "\n".join(f"{i + 1}. {cat}" for i, cat in enumerate(categories))
404
418
  cat_num = len(categories)
405
419
  category_dict = {str(i+1): "0" for i in range(cat_num)}
406
420
  example_JSON = json.dumps(category_dict, indent=4)
407
421
 
408
- # ensure number of categories is what user wants
409
422
  print(f"\nThe categories you entered to be coded by {model_source} {user_model}:")
410
- for i, cat in enumerate(categories, 1):
411
- print(f"{i}. {cat}")
423
+
424
+ if categories != "auto":
425
+ # ensure number of categories is what user wants
426
+
427
+ for i, cat in enumerate(categories, 1):
428
+ print(f"{i}. {cat}")
412
429
 
413
430
  link1 = []
414
431
  extracted_jsons = []