cat-llm 0.0.75__tar.gz → 0.0.76__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-llm
3
- Version: 0.0.75
3
+ Version: 0.0.76
4
4
  Summary: A tool for categorizing text data and images using LLMs and vision models
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.0.75"
4
+ __version__ = "0.0.76"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-llm"
@@ -341,6 +341,10 @@ def multi_class(
341
341
  chain_of_thought = True,
342
342
  step_back_prompt = False,
343
343
  context_prompt = False,
344
+ top_n = 12,
345
+ cat_num = 10,
346
+ divisions = 10,
347
+ research_question = None,
344
348
  filename = "categorized_data.csv",
345
349
  save_directory = None,
346
350
  model_source = "auto"
@@ -351,6 +355,7 @@ def multi_class(
351
355
  import regex
352
356
  from tqdm import tqdm
353
357
 
358
+ #used in chain of verification
354
359
  def remove_numbering(line):
355
360
  line = line.strip()
356
361
 
@@ -399,16 +404,33 @@ def multi_class(
399
404
  raise ValueError(f"❌ Could not auto-detect model source from '{user_model}'. Please specify model_source explicitly: OpenAI, Anthropic, Perplexity, Google, Huggingface, or Mistral")
400
405
  else:
401
406
  model_source = model_source.lower()
402
-
407
+
408
+ if categories == "auto":
409
+ if survey_question == "": # automatic category discovery (categories == "auto") requires the survey question
410
+ raise TypeError("survey_question is required when using step_back_prompt. Please provide the survey question you are analyzing.")
411
+
412
+ categories = explore_common_categories(
413
+ survey_question=survey_question,
414
+ survey_input=survey_input,
415
+ research_question=research_question,
416
+ api_key=api_key,
417
+ top_n=top_n,
418
+ cat_num=cat_num,
419
+ divisions=divisions
420
+ )
421
+
403
422
  categories_str = "\n".join(f"{i + 1}. {cat}" for i, cat in enumerate(categories))
404
423
  cat_num = len(categories)
405
424
  category_dict = {str(i+1): "0" for i in range(cat_num)}
406
425
  example_JSON = json.dumps(category_dict, indent=4)
407
426
 
408
- # ensure number of categories is what user wants
409
427
  print(f"\nThe categories you entered to be coded by {model_source} {user_model}:")
410
- for i, cat in enumerate(categories, 1):
411
- print(f"{i}. {cat}")
428
+
429
+ if categories != "auto":
430
+ # ensure number of categories is what user wants
431
+
432
+ for i, cat in enumerate(categories, 1):
433
+ print(f"{i}. {cat}")
412
434
 
413
435
  link1 = []
414
436
  extracted_jsons = []
File without changes
File without changes
File without changes
File without changes