cat-llm 0.0.62__tar.gz → 0.0.63__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-llm
3
- Version: 0.0.62
3
+ Version: 0.0.63
4
4
  Summary: A tool for categorizing text data and images using LLMs and vision models
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
@@ -44,6 +44,8 @@ def cerad_drawn_score(
44
44
  from pathlib import Path
45
45
  import pkg_resources
46
46
 
47
+ model_source = model_source.lower() # eliminating case sensitivity
48
+
47
49
  shape = shape.lower()
48
50
  shape = "rectangles" if shape == "overlapping rectangles" else shape
49
51
  if shape == "circle":
@@ -155,7 +157,7 @@ def cerad_drawn_score(
155
157
  else:
156
158
  reference_text = f"Image is expected to show within it a drawing of a {shape}.\n\n"
157
159
 
158
- if model_source == "OpenAI" and valid_image:
160
+ if model_source == "openai" and valid_image:
159
161
  prompt = [
160
162
  {
161
163
  "type": "text",
@@ -185,7 +187,7 @@ def cerad_drawn_score(
185
187
  "image_url": {"url": encoded_image, "detail": "high"}
186
188
  })
187
189
 
188
- elif model_source == "Anthropic" and valid_image:
190
+ elif model_source == "anthropic" and valid_image:
189
191
  prompt = [
190
192
  {
191
193
  "type": "text",
@@ -225,7 +227,7 @@ def cerad_drawn_score(
225
227
  }
226
228
  )
227
229
 
228
- elif model_source == "Mistral" and valid_image:
230
+ elif model_source == "mistral" and valid_image:
229
231
  prompt = [
230
232
  {
231
233
  "type": "text",
@@ -254,7 +256,7 @@ def cerad_drawn_score(
254
256
  "image_url": f"data:image/{ext};base64,{encoded_image}"
255
257
  })
256
258
 
257
- if model_source == "OpenAI" and valid_image:
259
+ if model_source == "openai" and valid_image:
258
260
  from openai import OpenAI
259
261
  client = OpenAI(api_key=api_key)
260
262
  try:
@@ -272,7 +274,7 @@ def cerad_drawn_score(
272
274
  print("An error occurred: {e}")
273
275
  link1.append("Error processing input: {e}")
274
276
 
275
- elif model_source == "Anthropic" and valid_image:
277
+ elif model_source == "anthropic" and valid_image:
276
278
  import anthropic
277
279
  client = anthropic.Anthropic(api_key=api_key)
278
280
  try:
@@ -291,7 +293,7 @@ def cerad_drawn_score(
291
293
  print("An error occurred: {e}")
292
294
  link1.append("Error processing input: {e}")
293
295
 
294
- elif model_source == "Mistral" and valid_image:
296
+ elif model_source == "mistral" and valid_image:
295
297
  from mistralai import Mistral
296
298
  reply = None
297
299
  client = Mistral(api_key=api_key)
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.0.62"
4
+ __version__ = "0.0.63"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-llm"
@@ -33,6 +33,8 @@ def image_multi_class(
33
33
  '*.psd'
34
34
  ]
35
35
 
36
+ model_source = model_source.lower() # eliminating case sensitivity
37
+
36
38
  if not isinstance(image_input, list):
37
39
  # If image_input is a filepath (string)
38
40
  image_files = []
@@ -86,7 +88,7 @@ def image_multi_class(
86
88
 
87
89
  # Handle extension safely
88
90
  ext = Path(img_path).suffix.lstrip(".").lower()
89
- if model_source == "OpenAI" or model_source == "Mistral":
91
+ if model_source == "openai" or model_source == "mistral":
90
92
  encoded_image = f"data:image/{ext};base64,{encoded}"
91
93
  prompt = [
92
94
  {
@@ -110,7 +112,7 @@ def image_multi_class(
110
112
  },
111
113
  ]
112
114
 
113
- elif model_source == "Anthropic":
115
+ elif model_source == "anthropic":
114
116
  encoded_image = f"data:image/{ext};base64,{encoded}"
115
117
  prompt = [
116
118
  {"type": "text",
@@ -136,7 +138,7 @@ def image_multi_class(
136
138
  }
137
139
  }
138
140
  ]
139
- if model_source == "OpenAI":
141
+ if model_source == "openai":
140
142
  from openai import OpenAI
141
143
  client = OpenAI(api_key=api_key)
142
144
  try:
@@ -154,7 +156,7 @@ def image_multi_class(
154
156
  print("An error occurred: {e}")
155
157
  link1.append("Error processing input: {e}")
156
158
 
157
- elif model_source == "Anthropic":
159
+ elif model_source == "anthropic":
158
160
  import anthropic
159
161
  reply = None
160
162
  client = anthropic.Anthropic(api_key=api_key)
@@ -174,7 +176,7 @@ def image_multi_class(
174
176
  print("An error occurred: {e}")
175
177
  link1.append("Error processing input: {e}")
176
178
 
177
- elif model_source == "Mistral":
179
+ elif model_source == "mistral":
178
180
  from mistralai import Mistral
179
181
  client = Mistral(api_key=api_key)
180
182
  try:
@@ -305,6 +307,8 @@ def image_score_drawing(
305
307
  '*.psd'
306
308
  ]
307
309
 
310
+ model_source = model_source.lower() # eliminating case sensitivity
311
+
308
312
  if not isinstance(image_input, list):
309
313
  # If image_input is a filepath (string)
310
314
  image_files = []
@@ -354,7 +358,7 @@ def image_score_drawing(
354
358
  ext = Path(img_path).suffix.lstrip(".").lower()
355
359
  encoded_image = f"data:image/{ext};base64,{encoded}"
356
360
 
357
- if model_source == "OpenAI" or model_source == "Mistral":
361
+ if model_source == "openai" or model_source == "mistral":
358
362
  prompt = [
359
363
  {
360
364
  "type": "text",
@@ -390,7 +394,7 @@ def image_score_drawing(
390
394
  }
391
395
  ]
392
396
 
393
- elif model_source == "Anthropic": # Changed to elif
397
+ elif model_source == "anthropic": # Changed to elif
394
398
  prompt = [
395
399
  {
396
400
  "type": "text",
@@ -435,7 +439,7 @@ def image_score_drawing(
435
439
  ]
436
440
 
437
441
 
438
- if model_source == "OpenAI":
442
+ if model_source == "openai":
439
443
  from openai import OpenAI
440
444
  client = OpenAI(api_key=api_key)
441
445
  try:
@@ -453,7 +457,7 @@ def image_score_drawing(
453
457
  print("An error occurred: {e}")
454
458
  link1.append("Error processing input: {e}")
455
459
 
456
- elif model_source == "Anthropic":
460
+ elif model_source == "anthropic":
457
461
  import anthropic
458
462
  client = anthropic.Anthropic(api_key=api_key)
459
463
  try:
@@ -472,7 +476,7 @@ def image_score_drawing(
472
476
  print("An error occurred: {e}")
473
477
  link1.append("Error processing input: {e}")
474
478
 
475
- elif model_source == "Mistral":
479
+ elif model_source == "mistral":
476
480
  from mistralai import Mistral
477
481
  client = Mistral(api_key=api_key)
478
482
  try:
@@ -598,6 +602,8 @@ def image_features(
598
602
  '*.psd'
599
603
  ]
600
604
 
605
+ model_source = model_source.lower() # eliminating case sensitivity
606
+
601
607
  if not isinstance(image_input, list):
602
608
  # If image_input is a filepath (string)
603
609
  image_files = []
@@ -644,7 +650,7 @@ def image_features(
644
650
  encoded_image = f"data:image/{ext};base64,{encoded}"
645
651
  valid_image = True
646
652
 
647
- if model_source == "OpenAI" or model_source == "Mistral":
653
+ if model_source == "openai" or model_source == "mistral":
648
654
  prompt = [
649
655
  {
650
656
  "type": "text",
@@ -674,7 +680,7 @@ def image_features(
674
680
  "image_url": {"url": encoded_image, "detail": "high"},
675
681
  },
676
682
  ]
677
- elif model_source == "Anthropic":
683
+ elif model_source == "anthropic":
678
684
  prompt = [
679
685
  {
680
686
  "type": "text",
@@ -708,7 +714,7 @@ def image_features(
708
714
  }
709
715
  }
710
716
  ]
711
- if model_source == "OpenAI":
717
+ if model_source == "openai":
712
718
  from openai import OpenAI
713
719
  client = OpenAI(api_key=api_key)
714
720
  try:
@@ -726,7 +732,7 @@ def image_features(
726
732
  print("An error occurred: {e}")
727
733
  link1.append("Error processing input: {e}")
728
734
 
729
- elif model_source == "Perplexity":
735
+ elif model_source == "perplexity":
730
736
  from openai import OpenAI
731
737
  client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
732
738
  try:
@@ -744,7 +750,7 @@ def image_features(
744
750
  print("An error occurred: {e}")
745
751
  link1.append("Error processing input: {e}")
746
752
 
747
- elif model_source == "Anthropic":
753
+ elif model_source == "anthropic":
748
754
  import anthropic
749
755
  client = anthropic.Anthropic(api_key=api_key)
750
756
  try:
@@ -763,7 +769,7 @@ def image_features(
763
769
  print("An error occurred: {e}")
764
770
  link1.append("Error processing input: {e}")
765
771
 
766
- elif model_source == "Mistral":
772
+ elif model_source == "mistral":
767
773
  from mistralai import Mistral
768
774
  client = Mistral(api_key=api_key)
769
775
  try:
@@ -22,6 +22,8 @@ def explore_corpus(
22
22
  print(f"Exploring class for question: '{survey_question}'.\n {cat_num * divisions} unique categories to be extracted.")
23
23
  print()
24
24
 
25
+ model_source = model_source.lower() # eliminating case sensitivity
26
+
25
27
  chunk_size = round(max(1, len(survey_input) / divisions),0)
26
28
  chunk_size = int(chunk_size)
27
29
 
@@ -46,7 +48,7 @@ Responses are each separated by a semicolon. \
46
48
  Responses are contained within triple backticks here: ```{survey_participant_chunks}``` \
47
49
  Number your categories from 1 through {cat_num} and be concise with the category labels and provide no description of the categories."""
48
50
 
49
- if model_source == "OpenAI":
51
+ if model_source == "openai":
50
52
  client = OpenAI(api_key=api_key)
51
53
  try:
52
54
  response_obj = client.chat.completions.create(
@@ -123,6 +125,8 @@ def explore_common_categories(
123
125
  print(f"Exploring class for question: '{survey_question}'.\n {cat_num * divisions} unique categories to be extracted and {top_n} to be identified as the most common.")
124
126
  print()
125
127
 
128
+ model_source = model_source.lower() # eliminating case sensitivity
129
+
126
130
  chunk_size = round(max(1, len(survey_input) / divisions),0)
127
131
  chunk_size = int(chunk_size)
128
132
 
@@ -147,7 +151,7 @@ Responses are each separated by a semicolon. \
147
151
  Responses are contained within triple backticks here: ```{survey_participant_chunks}``` \
148
152
  Number your categories from 1 through {cat_num} and be concise with the category labels and provide no description of the categories."""
149
153
 
150
- if model_source == "OpenAI":
154
+ if model_source == "openai":
151
155
  client = OpenAI(api_key=api_key)
152
156
  try:
153
157
  response_obj = client.chat.completions.create(
@@ -198,7 +202,7 @@ Number your categories from 1 through {cat_num} and be concise with the category
198
202
  The categories are contained within triple backticks here: ```{df['Category'].tolist()}``` \
199
203
  Return the top {top_n} categories as a numbered list sorted from the most to least common and keep the categories {specificity}, with no additional text or explanation."""
200
204
 
201
- if model_source == "OpenAI":
205
+ if model_source == "openai":
202
206
  client = OpenAI(api_key=api_key)
203
207
  response_obj = client.chat.completions.create(
204
208
  model=user_model,
@@ -237,6 +241,8 @@ def multi_class(
237
241
  import pandas as pd
238
242
  import regex
239
243
  from tqdm import tqdm
244
+
245
+ model_source = model_source.lower() # eliminating case sensitivity
240
246
 
241
247
  categories_str = "\n".join(f"{i + 1}. {cat}" for i, cat in enumerate(categories))
242
248
  cat_num = len(categories)
@@ -265,7 +271,7 @@ Categorize this survey response "{response}" into the following categories that
265
271
  {categories_str} \
266
272
  Provide your work in JSON format where the number belonging to each category is the key and a 1 if the category is present and a 0 if it is not present as key values."""
267
273
  #print(prompt)
268
- if model_source == ("OpenAI"):
274
+ if model_source == ("openai"):
269
275
  from openai import OpenAI
270
276
  client = OpenAI(api_key=api_key)
271
277
  try:
@@ -279,7 +285,7 @@ Provide your work in JSON format where the number belonging to each category is
279
285
  except Exception as e:
280
286
  print(f"An error occurred: {e}")
281
287
  link1.append(f"Error processing input: {e}")
282
- elif model_source == "Perplexity":
288
+ elif model_source == "perplexity":
283
289
  from openai import OpenAI
284
290
  client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
285
291
  try:
@@ -293,7 +299,7 @@ Provide your work in JSON format where the number belonging to each category is
293
299
  except Exception as e:
294
300
  print(f"An error occurred: {e}")
295
301
  link1.append(f"Error processing input: {e}")
296
- elif model_source == "Anthropic":
302
+ elif model_source == "anthropic":
297
303
  import anthropic
298
304
  client = anthropic.Anthropic(api_key=api_key)
299
305
  try:
@@ -309,7 +315,7 @@ Provide your work in JSON format where the number belonging to each category is
309
315
  print(f"An error occurred: {e}")
310
316
  link1.append(f"Error processing input: {e}")
311
317
 
312
- elif model_source == "Google":
318
+ elif model_source == "google":
313
319
  import requests
314
320
  url = f"https://generativelanguage.googleapis.com/v1beta/models/{user_model}:generateContent"
315
321
  try:
@@ -339,7 +345,7 @@ Provide your work in JSON format where the number belonging to each category is
339
345
  print(f"An error occurred: {e}")
340
346
  link1.append(f"Error processing input: {e}")
341
347
 
342
- elif model_source == "Mistral":
348
+ elif model_source == "mistral":
343
349
  from mistralai import Mistral
344
350
  client = Mistral(api_key=api_key)
345
351
  try:
File without changes
File without changes
File without changes
File without changes