cat-llm 0.0.62__py3-none-any.whl → 0.0.63__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cat_llm-0.0.62.dist-info → cat_llm-0.0.63.dist-info}/METADATA +1 -1
- {cat_llm-0.0.62.dist-info → cat_llm-0.0.63.dist-info}/RECORD +8 -8
- catllm/CERAD_functions.py +8 -6
- catllm/__about__.py +1 -1
- catllm/image_functions.py +22 -16
- catllm/text_functions.py +14 -8
- {cat_llm-0.0.62.dist-info → cat_llm-0.0.63.dist-info}/WHEEL +0 -0
- {cat_llm-0.0.62.dist-info → cat_llm-0.0.63.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cat-llm
|
|
3
|
-
Version: 0.0.62
|
|
3
|
+
Version: 0.0.63
|
|
4
4
|
Summary: A tool for categorizing text data and images using LLMs and vision models
|
|
5
5
|
Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
|
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
catllm/CERAD_functions.py,sha256=
|
|
2
|
-
catllm/__about__.py,sha256=
|
|
1
|
+
catllm/CERAD_functions.py,sha256=q4HbP5e2Yu8NnZZ-2eX4sImyj6u3i8xWcq0pYU81iis,22676
|
|
2
|
+
catllm/__about__.py,sha256=NCCTWpJ4xKqiFOIwB1tnv4BLyeGVzeNfFegpbJQicgw,408
|
|
3
3
|
catllm/__init__.py,sha256=sf02zp7N0NW0mAQi7eQ4gliWR1EwoqvXkHN2HwwjcTE,372
|
|
4
4
|
catllm/build_web_research.py,sha256=nAKfkg7lihjXrYrLvltsKCvpb5zRFYpNp95A-0zpDb8,9159
|
|
5
|
-
catllm/image_functions.py,sha256=
|
|
6
|
-
catllm/text_functions.py,sha256=
|
|
5
|
+
catllm/image_functions.py,sha256=8_FftRU285x1HT-AgNkaobefQVD-5q7ZY_t7JFdL3Sg,36177
|
|
6
|
+
catllm/text_functions.py,sha256=Jf51lNaFtcS2QGnNLkhM8rFVJSD4tN0Bm_VfELvb47g,18686
|
|
7
7
|
catllm/images/circle.png,sha256=JWujAWAh08-TajAoEr_TAeFNLlfbryOLw6cgIBREBuQ,86202
|
|
8
8
|
catllm/images/cube.png,sha256=nFec3e5bmRe4zrBCJ8QK-HcJLrG7u7dYdKhmdMfacfE,77275
|
|
9
9
|
catllm/images/diamond.png,sha256=rJDZKtsnBGRO8FPA0iHuA8FvHFGi9PkI_DWSFdw6iv0,99568
|
|
10
10
|
catllm/images/overlapping_pentagons.png,sha256=VO5plI6eoVRnjfqinn1nNzsCP2WQhuQy71V0EASouW4,71208
|
|
11
11
|
catllm/images/rectangles.png,sha256=2XM16HO9EYWj2yHgN4bPXaCwPfl7iYQy0tQUGaJX9xg,40692
|
|
12
|
-
cat_llm-0.0.
|
|
13
|
-
cat_llm-0.0.62.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
14
|
-
cat_llm-0.0.62.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
|
|
15
|
-
cat_llm-0.0.62.dist-info/RECORD,,
|
|
12
|
+
cat_llm-0.0.63.dist-info/METADATA,sha256=y9kIgflPVthWZEiNVkabYIJ8p82IW-pO4FliuS5T8AE,22395
|
|
13
|
+
cat_llm-0.0.63.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
14
|
+
cat_llm-0.0.63.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
|
|
15
|
+
cat_llm-0.0.63.dist-info/RECORD,,
|
catllm/CERAD_functions.py
CHANGED
|
@@ -44,6 +44,8 @@ def cerad_drawn_score(
|
|
|
44
44
|
from pathlib import Path
|
|
45
45
|
import pkg_resources
|
|
46
46
|
|
|
47
|
+
model_source = model_source.lower() # eliminating case sensitivity
|
|
48
|
+
|
|
47
49
|
shape = shape.lower()
|
|
48
50
|
shape = "rectangles" if shape == "overlapping rectangles" else shape
|
|
49
51
|
if shape == "circle":
|
|
@@ -155,7 +157,7 @@ def cerad_drawn_score(
|
|
|
155
157
|
else:
|
|
156
158
|
reference_text = f"Image is expected to show within it a drawing of a {shape}.\n\n"
|
|
157
159
|
|
|
158
|
-
if model_source == "
|
|
160
|
+
if model_source == "openai" and valid_image:
|
|
159
161
|
prompt = [
|
|
160
162
|
{
|
|
161
163
|
"type": "text",
|
|
@@ -185,7 +187,7 @@ def cerad_drawn_score(
|
|
|
185
187
|
"image_url": {"url": encoded_image, "detail": "high"}
|
|
186
188
|
})
|
|
187
189
|
|
|
188
|
-
elif model_source == "
|
|
190
|
+
elif model_source == "anthropic" and valid_image:
|
|
189
191
|
prompt = [
|
|
190
192
|
{
|
|
191
193
|
"type": "text",
|
|
@@ -225,7 +227,7 @@ def cerad_drawn_score(
|
|
|
225
227
|
}
|
|
226
228
|
)
|
|
227
229
|
|
|
228
|
-
elif model_source == "
|
|
230
|
+
elif model_source == "mistral" and valid_image:
|
|
229
231
|
prompt = [
|
|
230
232
|
{
|
|
231
233
|
"type": "text",
|
|
@@ -254,7 +256,7 @@ def cerad_drawn_score(
|
|
|
254
256
|
"image_url": f"data:image/{ext};base64,{encoded_image}"
|
|
255
257
|
})
|
|
256
258
|
|
|
257
|
-
if model_source == "
|
|
259
|
+
if model_source == "openai" and valid_image:
|
|
258
260
|
from openai import OpenAI
|
|
259
261
|
client = OpenAI(api_key=api_key)
|
|
260
262
|
try:
|
|
@@ -272,7 +274,7 @@ def cerad_drawn_score(
|
|
|
272
274
|
print("An error occurred: {e}")
|
|
273
275
|
link1.append("Error processing input: {e}")
|
|
274
276
|
|
|
275
|
-
elif model_source == "
|
|
277
|
+
elif model_source == "anthropic" and valid_image:
|
|
276
278
|
import anthropic
|
|
277
279
|
client = anthropic.Anthropic(api_key=api_key)
|
|
278
280
|
try:
|
|
@@ -291,7 +293,7 @@ def cerad_drawn_score(
|
|
|
291
293
|
print("An error occurred: {e}")
|
|
292
294
|
link1.append("Error processing input: {e}")
|
|
293
295
|
|
|
294
|
-
elif model_source == "
|
|
296
|
+
elif model_source == "mistral" and valid_image:
|
|
295
297
|
from mistralai import Mistral
|
|
296
298
|
reply = None
|
|
297
299
|
client = Mistral(api_key=api_key)
|
catllm/__about__.py
CHANGED
catllm/image_functions.py
CHANGED
|
@@ -33,6 +33,8 @@ def image_multi_class(
|
|
|
33
33
|
'*.psd'
|
|
34
34
|
]
|
|
35
35
|
|
|
36
|
+
model_source = model_source.lower() # eliminating case sensitivity
|
|
37
|
+
|
|
36
38
|
if not isinstance(image_input, list):
|
|
37
39
|
# If image_input is a filepath (string)
|
|
38
40
|
image_files = []
|
|
@@ -86,7 +88,7 @@ def image_multi_class(
|
|
|
86
88
|
|
|
87
89
|
# Handle extension safely
|
|
88
90
|
ext = Path(img_path).suffix.lstrip(".").lower()
|
|
89
|
-
if model_source == "
|
|
91
|
+
if model_source == "openai" or model_source == "mistral":
|
|
90
92
|
encoded_image = f"data:image/{ext};base64,{encoded}"
|
|
91
93
|
prompt = [
|
|
92
94
|
{
|
|
@@ -110,7 +112,7 @@ def image_multi_class(
|
|
|
110
112
|
},
|
|
111
113
|
]
|
|
112
114
|
|
|
113
|
-
elif model_source == "
|
|
115
|
+
elif model_source == "anthropic":
|
|
114
116
|
encoded_image = f"data:image/{ext};base64,{encoded}"
|
|
115
117
|
prompt = [
|
|
116
118
|
{"type": "text",
|
|
@@ -136,7 +138,7 @@ def image_multi_class(
|
|
|
136
138
|
}
|
|
137
139
|
}
|
|
138
140
|
]
|
|
139
|
-
if model_source == "
|
|
141
|
+
if model_source == "openAI":
|
|
140
142
|
from openai import OpenAI
|
|
141
143
|
client = OpenAI(api_key=api_key)
|
|
142
144
|
try:
|
|
@@ -154,7 +156,7 @@ def image_multi_class(
|
|
|
154
156
|
print("An error occurred: {e}")
|
|
155
157
|
link1.append("Error processing input: {e}")
|
|
156
158
|
|
|
157
|
-
elif model_source == "
|
|
159
|
+
elif model_source == "anthropic":
|
|
158
160
|
import anthropic
|
|
159
161
|
reply = None
|
|
160
162
|
client = anthropic.Anthropic(api_key=api_key)
|
|
@@ -174,7 +176,7 @@ def image_multi_class(
|
|
|
174
176
|
print("An error occurred: {e}")
|
|
175
177
|
link1.append("Error processing input: {e}")
|
|
176
178
|
|
|
177
|
-
elif model_source == "
|
|
179
|
+
elif model_source == "mistral":
|
|
178
180
|
from mistralai import Mistral
|
|
179
181
|
client = Mistral(api_key=api_key)
|
|
180
182
|
try:
|
|
@@ -305,6 +307,8 @@ def image_score_drawing(
|
|
|
305
307
|
'*.psd'
|
|
306
308
|
]
|
|
307
309
|
|
|
310
|
+
model_source = model_source.lower() # eliminating case sensitivity
|
|
311
|
+
|
|
308
312
|
if not isinstance(image_input, list):
|
|
309
313
|
# If image_input is a filepath (string)
|
|
310
314
|
image_files = []
|
|
@@ -354,7 +358,7 @@ def image_score_drawing(
|
|
|
354
358
|
ext = Path(img_path).suffix.lstrip(".").lower()
|
|
355
359
|
encoded_image = f"data:image/{ext};base64,{encoded}"
|
|
356
360
|
|
|
357
|
-
if model_source == "
|
|
361
|
+
if model_source == "openai" or model_source == "mistral":
|
|
358
362
|
prompt = [
|
|
359
363
|
{
|
|
360
364
|
"type": "text",
|
|
@@ -390,7 +394,7 @@ def image_score_drawing(
|
|
|
390
394
|
}
|
|
391
395
|
]
|
|
392
396
|
|
|
393
|
-
elif model_source == "
|
|
397
|
+
elif model_source == "anthropic": # Changed to elif
|
|
394
398
|
prompt = [
|
|
395
399
|
{
|
|
396
400
|
"type": "text",
|
|
@@ -435,7 +439,7 @@ def image_score_drawing(
|
|
|
435
439
|
]
|
|
436
440
|
|
|
437
441
|
|
|
438
|
-
if model_source == "
|
|
442
|
+
if model_source == "openai":
|
|
439
443
|
from openai import OpenAI
|
|
440
444
|
client = OpenAI(api_key=api_key)
|
|
441
445
|
try:
|
|
@@ -453,7 +457,7 @@ def image_score_drawing(
|
|
|
453
457
|
print("An error occurred: {e}")
|
|
454
458
|
link1.append("Error processing input: {e}")
|
|
455
459
|
|
|
456
|
-
elif model_source == "
|
|
460
|
+
elif model_source == "anthropic":
|
|
457
461
|
import anthropic
|
|
458
462
|
client = anthropic.Anthropic(api_key=api_key)
|
|
459
463
|
try:
|
|
@@ -472,7 +476,7 @@ def image_score_drawing(
|
|
|
472
476
|
print("An error occurred: {e}")
|
|
473
477
|
link1.append("Error processing input: {e}")
|
|
474
478
|
|
|
475
|
-
elif model_source == "
|
|
479
|
+
elif model_source == "mistral":
|
|
476
480
|
from mistralai import Mistral
|
|
477
481
|
client = Mistral(api_key=api_key)
|
|
478
482
|
try:
|
|
@@ -598,6 +602,8 @@ def image_features(
|
|
|
598
602
|
'*.psd'
|
|
599
603
|
]
|
|
600
604
|
|
|
605
|
+
model_source = model_source.lower() # eliminating case sensitivity
|
|
606
|
+
|
|
601
607
|
if not isinstance(image_input, list):
|
|
602
608
|
# If image_input is a filepath (string)
|
|
603
609
|
image_files = []
|
|
@@ -644,7 +650,7 @@ def image_features(
|
|
|
644
650
|
encoded_image = f"data:image/{ext};base64,{encoded}"
|
|
645
651
|
valid_image = True
|
|
646
652
|
|
|
647
|
-
if model_source == "
|
|
653
|
+
if model_source == "openai" or model_source == "mistral":
|
|
648
654
|
prompt = [
|
|
649
655
|
{
|
|
650
656
|
"type": "text",
|
|
@@ -674,7 +680,7 @@ def image_features(
|
|
|
674
680
|
"image_url": {"url": encoded_image, "detail": "high"},
|
|
675
681
|
},
|
|
676
682
|
]
|
|
677
|
-
elif model_source == "
|
|
683
|
+
elif model_source == "anthropic":
|
|
678
684
|
prompt = [
|
|
679
685
|
{
|
|
680
686
|
"type": "text",
|
|
@@ -708,7 +714,7 @@ def image_features(
|
|
|
708
714
|
}
|
|
709
715
|
}
|
|
710
716
|
]
|
|
711
|
-
if model_source == "
|
|
717
|
+
if model_source == "openai":
|
|
712
718
|
from openai import OpenAI
|
|
713
719
|
client = OpenAI(api_key=api_key)
|
|
714
720
|
try:
|
|
@@ -726,7 +732,7 @@ def image_features(
|
|
|
726
732
|
print("An error occurred: {e}")
|
|
727
733
|
link1.append("Error processing input: {e}")
|
|
728
734
|
|
|
729
|
-
elif model_source == "
|
|
735
|
+
elif model_source == "perplexity":
|
|
730
736
|
from openai import OpenAI
|
|
731
737
|
client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
|
|
732
738
|
try:
|
|
@@ -744,7 +750,7 @@ def image_features(
|
|
|
744
750
|
print("An error occurred: {e}")
|
|
745
751
|
link1.append("Error processing input: {e}")
|
|
746
752
|
|
|
747
|
-
elif model_source == "
|
|
753
|
+
elif model_source == "anthropic":
|
|
748
754
|
import anthropic
|
|
749
755
|
client = anthropic.Anthropic(api_key=api_key)
|
|
750
756
|
try:
|
|
@@ -763,7 +769,7 @@ def image_features(
|
|
|
763
769
|
print("An error occurred: {e}")
|
|
764
770
|
link1.append("Error processing input: {e}")
|
|
765
771
|
|
|
766
|
-
elif model_source == "
|
|
772
|
+
elif model_source == "mistral":
|
|
767
773
|
from mistralai import Mistral
|
|
768
774
|
client = Mistral(api_key=api_key)
|
|
769
775
|
try:
|
catllm/text_functions.py
CHANGED
|
@@ -22,6 +22,8 @@ def explore_corpus(
|
|
|
22
22
|
print(f"Exploring class for question: '{survey_question}'.\n {cat_num * divisions} unique categories to be extracted.")
|
|
23
23
|
print()
|
|
24
24
|
|
|
25
|
+
model_source = model_source.lower() # eliminating case sensitivity
|
|
26
|
+
|
|
25
27
|
chunk_size = round(max(1, len(survey_input) / divisions),0)
|
|
26
28
|
chunk_size = int(chunk_size)
|
|
27
29
|
|
|
@@ -46,7 +48,7 @@ Responses are each separated by a semicolon. \
|
|
|
46
48
|
Responses are contained within triple backticks here: ```{survey_participant_chunks}``` \
|
|
47
49
|
Number your categories from 1 through {cat_num} and be concise with the category labels and provide no description of the categories."""
|
|
48
50
|
|
|
49
|
-
if model_source == "
|
|
51
|
+
if model_source == "openai":
|
|
50
52
|
client = OpenAI(api_key=api_key)
|
|
51
53
|
try:
|
|
52
54
|
response_obj = client.chat.completions.create(
|
|
@@ -123,6 +125,8 @@ def explore_common_categories(
|
|
|
123
125
|
print(f"Exploring class for question: '{survey_question}'.\n {cat_num * divisions} unique categories to be extracted and {top_n} to be identified as the most common.")
|
|
124
126
|
print()
|
|
125
127
|
|
|
128
|
+
model_source = model_source.lower() # eliminating case sensitivity
|
|
129
|
+
|
|
126
130
|
chunk_size = round(max(1, len(survey_input) / divisions),0)
|
|
127
131
|
chunk_size = int(chunk_size)
|
|
128
132
|
|
|
@@ -147,7 +151,7 @@ Responses are each separated by a semicolon. \
|
|
|
147
151
|
Responses are contained within triple backticks here: ```{survey_participant_chunks}``` \
|
|
148
152
|
Number your categories from 1 through {cat_num} and be concise with the category labels and provide no description of the categories."""
|
|
149
153
|
|
|
150
|
-
if model_source == "
|
|
154
|
+
if model_source == "openai":
|
|
151
155
|
client = OpenAI(api_key=api_key)
|
|
152
156
|
try:
|
|
153
157
|
response_obj = client.chat.completions.create(
|
|
@@ -198,7 +202,7 @@ Number your categories from 1 through {cat_num} and be concise with the category
|
|
|
198
202
|
The categories are contained within triple backticks here: ```{df['Category'].tolist()}``` \
|
|
199
203
|
Return the top {top_n} categories as a numbered list sorted from the most to least common and keep the categories {specificity}, with no additional text or explanation."""
|
|
200
204
|
|
|
201
|
-
if model_source == "
|
|
205
|
+
if model_source == "openai":
|
|
202
206
|
client = OpenAI(api_key=api_key)
|
|
203
207
|
response_obj = client.chat.completions.create(
|
|
204
208
|
model=user_model,
|
|
@@ -237,6 +241,8 @@ def multi_class(
|
|
|
237
241
|
import pandas as pd
|
|
238
242
|
import regex
|
|
239
243
|
from tqdm import tqdm
|
|
244
|
+
|
|
245
|
+
model_source = model_source.lower() # eliminating case sensitivity
|
|
240
246
|
|
|
241
247
|
categories_str = "\n".join(f"{i + 1}. {cat}" for i, cat in enumerate(categories))
|
|
242
248
|
cat_num = len(categories)
|
|
@@ -265,7 +271,7 @@ Categorize this survey response "{response}" into the following categories that
|
|
|
265
271
|
{categories_str} \
|
|
266
272
|
Provide your work in JSON format where the number belonging to each category is the key and a 1 if the category is present and a 0 if it is not present as key values."""
|
|
267
273
|
#print(prompt)
|
|
268
|
-
if model_source == ("
|
|
274
|
+
if model_source == ("openai"):
|
|
269
275
|
from openai import OpenAI
|
|
270
276
|
client = OpenAI(api_key=api_key)
|
|
271
277
|
try:
|
|
@@ -279,7 +285,7 @@ Provide your work in JSON format where the number belonging to each category is
|
|
|
279
285
|
except Exception as e:
|
|
280
286
|
print(f"An error occurred: {e}")
|
|
281
287
|
link1.append(f"Error processing input: {e}")
|
|
282
|
-
elif model_source == "
|
|
288
|
+
elif model_source == "perplexity":
|
|
283
289
|
from openai import OpenAI
|
|
284
290
|
client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
|
|
285
291
|
try:
|
|
@@ -293,7 +299,7 @@ Provide your work in JSON format where the number belonging to each category is
|
|
|
293
299
|
except Exception as e:
|
|
294
300
|
print(f"An error occurred: {e}")
|
|
295
301
|
link1.append(f"Error processing input: {e}")
|
|
296
|
-
elif model_source == "
|
|
302
|
+
elif model_source == "anthropic":
|
|
297
303
|
import anthropic
|
|
298
304
|
client = anthropic.Anthropic(api_key=api_key)
|
|
299
305
|
try:
|
|
@@ -309,7 +315,7 @@ Provide your work in JSON format where the number belonging to each category is
|
|
|
309
315
|
print(f"An error occurred: {e}")
|
|
310
316
|
link1.append(f"Error processing input: {e}")
|
|
311
317
|
|
|
312
|
-
elif model_source == "
|
|
318
|
+
elif model_source == "google":
|
|
313
319
|
import requests
|
|
314
320
|
url = f"https://generativelanguage.googleapis.com/v1beta/models/{user_model}:generateContent"
|
|
315
321
|
try:
|
|
@@ -339,7 +345,7 @@ Provide your work in JSON format where the number belonging to each category is
|
|
|
339
345
|
print(f"An error occurred: {e}")
|
|
340
346
|
link1.append(f"Error processing input: {e}")
|
|
341
347
|
|
|
342
|
-
elif model_source == "
|
|
348
|
+
elif model_source == "mistral":
|
|
343
349
|
from mistralai import Mistral
|
|
344
350
|
client = Mistral(api_key=api_key)
|
|
345
351
|
try:
|
|
File without changes
|
|
File without changes
|