cat-llm 0.0.42__tar.gz → 0.0.50__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cat_llm-0.0.42 → cat_llm-0.0.50}/PKG-INFO +1 -1
- {cat_llm-0.0.42 → cat_llm-0.0.50}/src/catllm/CERAD_functions.py +20 -8
- {cat_llm-0.0.42 → cat_llm-0.0.50}/src/catllm/__about__.py +1 -1
- cat_llm-0.0.50/src/catllm/build_web_research.py +169 -0
- {cat_llm-0.0.42 → cat_llm-0.0.50}/src/catllm/image_functions.py +86 -40
- {cat_llm-0.0.42 → cat_llm-0.0.50}/src/catllm/text_functions.py +4 -10
- {cat_llm-0.0.42 → cat_llm-0.0.50}/.gitignore +0 -0
- {cat_llm-0.0.42 → cat_llm-0.0.50}/LICENSE +0 -0
- {cat_llm-0.0.42 → cat_llm-0.0.50}/README.md +0 -0
- {cat_llm-0.0.42 → cat_llm-0.0.50}/pyproject.toml +0 -0
- {cat_llm-0.0.42 → cat_llm-0.0.50}/src/catllm/__init__.py +0 -0
- {cat_llm-0.0.42 → cat_llm-0.0.50}/src/catllm/images/circle.png +0 -0
- {cat_llm-0.0.42 → cat_llm-0.0.50}/src/catllm/images/cube.png +0 -0
- {cat_llm-0.0.42 → cat_llm-0.0.50}/src/catllm/images/diamond.png +0 -0
- {cat_llm-0.0.42 → cat_llm-0.0.50}/src/catllm/images/overlapping_pentagons.png +0 -0
- {cat_llm-0.0.42 → cat_llm-0.0.50}/src/catllm/images/rectangles.png +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cat-llm
|
|
3
|
-
Version: 0.0.42
|
|
3
|
+
Version: 0.0.50
|
|
4
4
|
Summary: A tool for categorizing text data and images using LLMs and vision models
|
|
5
5
|
Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
|
|
@@ -21,6 +21,7 @@ Areas for improvement:
|
|
|
21
21
|
10. Test variety: expanding or adding functions to score more tests relevant to cognitive assessment, such as the MMSE.
|
|
22
22
|
11. Error handling: improving error handling to better manage unexpected inputs or model failures.
|
|
23
23
|
"""
|
|
24
|
+
|
|
24
25
|
def cerad_drawn_score(
|
|
25
26
|
shape,
|
|
26
27
|
image_input,
|
|
@@ -265,8 +266,11 @@ def cerad_drawn_score(
|
|
|
265
266
|
reply = response_obj.choices[0].message.content
|
|
266
267
|
link1.append(reply)
|
|
267
268
|
except Exception as e:
|
|
268
|
-
|
|
269
|
-
|
|
269
|
+
if "model" in str(e).lower():
|
|
270
|
+
raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
|
|
271
|
+
else:
|
|
272
|
+
print("An error occurred: {e}")
|
|
273
|
+
link1.append("Error processing input: {e}")
|
|
270
274
|
|
|
271
275
|
elif model_source == "Anthropic" and valid_image:
|
|
272
276
|
import anthropic
|
|
@@ -281,8 +285,11 @@ def cerad_drawn_score(
|
|
|
281
285
|
reply = message.content[0].text # Anthropic returns content as list
|
|
282
286
|
link1.append(reply)
|
|
283
287
|
except Exception as e:
|
|
284
|
-
|
|
285
|
-
|
|
288
|
+
if "model" in str(e).lower():
|
|
289
|
+
raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
|
|
290
|
+
else:
|
|
291
|
+
print("An error occurred: {e}")
|
|
292
|
+
link1.append("Error processing input: {e}")
|
|
286
293
|
|
|
287
294
|
elif model_source == "Mistral" and valid_image:
|
|
288
295
|
from mistralai import Mistral
|
|
@@ -299,9 +306,11 @@ def cerad_drawn_score(
|
|
|
299
306
|
reply = response.choices[0].message.content
|
|
300
307
|
link1.append(reply)
|
|
301
308
|
except Exception as e:
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
309
|
+
if "model" in str(e).lower():
|
|
310
|
+
raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
|
|
311
|
+
else:
|
|
312
|
+
print("An error occurred: {e}")
|
|
313
|
+
link1.append("Error processing input: {e}")
|
|
305
314
|
#if no valid image path is provided
|
|
306
315
|
elif valid_image == False:
|
|
307
316
|
reply = "invalid image path"
|
|
@@ -365,7 +374,10 @@ def cerad_drawn_score(
|
|
|
365
374
|
normalized_data = pd.concat(normalized_data_list, ignore_index=True)
|
|
366
375
|
|
|
367
376
|
categorized_data = pd.DataFrame({
|
|
368
|
-
'image_input':
|
|
377
|
+
'image_input': (
|
|
378
|
+
image_files.reset_index(drop=True) if isinstance(image_files, (pd.DataFrame, pd.Series))
|
|
379
|
+
else pd.Series(image_files)
|
|
380
|
+
),
|
|
369
381
|
'link1': pd.Series(link1).reset_index(drop=True),
|
|
370
382
|
'json': pd.Series(extracted_jsons).reset_index(drop=True)
|
|
371
383
|
})
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
#build dataset classification
|
|
2
|
+
def _first_brace_group(text):
    """Return the first balanced ``{...}`` substring of *text*, or None.

    Matches the same text as the first hit of the recursive pattern
    ``\\{(?:[^{}]|(?R))*\\}`` while using only plain string scanning.
    """
    start = text.find("{")
    while start != -1:
        depth = 0
        for pos in range(start, len(text)):
            char = text[pos]
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
                if depth == 0:
                    return text[start:pos + 1]
        # This opening brace never closes; a later one may still be balanced.
        start = text.find("{", start + 1)
    return None


def _reply_to_json_text(reply):
    """Convert a model reply into the string stored in the 'json' column."""
    import json

    # Same schema as a successful answer so error rows share its columns.
    error_message = json.dumps({"answer": "e", "url": "e"})
    if reply is None:
        return error_message

    candidate = _first_brace_group(reply)
    if candidate is None:
        print(error_message)
        return error_message

    raw_json = candidate.strip()
    try:
        # Parse to validate, then re-serialize for consistent formatting.
        return json.dumps(json.loads(raw_json))
    except json.JSONDecodeError as e:
        print(f"JSON parsing error: {e}")
        # Keep the raw text so the failure stays visible in the output.
        return raw_json


def _json_text_to_frame(json_str):
    """Normalize one stored JSON string into a one-row DataFrame."""
    import json
    import pandas as pd

    try:
        return pd.json_normalize(json.loads(json_str))
    except json.JSONDecodeError:
        return pd.DataFrame({"1": ["e"]})


def build_web_research_dataset(
    search_question,
    search_input,
    api_key,
    answer_format = "concise",
    additional_instructions = "",
    categories = ['Answer','URL'],
    user_model="claude-3-7-sonnet-20250219",
    creativity=0,
    safety=False,
    filename="categorized_data.csv",
    save_directory=None,
    model_source="Anthropic",
    time_delay=5
):
    """Ask a web-search-enabled model one question per input item.

    For every non-missing item in ``search_input`` a prompt asking for
    "{item}'s {search_question}" is sent to the model together with the
    web-search tool, and the first JSON object found in the reply is
    stored and expanded into columns.

    Args:
        search_question: Text inserted after "{item}'s" in the prompt.
        search_input: Iterable of items to research (list, Series, ...).
            Items for which ``pd.isna`` is true are skipped.
        api_key: API key passed to the Anthropic client.
        answer_format: Inserted into the "Provide your answer as" rule.
        additional_instructions: Extra rule text appended to the prompt.
        categories: Labels printed as a numbered list; their count sizes
            the placeholder JSON stored for skipped items. Never mutated.
        user_model: Model name passed to the API.
        creativity: Passed to the API as ``temperature``.
        safety: When true, the progress so far is written to CSV after
            every item.
        filename: CSV file name used by the safety save.
        save_directory: Directory for the safety save; defaults to the
            current working directory.
        model_source: Only ``"Anthropic"`` is supported.
        time_delay: Seconds slept before the first item and after every
            API call, whether it succeeded or failed.

    Returns:
        pandas.DataFrame with the columns ``survey_response``, ``link1``
        (raw reply or status text) and ``json`` (stored JSON string),
        followed by the columns obtained from normalizing each JSON
        string. Empty input yields an empty frame with the first three
        columns only.

    Raises:
        ValueError: If a non-missing item is reached and ``model_source``
            is not ``"Anthropic"``.
    """
    import os
    import json
    import pandas as pd
    import time

    try:
        from tqdm import tqdm
    except ImportError:
        # The progress bar is cosmetic; iterate plainly without it.
        def tqdm(iterable, **_kwargs):
            return iterable

    categories_str = "\n".join(f"{i + 1}. {cat}" for i, cat in enumerate(categories))
    print(categories_str)
    category_dict = {str(i + 1): "0" for i in range(len(categories))}
    example_JSON = json.dumps(category_dict, indent=4)

    link1 = []
    extracted_jsons = []
    # One normalized row per item, kept incrementally so the safety save
    # does not have to re-parse every stored JSON string each iteration.
    normalized_frames = []
    client = None  # created once, on the first item that needs the API

    for idx, item in enumerate(tqdm(search_input, desc="Building dataset")):
        if idx == 0:  # delay the first item just to be safe
            time.sleep(time_delay)
        reply = None

        if pd.isna(item):
            link1.append("Skipped NaN input")
            extracted_jsons.append(example_JSON)
        else:
            prompt = f"""<role>You are a research assistant specializing in finding current, factual information.</role>

<task>Find information about {item}'s {search_question}</task>

<rules>
- Search for the most current and authoritative information available
- Provide your answer as {answer_format}
- Prioritize official sources when possible
- If information is not found, state "Information not found"
- Include exactly one source URL where you found the information
- Do not include any explanatory text or commentary beyond the JSON
{additional_instructions}
</rules>

<format>
Return your response as valid JSON with this exact structure:
{{
"answer": "Your factual answer or 'Information not found'",
"url": "Source URL or 'No source available'"
}}
</format>"""
            if model_source == "Anthropic":
                if client is None:
                    import anthropic
                    client = anthropic.Anthropic(api_key=api_key)
                try:
                    message = client.messages.create(
                        model=user_model,
                        max_tokens=1024,
                        temperature=creativity,
                        messages=[{"role": "user", "content": prompt}],
                        tools=[{
                            "type": "web_search_20250305",
                            "name": "web_search"
                        }]
                    )
                    # Keep only the text blocks of the reply content.
                    reply = " ".join(
                        block.text
                        for block in message.content
                        if getattr(block, "type", "") == "text"
                    ).strip()
                    link1.append(reply)
                    time.sleep(time_delay)
                    print(reply)

                except Exception as e:
                    # Best effort: record the failure and move on.
                    print(f"An error occurred: {e}")
                    link1.append(f"Error processing input: {e}")
                    time.sleep(time_delay)
            else:
                raise ValueError("Unknown source! Currently this function only supports 'Anthropic' as model_source.")

            extracted_jsons.append(_reply_to_json_text(reply))

        normalized_frames.append(_json_text_to_frame(extracted_jsons[-1]))

        # --- Safety Save ---
        if safety:
            # Reset the index so rows line up with the 0-based normalized
            # frame even when search_input carries its own index.
            if isinstance(search_input, (pd.DataFrame, pd.Series)):
                processed = search_input.iloc[:idx + 1].reset_index(drop=True)
            else:
                processed = search_input[:idx + 1]
            temp_df = pd.DataFrame({
                'survey_response': processed,
                'link1': link1,
                'json': extracted_jsons
            })
            normalized_data = pd.concat(normalized_frames, ignore_index=True)
            temp_df = pd.concat([temp_df, normalized_data], axis=1)
            if save_directory is None:
                save_directory = os.getcwd()
            temp_df.to_csv(os.path.join(save_directory, filename), index=False)

    # --- Final DataFrame ---
    if isinstance(search_input, (pd.DataFrame, pd.Series)):
        inputs = search_input.reset_index(drop=True)
    else:
        inputs = pd.Series(search_input)

    categorized_data = pd.DataFrame({
        'survey_response': inputs,
        'link1': pd.Series(link1).reset_index(drop=True),
        'json': pd.Series(extracted_jsons).reset_index(drop=True)
    })
    # pd.concat rejects an empty list, so skip it when nothing was processed.
    if normalized_frames:
        normalized_data = pd.concat(normalized_frames, ignore_index=True)
        categorized_data = pd.concat([categorized_data, normalized_data], axis=1)

    return categorized_data
|
|
@@ -148,8 +148,11 @@ def image_multi_class(
|
|
|
148
148
|
reply = response_obj.choices[0].message.content
|
|
149
149
|
link1.append(reply)
|
|
150
150
|
except Exception as e:
|
|
151
|
-
|
|
152
|
-
|
|
151
|
+
if "model" in str(e).lower():
|
|
152
|
+
raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
|
|
153
|
+
else:
|
|
154
|
+
print("An error occurred: {e}")
|
|
155
|
+
link1.append("Error processing input: {e}")
|
|
153
156
|
|
|
154
157
|
elif model_source == "Anthropic":
|
|
155
158
|
import anthropic
|
|
@@ -165,8 +168,11 @@ def image_multi_class(
|
|
|
165
168
|
reply = message.content[0].text
|
|
166
169
|
link1.append(reply)
|
|
167
170
|
except Exception as e:
|
|
168
|
-
|
|
169
|
-
|
|
171
|
+
if "model" in str(e).lower():
|
|
172
|
+
raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
|
|
173
|
+
else:
|
|
174
|
+
print("An error occurred: {e}")
|
|
175
|
+
link1.append("Error processing input: {e}")
|
|
170
176
|
|
|
171
177
|
elif model_source == "Mistral":
|
|
172
178
|
from mistralai import Mistral
|
|
@@ -182,8 +188,11 @@ def image_multi_class(
|
|
|
182
188
|
reply = response.choices[0].message.content
|
|
183
189
|
link1.append(reply)
|
|
184
190
|
except Exception as e:
|
|
185
|
-
|
|
186
|
-
|
|
191
|
+
if "model" in str(e).lower():
|
|
192
|
+
raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
|
|
193
|
+
else:
|
|
194
|
+
print("An error occurred: {e}")
|
|
195
|
+
link1.append("Error processing input: {e}")
|
|
187
196
|
#if no valid image path is provided
|
|
188
197
|
elif valid_image == False:
|
|
189
198
|
reply = "invalid image path"
|
|
@@ -243,9 +252,11 @@ def image_multi_class(
|
|
|
243
252
|
except json.JSONDecodeError:
|
|
244
253
|
normalized_data_list.append(pd.DataFrame({"1": ["e"]}))
|
|
245
254
|
normalized_data = pd.concat(normalized_data_list, ignore_index=True)
|
|
246
|
-
|
|
247
255
|
categorized_data = pd.DataFrame({
|
|
248
|
-
'image_input':
|
|
256
|
+
'image_input': (
|
|
257
|
+
image_files.reset_index(drop=True) if isinstance(image_files, (pd.DataFrame, pd.Series))
|
|
258
|
+
else pd.Series(image_files)
|
|
259
|
+
),
|
|
249
260
|
'link1': pd.Series(link1).reset_index(drop=True),
|
|
250
261
|
'json': pd.Series(extracted_jsons).reset_index(drop=True)
|
|
251
262
|
})
|
|
@@ -436,8 +447,11 @@ def image_score_drawing(
|
|
|
436
447
|
reply = response_obj.choices[0].message.content
|
|
437
448
|
link1.append(reply)
|
|
438
449
|
except Exception as e:
|
|
439
|
-
|
|
440
|
-
|
|
450
|
+
if "model" in str(e).lower():
|
|
451
|
+
raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
|
|
452
|
+
else:
|
|
453
|
+
print("An error occurred: {e}")
|
|
454
|
+
link1.append("Error processing input: {e}")
|
|
441
455
|
|
|
442
456
|
elif model_source == "Anthropic":
|
|
443
457
|
import anthropic
|
|
@@ -452,8 +466,11 @@ def image_score_drawing(
|
|
|
452
466
|
reply = message.content[0].text # Anthropic returns content as list
|
|
453
467
|
link1.append(reply)
|
|
454
468
|
except Exception as e:
|
|
455
|
-
|
|
456
|
-
|
|
469
|
+
if "model" in str(e).lower():
|
|
470
|
+
raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
|
|
471
|
+
else:
|
|
472
|
+
print("An error occurred: {e}")
|
|
473
|
+
link1.append("Error processing input: {e}")
|
|
457
474
|
|
|
458
475
|
elif model_source == "Mistral":
|
|
459
476
|
from mistralai import Mistral
|
|
@@ -469,8 +486,11 @@ def image_score_drawing(
|
|
|
469
486
|
reply = response.choices[0].message.content
|
|
470
487
|
link1.append(reply)
|
|
471
488
|
except Exception as e:
|
|
472
|
-
|
|
473
|
-
|
|
489
|
+
if "model" in str(e).lower():
|
|
490
|
+
raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
|
|
491
|
+
else:
|
|
492
|
+
print("An error occurred: {e}")
|
|
493
|
+
link1.append("Error processing input: {e}")
|
|
474
494
|
#if no valid image path is provided
|
|
475
495
|
elif valid_image == False:
|
|
476
496
|
reply = "invalid image path"
|
|
@@ -531,7 +551,10 @@ def image_score_drawing(
|
|
|
531
551
|
normalized_data = pd.concat(normalized_data_list, ignore_index=True)
|
|
532
552
|
|
|
533
553
|
categorized_data = pd.DataFrame({
|
|
534
|
-
'image_input':
|
|
554
|
+
'image_input': (
|
|
555
|
+
image_files.reset_index(drop=True) if isinstance(image_files, (pd.DataFrame, pd.Series))
|
|
556
|
+
else pd.Series(image_files)
|
|
557
|
+
),
|
|
535
558
|
'link1': pd.Series(link1).reset_index(drop=True),
|
|
536
559
|
'json': pd.Series(extracted_jsons).reset_index(drop=True)
|
|
537
560
|
})
|
|
@@ -567,10 +590,6 @@ def image_features(
|
|
|
567
590
|
import base64
|
|
568
591
|
from pathlib import Path
|
|
569
592
|
|
|
570
|
-
if save_directory is not None and not os.path.isdir(save_directory):
|
|
571
|
-
# Directory doesn't exist - raise an exception to halt execution
|
|
572
|
-
raise FileNotFoundError(f"Directory {save_directory} doesn't exist")
|
|
573
|
-
|
|
574
593
|
image_extensions = [
|
|
575
594
|
'*.png', '*.jpg', '*.jpeg',
|
|
576
595
|
'*.gif', '*.webp', '*.svg', '*.svgz', '*.avif', '*.apng',
|
|
@@ -595,26 +614,35 @@ def image_features(
|
|
|
595
614
|
cat_num = len(features_to_extract)
|
|
596
615
|
category_dict = {str(i+1): "0" for i in range(cat_num)}
|
|
597
616
|
example_JSON = json.dumps(category_dict, indent=4)
|
|
598
|
-
|
|
599
|
-
# ensure number of categories is what user wants
|
|
600
|
-
print("\nThe image features to be extracted are:")
|
|
601
|
-
for i, cat in enumerate(features_to_extract, 1):
|
|
602
|
-
print(f"{i}. {cat}")
|
|
603
617
|
|
|
604
618
|
link1 = []
|
|
605
619
|
extracted_jsons = []
|
|
606
620
|
|
|
607
|
-
for i, img_path in enumerate(
|
|
608
|
-
|
|
621
|
+
for i, img_path in enumerate(tqdm(image_files, desc="Scoring images"), start=0):
|
|
622
|
+
# Check validity first
|
|
609
623
|
if img_path is None or not os.path.exists(img_path):
|
|
610
624
|
link1.append("Skipped NaN input or invalid path")
|
|
611
625
|
extracted_jsons.append("""{"no_valid_image": 1}""")
|
|
612
626
|
continue # Skip the rest of the loop iteration
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
627
|
+
|
|
628
|
+
# Only open the file if path is valid
|
|
629
|
+
if os.path.isdir(img_path):
|
|
630
|
+
encoded = "Not a Valid Image, contains file path"
|
|
631
|
+
else:
|
|
632
|
+
try:
|
|
633
|
+
with open(img_path, "rb") as f:
|
|
634
|
+
encoded = base64.b64encode(f.read()).decode("utf-8")
|
|
635
|
+
except Exception as e:
|
|
636
|
+
encoded = f"Error: {str(e)}"
|
|
637
|
+
# Handle extension safely
|
|
638
|
+
if encoded.startswith("Error:") or encoded == "Not a Valid Image, contains file path":
|
|
639
|
+
encoded_image = encoded
|
|
640
|
+
valid_image = False
|
|
641
|
+
|
|
642
|
+
else:
|
|
643
|
+
ext = Path(img_path).suffix.lstrip(".").lower()
|
|
644
|
+
encoded_image = f"data:image/{ext};base64,{encoded}"
|
|
645
|
+
valid_image = True
|
|
618
646
|
|
|
619
647
|
if model_source == "OpenAI" or model_source == "Mistral":
|
|
620
648
|
prompt = [
|
|
@@ -692,8 +720,11 @@ def image_features(
|
|
|
692
720
|
reply = response_obj.choices[0].message.content
|
|
693
721
|
link1.append(reply)
|
|
694
722
|
except Exception as e:
|
|
695
|
-
|
|
696
|
-
|
|
723
|
+
if "model" in str(e).lower():
|
|
724
|
+
raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
|
|
725
|
+
else:
|
|
726
|
+
print("An error occurred: {e}")
|
|
727
|
+
link1.append("Error processing input: {e}")
|
|
697
728
|
|
|
698
729
|
elif model_source == "Perplexity":
|
|
699
730
|
from openai import OpenAI
|
|
@@ -707,8 +738,12 @@ def image_features(
|
|
|
707
738
|
reply = response_obj.choices[0].message.content
|
|
708
739
|
link1.append(reply)
|
|
709
740
|
except Exception as e:
|
|
710
|
-
|
|
711
|
-
|
|
741
|
+
if "model" in str(e).lower():
|
|
742
|
+
raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
|
|
743
|
+
else:
|
|
744
|
+
print("An error occurred: {e}")
|
|
745
|
+
link1.append("Error processing input: {e}")
|
|
746
|
+
|
|
712
747
|
elif model_source == "Anthropic":
|
|
713
748
|
import anthropic
|
|
714
749
|
client = anthropic.Anthropic(api_key=api_key)
|
|
@@ -722,8 +757,12 @@ def image_features(
|
|
|
722
757
|
reply = message.content[0].text # Anthropic returns content as list
|
|
723
758
|
link1.append(reply)
|
|
724
759
|
except Exception as e:
|
|
725
|
-
|
|
726
|
-
|
|
760
|
+
if "model" in str(e).lower():
|
|
761
|
+
raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
|
|
762
|
+
else:
|
|
763
|
+
print("An error occurred: {e}")
|
|
764
|
+
link1.append("Error processing input: {e}")
|
|
765
|
+
|
|
727
766
|
elif model_source == "Mistral":
|
|
728
767
|
from mistralai import Mistral
|
|
729
768
|
client = Mistral(api_key=api_key)
|
|
@@ -738,8 +777,12 @@ def image_features(
|
|
|
738
777
|
reply = response.choices[0].message.content
|
|
739
778
|
link1.append(reply)
|
|
740
779
|
except Exception as e:
|
|
741
|
-
|
|
742
|
-
|
|
780
|
+
if "model" in str(e).lower():
|
|
781
|
+
raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
|
|
782
|
+
else:
|
|
783
|
+
print("An error occurred: {e}")
|
|
784
|
+
link1.append("Error processing input: {e}")
|
|
785
|
+
|
|
743
786
|
elif valid_image == False:
|
|
744
787
|
print("Skipped NaN input or invalid path")
|
|
745
788
|
reply = None
|
|
@@ -797,7 +840,10 @@ def image_features(
|
|
|
797
840
|
normalized_data = pd.concat(normalized_data_list, ignore_index=True)
|
|
798
841
|
|
|
799
842
|
categorized_data = pd.DataFrame({
|
|
800
|
-
'image_input':
|
|
843
|
+
'image_input': (
|
|
844
|
+
image_files.reset_index(drop=True) if isinstance(image_files, (pd.DataFrame, pd.Series))
|
|
845
|
+
else pd.Series(image_files)
|
|
846
|
+
),
|
|
801
847
|
'link1': pd.Series(link1).reset_index(drop=True),
|
|
802
848
|
'json': pd.Series(extracted_jsons).reset_index(drop=True)
|
|
803
849
|
})
|
|
@@ -373,20 +373,14 @@ Provide your work in JSON format where the number belonging to each category is
|
|
|
373
373
|
except json.JSONDecodeError:
|
|
374
374
|
normalized_data_list.append(pd.DataFrame({"1": ["e"]}))
|
|
375
375
|
normalized_data = pd.concat(normalized_data_list, ignore_index=True)
|
|
376
|
-
|
|
377
376
|
categorized_data = pd.DataFrame({
|
|
378
|
-
'
|
|
377
|
+
'image_input': (
|
|
378
|
+
survey_input.reset_index(drop=True) if isinstance(survey_input, (pd.DataFrame, pd.Series))
|
|
379
|
+
else pd.Series(survey_input)
|
|
380
|
+
),
|
|
379
381
|
'link1': pd.Series(link1).reset_index(drop=True),
|
|
380
382
|
'json': pd.Series(extracted_jsons).reset_index(drop=True)
|
|
381
383
|
})
|
|
382
384
|
categorized_data = pd.concat([categorized_data, normalized_data], axis=1)
|
|
383
385
|
|
|
384
|
-
if columns != "numbered": #if user wants text columns
|
|
385
|
-
categorized_data.columns = list(categorized_data.columns[:3]) + categories[:len(categorized_data.columns) - 3]
|
|
386
|
-
|
|
387
|
-
if to_csv:
|
|
388
|
-
if save_directory is None:
|
|
389
|
-
save_directory = os.getcwd()
|
|
390
|
-
categorized_data.to_csv(os.path.join(save_directory, filename), index=False)
|
|
391
|
-
|
|
392
386
|
return categorized_data
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|