cat-llm 0.0.25__py3-none-any.whl → 0.0.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,689 @@
1
# image multi-class (binary) function
def extract_image_multi_class(
    image_description,
    image_input,
    categories,
    api_key,
    columns="numbered",
    user_model="gpt-4o",
    creativity=0,
    to_csv=False,
    safety=False,
    filename="categorized_data.csv",
    save_directory=None,
    model_source="OpenAI"
):
    """Ask a vision LLM, for each image, whether every category is present.

    Parameters
    ----------
    image_description : str
        Short description of what the images are expected to show.
    image_input : str | list
        Directory to glob for images, or an explicit list of image paths.
    categories : list[str]
        Labels the model scores as 1 (present) or 0 (absent).
    api_key : str
        API key for the selected ``model_source``.
    columns : str
        "numbered" keeps numeric column names; anything else renames the
        per-category columns to the category text.
    user_model : str
        Model identifier passed to the provider.
    creativity : float
        Sampling temperature.
    to_csv : bool
        Write the final DataFrame to CSV.
    safety : bool
        Write an incremental CSV after every processed image.
    filename : str
        CSV filename used by ``safety`` and ``to_csv``.
    save_directory : str | None
        Output directory (defaults to the current working directory).
    model_source : str
        "OpenAI", "Perplexity", "Anthropic", or "Mistral".

    Returns
    -------
    pandas.DataFrame
        One row per image: path, raw model reply, extracted JSON string,
        and one column per category.

    Raises
    ------
    FileNotFoundError
        If ``save_directory`` is given but does not exist.
    ValueError
        If ``model_source`` is not recognised.
    """
    import os
    import json
    import pandas as pd
    import regex
    from tqdm import tqdm
    import glob
    import base64
    from pathlib import Path

    if save_directory is not None and not os.path.isdir(save_directory):
        # Directory doesn't exist - raise an exception to halt execution
        raise FileNotFoundError(f"Directory {save_directory} doesn't exist")

    image_extensions = [
        '*.png', '*.jpg', '*.jpeg',
        '*.gif', '*.webp', '*.svg', '*.svgz', '*.avif', '*.apng',
        '*.tif', '*.tiff', '*.bmp',
        '*.heif', '*.heic', '*.ico',
        '*.psd'
    ]

    if not isinstance(image_input, list):
        # If image_input is a filepath (string), collect matching images.
        image_files = []
        for ext in image_extensions:
            image_files.extend(glob.glob(os.path.join(image_input, ext)))
        print(f"Found {len(image_files)} images.")
    else:
        # If image_files is already a list
        image_files = image_input
        print(f"Provided a list of {len(image_input)} images.")

    categories_str = "\n".join(f"{i + 1}. {cat}" for i, cat in enumerate(categories))
    cat_num = len(categories)
    category_dict = {str(i + 1): "0" for i in range(cat_num)}
    example_JSON = json.dumps(category_dict, indent=4)

    # ensure number of categories is what user wants
    print("Categories to classify:")
    for i, cat in enumerate(categories, 1):
        print(f"{i}. {cat}")

    def _normalize_jsons(json_strs):
        # Parse each extracted JSON string into a one-row frame; fall back
        # to an error-marker row when the string is not valid JSON.
        frames = []
        for json_str in json_strs:
            try:
                frames.append(pd.json_normalize(json.loads(json_str)))
            except json.JSONDecodeError:
                frames.append(pd.DataFrame({"1": ["e"]}))
        return pd.concat(frames, ignore_index=True)

    link1 = []
    extracted_jsons = []

    for i, img_path in enumerate(tqdm(image_files, desc="Categorising images"), start=0):
        # BUG FIX: reset per image. Previously a failed API call left
        # `reply` holding the previous image's answer (or unbound on the
        # first iteration, raising UnboundLocalError below).
        reply = None

        # Check validity first
        if img_path is None or not os.path.exists(img_path):
            link1.append("Skipped NaN input or invalid path")
            extracted_jsons.append("""{"no_valid_image": 1}""")
            continue  # Skip the rest of the loop iteration

        # Only open the file if path is valid
        with open(img_path, "rb") as f:
            encoded = base64.b64encode(f.read()).decode("utf-8")

        # Handle extension safely
        ext = Path(img_path).suffix.lstrip(".").lower()
        encoded_image = f"data:image/{ext};base64,{encoded}"

        # Single prompt text shared by every provider branch.
        prompt_text = (
            f"You are an image-tagging assistant.\n"
            f"Task ► Examine the attached image and decide, **for each category below**, "
            f"whether it is PRESENT (1) or NOT PRESENT (0).\n\n"
            f"Image is expected to show: {image_description}\n\n"
            f"Categories:\n{categories_str}\n\n"
            f"Output format ► Respond with **only** a JSON object whose keys are the "
            f"quoted category numbers ('1', '2', …) and whose values are 1 or 0. "
            f"No additional keys, comments, or text.\n\n"
            f"Example (three categories):\n"
            f"{example_JSON}"
        )
        prompt = [
            {"type": "text", "text": prompt_text},
            {
                "type": "image_url",
                "image_url": {"url": encoded_image, "detail": "high"},
            },
        ]

        if model_source == "OpenAI":
            from openai import OpenAI
            client = OpenAI(api_key=api_key)
            try:
                response_obj = client.chat.completions.create(
                    model=user_model,
                    messages=[{'role': 'user', 'content': prompt}],
                    temperature=creativity
                )
                reply = response_obj.choices[0].message.content
                link1.append(reply)
            except Exception as e:
                print(f"An error occurred: {e}")
                link1.append(f"Error processing input: {e}")

        elif model_source == "Perplexity":
            # Consistency fix: the error message below advertises Perplexity,
            # and the sibling functions support it via the OpenAI-compatible
            # endpoint, but this function lacked the branch.
            from openai import OpenAI
            client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
            try:
                response_obj = client.chat.completions.create(
                    model=user_model,
                    messages=[{'role': 'user', 'content': prompt}],
                    temperature=creativity
                )
                reply = response_obj.choices[0].message.content
                link1.append(reply)
            except Exception as e:
                print(f"An error occurred: {e}")
                link1.append(f"Error processing input: {e}")

        elif model_source == "Anthropic":
            # BUG FIX: Anthropic's Messages API expects the raw base64
            # payload (not a "data:" URL) and the real media type; the old
            # code sent the full data URL with a hardcoded "image/jpeg".
            media_type = f"image/{'jpeg' if ext == 'jpg' else ext}"
            anthropic_prompt = [
                {"type": "text", "text": prompt_text},
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": media_type,
                        "data": encoded
                    }
                }
            ]
            import anthropic
            client = anthropic.Anthropic(api_key=api_key)
            try:
                message = client.messages.create(
                    model=user_model,
                    max_tokens=1024,
                    temperature=creativity,
                    messages=[{"role": "user", "content": anthropic_prompt}]
                )
                reply = message.content[0].text
                link1.append(reply)
            except Exception as e:
                print(f"An error occurred: {e}")
                link1.append(f"Error processing input: {e}")

        elif model_source == "Mistral":
            from mistralai import Mistral
            client = Mistral(api_key=api_key)
            try:
                response = client.chat.complete(
                    model=user_model,
                    messages=[
                        {'role': 'user', 'content': prompt}
                    ],
                    temperature=creativity
                )
                reply = response.choices[0].message.content
                link1.append(reply)
            except Exception as e:
                print(f"An error occurred: {e}")
                link1.append(f"Error processing input: {e}")
        else:
            raise ValueError("Unknown source! Choose from OpenAI, Anthropic, Perplexity, or Mistral")

        # in situation that no JSON is found
        if reply is not None:
            # Recursive regex: grab the first balanced {...} object.
            extracted_json = regex.findall(r'\{(?:[^{}]|(?R))*\}', reply, regex.DOTALL)
            if extracted_json:
                # NOTE(review): this also strips spaces inside JSON string
                # values; harmless for 0/1 answers but lossy for free text.
                cleaned_json = extracted_json[0].replace('[', '').replace(']', '').replace('\n', '').replace(" ", '').replace(" ", '')
                extracted_jsons.append(cleaned_json)
            else:
                error_message = """{"1":"e"}"""
                extracted_jsons.append(error_message)
                print(error_message)
        else:
            error_message = """{"1":"e"}"""
            extracted_jsons.append(error_message)

        # --- Safety Save ---
        if safety:
            # Save progress so far
            temp_df = pd.DataFrame({
                'image_input': image_files[:i + 1],
                'link1': link1,
                'json': extracted_jsons
            })
            temp_df = pd.concat([temp_df, _normalize_jsons(extracted_jsons)], axis=1)
            # Save to CSV
            if save_directory is None:
                save_directory = os.getcwd()
            temp_df.to_csv(os.path.join(save_directory, filename), index=False)

    # --- Final DataFrame ---
    normalized_data = _normalize_jsons(extracted_jsons)

    categorized_data = pd.DataFrame({
        'image_input': image_files,
        'link1': pd.Series(link1).reset_index(drop=True),
        'json': pd.Series(extracted_jsons).reset_index(drop=True)
    })
    categorized_data = pd.concat([categorized_data, normalized_data], axis=1)

    if columns != "numbered":  # if user wants text columns
        categorized_data.columns = list(categorized_data.columns[:3]) + categories[:len(categorized_data.columns) - 3]

    if to_csv:
        if save_directory is None:
            save_directory = os.getcwd()
        categorized_data.to_csv(os.path.join(save_directory, filename), index=False)

    return categorized_data
238
+
239
# image score function
def extract_image_score(
    reference_image_description,
    image_input,
    reference_image,
    api_key,
    columns="numbered",
    user_model="gpt-4o-2024-11-20",
    creativity=0,
    to_csv=False,
    safety=False,
    filename="categorized_data.csv",
    save_directory=None,
    model_source="OpenAI"
):
    """Score each input image's visual similarity (1-5) to a reference image.

    Parameters
    ----------
    reference_image_description : str
        Description of what the reference image depicts.
    image_input : str | list
        Directory to glob for images, or an explicit list of image paths.
    reference_image : str
        Path to the reference image file.
    api_key : str
        API key for the selected ``model_source``.
    columns : str
        Kept for interface parity with the sibling functions; scores use the
        model's own "score"/"summary" keys.
    user_model : str
        Model identifier passed to the provider.
    creativity : float
        Sampling temperature.
    to_csv : bool
        Write the final DataFrame to CSV.
    safety : bool
        Write an incremental CSV after every processed image.
    filename : str
        CSV filename used by ``safety`` and ``to_csv``.
    save_directory : str | None
        Output directory (defaults to the current working directory).
    model_source : str
        "OpenAI", "Perplexity", "Anthropic", or "Mistral".

    Returns
    -------
    pandas.DataFrame
        One row per image: path, raw model reply, extracted JSON string,
        plus the normalized score/summary columns.

    Raises
    ------
    FileNotFoundError
        If ``save_directory`` is given but does not exist.
    ValueError
        If ``model_source`` is not recognised.
    """
    import os
    import json
    import pandas as pd
    import regex
    from tqdm import tqdm
    import glob
    import base64
    from pathlib import Path

    if save_directory is not None and not os.path.isdir(save_directory):
        # Directory doesn't exist - raise an exception to halt execution
        raise FileNotFoundError(f"Directory {save_directory} doesn't exist")

    image_extensions = [
        '*.png', '*.jpg', '*.jpeg',
        '*.gif', '*.webp', '*.svg', '*.svgz', '*.avif', '*.apng',
        '*.tif', '*.tiff', '*.bmp',
        '*.heif', '*.heic', '*.ico',
        '*.psd'
    ]

    if not isinstance(image_input, list):
        # If image_input is a filepath (string), collect matching images.
        image_files = []
        for ext in image_extensions:
            image_files.extend(glob.glob(os.path.join(image_input, ext)))
        print(f"Found {len(image_files)} images.")
    else:
        # If image_files is already a list
        image_files = image_input
        print(f"Provided a list of {len(image_input)} images.")

    # Rebind the reference path to its data-URL form, encoded once up front.
    with open(reference_image, 'rb') as f:
        reference_image = f"data:image/{reference_image.split('.')[-1]};base64,{base64.b64encode(f.read()).decode('utf-8')}"

    def _normalize_jsons(json_strs):
        # Parse each extracted JSON string into a one-row frame; fall back
        # to an error-marker row when the string is not valid JSON.
        frames = []
        for json_str in json_strs:
            try:
                frames.append(pd.json_normalize(json.loads(json_str)))
            except json.JSONDecodeError:
                frames.append(pd.DataFrame({"1": ["e"]}))
        return pd.concat(frames, ignore_index=True)

    def _data_url_to_anthropic_block(data_url):
        # Anthropic's Messages API takes raw base64 plus an explicit media
        # type, so unpack the "data:<media>;base64,<payload>" URL.
        header, payload = data_url.split(",", 1)
        media_type = header.split(":", 1)[1].split(";", 1)[0]
        return {
            "type": "image",
            "source": {"type": "base64", "media_type": media_type, "data": payload}
        }

    link1 = []
    extracted_jsons = []

    for i, img_path in enumerate(tqdm(image_files, desc="Categorising images"), start=0):
        # BUG FIX: reset per image. Previously a failed API call left
        # `reply` holding the previous image's answer (or unbound on the
        # first iteration, raising UnboundLocalError below).
        reply = None

        # Check validity first
        if img_path is None or not os.path.exists(img_path):
            link1.append("Skipped NaN input or invalid path")
            extracted_jsons.append("""{"no_valid_image": 1}""")
            continue  # Skip the rest of the loop iteration

        # Only open the file if path is valid
        with open(img_path, "rb") as f:
            encoded = base64.b64encode(f.read()).decode("utf-8")

        # Handle extension safely
        ext = Path(img_path).suffix.lstrip(".").lower()
        encoded_image = f"data:image/{ext};base64,{encoded}"

        prompt_text = (
            f"You are a visual similarity assessment system.\n"
            f"Task ► Compare these two images:\n"
            f"1. REFERENCE (left): {reference_image_description}\n"
            f"2. INPUT (right): User-provided drawing\n\n"
            f"Rating criteria:\n"
            f"1: No meaningful similarity (fundamentally different)\n"
            f"2: Barely recognizable similarity (25% match)\n"
            f"3: Partial match (50% key features)\n"
            f"4: Strong alignment (75% features)\n"
            f"5: Near-perfect match (90%+ similarity)\n\n"
            f"Output format ► Return ONLY:\n"
            "{\n"
            ' "score": [1-5],\n'
            ' "summary": "reason you scored"\n'
            "}\n\n"
            f"Critical rules:\n"
            f"- Score must reflect shape, proportions, and key details\n"
            f"- List only concrete matching elements from reference\n"
            f"- No markdown or additional text"
        )
        prompt = [
            {"type": "text", "text": prompt_text},
            {"type": "image_url",
             "image_url": {"url": reference_image, "detail": "high"}
             },
            {
                "type": "image_url",
                "image_url": {"url": encoded_image, "detail": "high"},
            },
        ]

        if model_source == "OpenAI":
            from openai import OpenAI
            client = OpenAI(api_key=api_key)
            try:
                response_obj = client.chat.completions.create(
                    model=user_model,
                    messages=[{'role': 'user', 'content': prompt}],
                    temperature=creativity
                )
                reply = response_obj.choices[0].message.content
                link1.append(reply)
            except Exception as e:
                print(f"An error occurred: {e}")
                link1.append(f"Error processing input: {e}")

        elif model_source == "Perplexity":
            from openai import OpenAI
            client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
            try:
                response_obj = client.chat.completions.create(
                    model=user_model,
                    messages=[{'role': 'user', 'content': prompt}],
                    temperature=creativity
                )
                reply = response_obj.choices[0].message.content
                link1.append(reply)
            except Exception as e:
                print(f"An error occurred: {e}")
                link1.append(f"Error processing input: {e}")

        elif model_source == "Anthropic":
            # BUG FIX: the old code sent OpenAI-format "image_url" blocks
            # (data URLs) to Anthropic, which its Messages API rejects.
            anthropic_prompt = [
                {"type": "text", "text": prompt_text},
                _data_url_to_anthropic_block(reference_image),
                _data_url_to_anthropic_block(encoded_image),
            ]
            import anthropic
            client = anthropic.Anthropic(api_key=api_key)
            try:
                message = client.messages.create(
                    model=user_model,
                    max_tokens=1024,
                    temperature=creativity,
                    messages=[{"role": "user", "content": anthropic_prompt}]
                )
                reply = message.content[0].text  # Anthropic returns content as list
                link1.append(reply)
            except Exception as e:
                print(f"An error occurred: {e}")
                link1.append(f"Error processing input: {e}")

        elif model_source == "Mistral":
            from mistralai import Mistral
            client = Mistral(api_key=api_key)
            try:
                response = client.chat.complete(
                    model=user_model,
                    messages=[
                        {'role': 'user', 'content': prompt}
                    ],
                    temperature=creativity
                )
                reply = response.choices[0].message.content
                link1.append(reply)
            except Exception as e:
                print(f"An error occurred: {e}")
                link1.append(f"Error processing input: {e}")
        else:
            raise ValueError("Unknown source! Choose from OpenAI, Anthropic, Perplexity, or Mistral")

        # in situation that no JSON is found
        if reply is not None:
            # Recursive regex: grab the first balanced {...} object.
            extracted_json = regex.findall(r'\{(?:[^{}]|(?R))*\}', reply, regex.DOTALL)
            if extracted_json:
                # NOTE(review): this also strips spaces inside the "summary"
                # string value — lossy for free text; kept for compatibility.
                cleaned_json = extracted_json[0].replace('[', '').replace(']', '').replace('\n', '').replace(" ", '').replace(" ", '')
                extracted_jsons.append(cleaned_json)
            else:
                error_message = """{"1":"e"}"""
                extracted_jsons.append(error_message)
                print(error_message)
        else:
            error_message = """{"1":"e"}"""
            extracted_jsons.append(error_message)

        # --- Safety Save ---
        if safety:
            # Save progress so far
            temp_df = pd.DataFrame({
                'image_input': image_files[:i + 1],
                'link1': link1,
                'json': extracted_jsons
            })
            temp_df = pd.concat([temp_df, _normalize_jsons(extracted_jsons)], axis=1)
            # Save to CSV
            if save_directory is None:
                save_directory = os.getcwd()
            temp_df.to_csv(os.path.join(save_directory, filename), index=False)

    # --- Final DataFrame ---
    normalized_data = _normalize_jsons(extracted_jsons)

    categorized_data = pd.DataFrame({
        'image_input': image_files,
        'link1': pd.Series(link1).reset_index(drop=True),
        'json': pd.Series(extracted_jsons).reset_index(drop=True)
    })
    categorized_data = pd.concat([categorized_data, normalized_data], axis=1)

    if to_csv:
        if save_directory is None:
            save_directory = os.getcwd()
        categorized_data.to_csv(os.path.join(save_directory, filename), index=False)

    return categorized_data
466
+
467
# image features function
def extract_image_features(
    image_description,
    image_input,
    features_to_extract,
    api_key,
    columns="numbered",
    user_model="gpt-4o-2024-11-20",
    creativity=0,
    to_csv=False,
    safety=False,
    filename="categorized_data.csv",
    save_directory=None,
    model_source="OpenAI"
):
    """Answer a fixed set of questions about each image via a vision LLM.

    Parameters
    ----------
    image_description : str
        Context describing what the images show.
    image_input : str | list
        Directory to glob for images, or an explicit list of image paths.
    features_to_extract : list[str]
        Questions the model must answer concisely for each image.
    api_key : str
        API key for the selected ``model_source``.
    columns : str
        Kept for interface parity with the sibling functions.
    user_model : str
        Model identifier passed to the provider.
    creativity : float
        Sampling temperature.
    to_csv : bool
        Write the final DataFrame to CSV.
    safety : bool
        Write an incremental CSV after every processed image.
    filename : str
        CSV filename used by ``safety`` and ``to_csv``.
    save_directory : str | None
        Output directory (defaults to the current working directory).
    model_source : str
        "OpenAI", "Perplexity", "Anthropic", or "Mistral".

    Returns
    -------
    pandas.DataFrame
        One row per image: path, raw model reply, extracted JSON string,
        and one column per question.

    Raises
    ------
    FileNotFoundError
        If ``save_directory`` is given but does not exist.
    ValueError
        If ``model_source`` is not recognised.
    """
    import os
    import json
    import pandas as pd
    import regex
    from tqdm import tqdm
    import glob
    import base64
    from pathlib import Path

    if save_directory is not None and not os.path.isdir(save_directory):
        # Directory doesn't exist - raise an exception to halt execution
        raise FileNotFoundError(f"Directory {save_directory} doesn't exist")

    image_extensions = [
        '*.png', '*.jpg', '*.jpeg',
        '*.gif', '*.webp', '*.svg', '*.svgz', '*.avif', '*.apng',
        '*.tif', '*.tiff', '*.bmp',
        '*.heif', '*.heic', '*.ico',
        '*.psd'
    ]

    if not isinstance(image_input, list):
        # If image_input is a filepath (string), collect matching images.
        image_files = []
        for ext in image_extensions:
            image_files.extend(glob.glob(os.path.join(image_input, ext)))
        print(f"Found {len(image_files)} images.")
    else:
        # If image_files is already a list
        image_files = image_input
        print(f"Provided a list of {len(image_input)} images.")

    categories_str = "\n".join(f"{i + 1}. {cat}" for i, cat in enumerate(features_to_extract))

    # ensure number of categories is what user wants
    print("\nThe image features to be extracted are:")
    for i, cat in enumerate(features_to_extract, 1):
        print(f"{i}. {cat}")

    def _normalize_jsons(json_strs):
        # Parse each extracted JSON string into a one-row frame; fall back
        # to an error-marker row when the string is not valid JSON.
        frames = []
        for json_str in json_strs:
            try:
                frames.append(pd.json_normalize(json.loads(json_str)))
            except json.JSONDecodeError:
                frames.append(pd.DataFrame({"1": ["e"]}))
        return pd.concat(frames, ignore_index=True)

    link1 = []
    extracted_jsons = []

    for i, img_path in enumerate(
            tqdm(image_files, desc="Categorising images"), start=0):
        # BUG FIX: reset per image. Previously a failed API call left
        # `reply` holding the previous image's answer (or unbound on the
        # first iteration, raising UnboundLocalError below).
        reply = None

        # ROBUSTNESS FIX: skip missing/None paths instead of crashing in
        # open() — matches the sibling multi-class and score functions.
        if img_path is None or not os.path.exists(img_path):
            link1.append("Skipped NaN input or invalid path")
            extracted_jsons.append("""{"no_valid_image": 1}""")
            continue

        # encode this specific image once
        with open(img_path, "rb") as f:
            encoded = base64.b64encode(f.read()).decode("utf-8")
        ext = Path(img_path).suffix.lstrip(".").lower()
        encoded_image = f"data:image/{ext};base64,{encoded}"

        prompt_text = (
            f"You are a visual question answering assistant.\n"
            f"Task ► Analyze the attached image and answer these specific questions:\n\n"
            f"Image context: {image_description}\n\n"
            f"Questions to answer:\n{categories_str}\n\n"
            f"Output format ► Return **only** a JSON object where:\n"
            f"- Keys are question numbers ('1', '2', ...)\n"
            f"- Values are concise answers (numbers, short phrases)\n\n"
            f"Example for 3 questions:\n"
            "{\n"
            ' "1": "4",\n'
            ' "2": "blue",\n'
            ' "3": "yes"\n'
            "}\n\n"
            f"Important rules:\n"
            f"1. Answer directly - no explanations\n"
            f"2. Use exact numerical values when possible\n"
            f"3. For yes/no questions, use 'yes' or 'no'\n"
            f"4. Never add extra keys or formatting"
        )
        prompt = [
            {"type": "text", "text": prompt_text},
            {
                "type": "image_url",
                "image_url": {"url": encoded_image, "detail": "high"},
            },
        ]

        if model_source == "OpenAI":
            from openai import OpenAI
            client = OpenAI(api_key=api_key)
            try:
                response_obj = client.chat.completions.create(
                    model=user_model,
                    messages=[{'role': 'user', 'content': prompt}],
                    temperature=creativity
                )
                reply = response_obj.choices[0].message.content
                link1.append(reply)
            except Exception as e:
                print(f"An error occurred: {e}")
                link1.append(f"Error processing input: {e}")

        elif model_source == "Perplexity":
            from openai import OpenAI
            client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
            try:
                response_obj = client.chat.completions.create(
                    model=user_model,
                    messages=[{'role': 'user', 'content': prompt}],
                    temperature=creativity
                )
                reply = response_obj.choices[0].message.content
                link1.append(reply)
            except Exception as e:
                print(f"An error occurred: {e}")
                link1.append(f"Error processing input: {e}")

        elif model_source == "Anthropic":
            # BUG FIX: the old code sent the OpenAI-format "image_url" block
            # (a data URL) to Anthropic; its Messages API needs raw base64
            # plus an explicit media type (".jpg" reported as "image/jpeg").
            media_type = f"image/{'jpeg' if ext == 'jpg' else ext}"
            anthropic_prompt = [
                {"type": "text", "text": prompt_text},
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": media_type,
                        "data": encoded
                    }
                }
            ]
            import anthropic
            client = anthropic.Anthropic(api_key=api_key)
            try:
                message = client.messages.create(
                    model=user_model,
                    max_tokens=1024,
                    temperature=creativity,
                    messages=[{"role": "user", "content": anthropic_prompt}]
                )
                reply = message.content[0].text  # Anthropic returns content as list
                link1.append(reply)
            except Exception as e:
                print(f"An error occurred: {e}")
                link1.append(f"Error processing input: {e}")

        elif model_source == "Mistral":
            from mistralai import Mistral
            client = Mistral(api_key=api_key)
            try:
                response = client.chat.complete(
                    model=user_model,
                    messages=[
                        {'role': 'user', 'content': prompt}
                    ],
                    temperature=creativity
                )
                reply = response.choices[0].message.content
                link1.append(reply)
            except Exception as e:
                print(f"An error occurred: {e}")
                link1.append(f"Error processing input: {e}")
        else:
            raise ValueError("Unknown source! Choose from OpenAI, Anthropic, Perplexity, or Mistral")

        # in situation that no JSON is found
        if reply is not None:
            # Recursive regex: grab the first balanced {...} object.
            extracted_json = regex.findall(r'\{(?:[^{}]|(?R))*\}', reply, regex.DOTALL)
            if extracted_json:
                # NOTE(review): this also strips spaces inside answer
                # strings — lossy for multi-word phrases; kept as-is.
                cleaned_json = extracted_json[0].replace('[', '').replace(']', '').replace('\n', '').replace(" ", '').replace(" ", '')
                extracted_jsons.append(cleaned_json)
            else:
                error_message = """{"1":"e"}"""
                extracted_jsons.append(error_message)
                print(error_message)
        else:
            error_message = """{"1":"e"}"""
            extracted_jsons.append(error_message)

        # --- Safety Save ---
        if safety:
            # Save progress so far
            temp_df = pd.DataFrame({
                'image_input': image_files[:i + 1],
                'link1': link1,
                'json': extracted_jsons
            })
            temp_df = pd.concat([temp_df, _normalize_jsons(extracted_jsons)], axis=1)
            # Save to CSV
            if save_directory is None:
                save_directory = os.getcwd()
            temp_df.to_csv(os.path.join(save_directory, filename), index=False)

    # --- Final DataFrame ---
    normalized_data = _normalize_jsons(extracted_jsons)

    categorized_data = pd.DataFrame({
        'image_input': image_files,
        'link1': pd.Series(link1).reset_index(drop=True),
        'json': pd.Series(extracted_jsons).reset_index(drop=True)
    })
    categorized_data = pd.concat([categorized_data, normalized_data], axis=1)

    if to_csv:
        if save_directory is None:
            save_directory = os.getcwd()
        categorized_data.to_csv(os.path.join(save_directory, filename), index=False)

    return categorized_data
@@ -1,8 +0,0 @@
1
- catllm/CERAD_functions.py,sha256=0tuFryVZ2CDVsq3FBhdx0KI23avELxHiDrg7-i5sXPU,16281
2
- catllm/__about__.py,sha256=W-f0L_ZEOs0IGlGa-98dY1aw1uqUgr-qkJ-M9qKND0U,404
3
- catllm/__init__.py,sha256=mNp5MQx2aNTtpNBHJ-U9INd1hX3u6jRkOoAewEI25MI,298
4
- catllm/cat_llm.py,sha256=TJmdM_O9oL7wvTuwohQLY5vgaAttIElCcfXEJHzdhfM,58311
5
- cat_llm-0.0.25.dist-info/METADATA,sha256=y3Sy6PnVane93b64UENte7uULeAGitvwjPjMaxZGOqg,1679
6
- cat_llm-0.0.25.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
7
- cat_llm-0.0.25.dist-info/licenses/LICENSE,sha256=wJLsvOr6lrFUDcoPXExa01HOKFWrS3JC9f0RudRw8uw,1075
8
- cat_llm-0.0.25.dist-info/RECORD,,