cat-llm 0.0.27__tar.gz → 0.0.28__tar.gz
This diff shows the content changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the versions exactly as they appear in their public registry.
- {cat_llm-0.0.27 → cat_llm-0.0.28}/PKG-INFO +1 -1
- {cat_llm-0.0.27 → cat_llm-0.0.28}/src/catllm/__about__.py +1 -1
- {cat_llm-0.0.27 → cat_llm-0.0.28}/src/catllm/image_functions.py +157 -83
- {cat_llm-0.0.27 → cat_llm-0.0.28}/LICENSE +0 -0
- {cat_llm-0.0.27 → cat_llm-0.0.28}/README.md +0 -0
- {cat_llm-0.0.27 → cat_llm-0.0.28}/pyproject.toml +0 -0
- {cat_llm-0.0.27 → cat_llm-0.0.28}/src/catllm/CERAD_functions.py +0 -0
- {cat_llm-0.0.27 → cat_llm-0.0.28}/src/catllm/__init__.py +0 -0
- {cat_llm-0.0.27 → cat_llm-0.0.28}/src/catllm/cat_llm.py +0 -0
{cat_llm-0.0.27 → cat_llm-0.0.28}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cat-llm
-Version: 0.0.27
+Version: 0.0.28
 Summary: A tool for categorizing text data and images using LLMs and vision models
 Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
 Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
{cat_llm-0.0.27 → cat_llm-0.0.28}/src/catllm/image_functions.py

@@ -1,5 +1,5 @@
 # image multi-class (binary) function
-def extract_image_multi_class(
+def image_multi_class(
     image_description,
     image_input,
     categories,
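This rename makes image_multi_class the public entry point in 0.0.28. A minimal call sketch, assuming the function is imported from catllm.image_functions and that the remaining keyword arguments (model_source, user_model, api_key) keep the names that appear later in this file; the full signature is not shown in this diff:

    from catllm.image_functions import image_multi_class

    # Hypothetical call against the renamed 0.0.28 API; everything beyond
    # image_description, image_input, and categories is an assumption.
    results = image_multi_class(
        image_description="children's drawings of a house",
        image_input=["drawings/house_01.png", "drawings/house_02.png"],
        categories=["has a door", "has windows", "has a roof"],
        model_source="OpenAI",      # or "Anthropic" / "Mistral"
        user_model="gpt-4o",        # assumed model identifier
        api_key="sk-...",           # placeholder
    )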
@@ -96,7 +96,7 @@ def extract_image_multi_class(
                 },
             ]
 
-
+        elif model_source == "Anthropic":
             encoded_image = f"data:image/{ext};base64,{encoded}"
             prompt = [
                 {"type": "text",
@@ -171,7 +171,7 @@ def extract_image_multi_class(
                 print(f"An error occurred: {e}")
                 link1.append(f"Error processing input: {e}")
         else:
-            raise ValueError("Unknown source! Choose from OpenAI, Anthropic,
+            raise ValueError("Unknown source! Choose from OpenAI, Anthropic, or Mistral")
         # in situation that no JSON is found
         if reply is not None:
             extracted_json = regex.findall(r'\{(?:[^{}]|(?R))*\}', reply, regex.DOTALL)
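The unchanged context line above extracts the first balanced {...} block from the model's reply with the third-party regex module, whose (?R) operator matches nested braces recursively. A self-contained sketch of the same pattern; the sample reply string is invented for illustration:

    import json
    import regex  # third-party 'regex' package, not the stdlib 're'

    reply = 'Sure! Here is the result: {"1": 1, "2": 0, "summary": "door present, no windows"} Let me know.'

    # (?R) recurses into nested braces, so the outermost balanced JSON object is matched whole.
    extracted_json = regex.findall(r'\{(?:[^{}]|(?R))*\}', reply, regex.DOTALL)
    if extracted_json:
        parsed = json.loads(extracted_json[0])
        print(parsed["summary"])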
@@ -240,7 +240,7 @@ def extract_image_multi_class(
     return categorized_data
 
 #image score function
-def extract_image_score(
+def image_score(
     reference_image_description,
     image_input,
     reference_image,
@@ -288,7 +288,8 @@ def extract_image_score(
         print(f"Provided a list of {len(image_input)} images.")
 
     with open(reference_image, 'rb') as f:
-
+        reference = base64.b64encode(f.read()).decode('utf-8')
+    reference_image = f"data:image/{reference_image.split('.')[-1]};base64,{reference}"
 
     link1 = []
     extracted_jsons = []
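The two added lines base64-encode the reference image once and wrap it in a data URL whose MIME subtype is taken from the file extension. The same idea in isolation, with a hypothetical file name:

    import base64

    reference_path = "reference_circle.png"  # hypothetical reference image

    with open(reference_path, "rb") as f:
        reference = base64.b64encode(f.read()).decode("utf-8")

    # Mirrors the split('.') logic above; note that a ".jpg" file yields the
    # subtype "jpg" rather than the canonical "jpeg".
    reference_data_url = f"data:image/{reference_path.split('.')[-1]};base64,{reference}"
    print(reference_data_url[:40] + "...")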
@@ -308,40 +309,87 @@ def extract_image_score(
         ext = Path(img_path).suffix.lstrip(".").lower()
         encoded_image = f"data:image/{ext};base64,{encoded}"
 
- [old lines 311-344 removed; their content is not shown in this diff view]
+        if model_source == "OpenAI":
+            prompt = [
+                {
+                    "type": "text",
+                    "text": (
+                        f"You are a visual similarity assessment system.\n"
+                        f"Task ► Compare these two images:\n"
+                        f"1. REFERENCE (left): {reference_image_description}\n"
+                        f"2. INPUT (right): User-provided drawing\n\n"
+                        f"Rating criteria:\n"
+                        f"1: No meaningful similarity (fundamentally different)\n"
+                        f"2: Barely recognizable similarity (25% match)\n"
+                        f"3: Partial match (50% key features)\n"
+                        f"4: Strong alignment (75% features)\n"
+                        f"5: Near-perfect match (90%+ similarity)\n\n"
+                        f"Output format ► Return ONLY:\n"
+                        "{\n"
+                        ' "score": [1-5],\n'
+                        ' "summary": "reason you scored"\n'
+                        "}\n\n"
+                        f"Critical rules:\n"
+                        f"- Score must reflect shape, proportions, and key details\n"
+                        f"- List only concrete matching elements from reference\n"
+                        f"- No markdown or additional text"
+                    )
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {"url": reference_image, "detail": "high"}
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {"url": encoded_image, "detail": "high"}
+                }
+            ]
+
+        elif model_source == "Anthropic":  # Changed to elif
+            prompt = [
+                {
+                    "type": "text",
+                    "text": (
+                        f"You are a visual similarity assessment system.\n"
+                        f"Task ► Compare these two images:\n"
+                        f"1. REFERENCE (left): {reference_image_description}\n"
+                        f"2. INPUT (right): User-provided drawing\n\n"
+                        f"Rating criteria:\n"
+                        f"1: No meaningful similarity (fundamentally different)\n"
+                        f"2: Barely recognizable similarity (25% match)\n"
+                        f"3: Partial match (50% key features)\n"
+                        f"4: Strong alignment (75% features)\n"
+                        f"5: Near-perfect match (90%+ similarity)\n\n"
+                        f"Output format ► Return ONLY:\n"
+                        "{\n"
+                        ' "score": [1-5],\n'
+                        ' "summary": "reason you scored"\n'
+                        "}\n\n"
+                        f"Critical rules:\n"
+                        f"- Score must reflect shape, proportions, and key details\n"
+                        f"- List only concrete matching elements from reference\n"
+                        f"- No markdown or additional text"
+                    )
+                },
+                {
+                    "type": "image",  # Added missing type
+                    "source": {
+                        "type": "base64",
+                        "media_type": "image/png",
+                        "data": reference
+                    }
+                },
+                {
+                    "type": "image",  # Added missing type
+                    "source": {
+                        "type": "base64",
+                        "media_type": "image/jpeg",
+                        "data": encoded
+                    }
+                }
+            ]
+
+
         if model_source == "OpenAI":
             from openai import OpenAI
             client = OpenAI(api_key=api_key)
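The new Anthropic branch builds a content list of one text block and two base64 image blocks. A minimal sketch of how such a list is typically sent through the Anthropic Messages API; the model id, max_tokens value, and file names here are assumptions, not taken from this package:

    import base64
    import anthropic

    def to_b64(path):
        with open(path, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")

    reference = to_b64("reference.png")   # hypothetical reference image
    encoded = to_b64("drawing.jpeg")      # hypothetical input drawing

    client = anthropic.Anthropic(api_key="sk-ant-...")  # placeholder key
    response = client.messages.create(
        model="claude-3-5-sonnet-20240620",  # assumed model id
        max_tokens=300,
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": "Compare the two images and return only the JSON described."},
                {"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": reference}},
                {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": encoded}},
            ],
        }],
    )
    reply = response.content[0].text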
@@ -357,20 +405,6 @@ def extract_image_score(
                 print(f"An error occurred: {e}")
                 link1.append(f"Error processing input: {e}")
 
-        elif model_source == "Perplexity":
-            from openai import OpenAI
-            client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
-            try:
-                response_obj = client.chat.completions.create(
-                    model=user_model,
-                    messages=[{'role': 'user', 'content': prompt}],
-                    temperature=creativity
-                )
-                reply = response_obj.choices[0].message.content
-                link1.append(reply)
-            except Exception as e:
-                print(f"An error occurred: {e}")
-                link1.append(f"Error processing input: {e}")
         elif model_source == "Anthropic":
             import anthropic
             client = anthropic.Anthropic(api_key=api_key)
@@ -386,6 +420,7 @@ def extract_image_score(
             except Exception as e:
                 print(f"An error occurred: {e}")
                 link1.append(f"Error processing input: {e}")
+
         elif model_source == "Mistral":
             from mistralai import Mistral
             client = Mistral(api_key=api_key)
@@ -468,7 +503,7 @@ def extract_image_score(
     return categorized_data
 
 # image features function
-def extract_image_features(
+def image_features(
    image_description,
    image_input,
    features_to_extract,
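The prompts added in the next hunk interpolate a categories_str built from features_to_extract. How the package assembles that string is not visible in this diff; a plausible sketch, offered purely as an assumption, numbers the questions like so:

    features_to_extract = [
        "How many windows does the house have?",
        "What color is the roof?",
        "Is there a door?",
    ]

    # Hypothetical construction of the numbered question list the prompts refer
    # to as {categories_str}; the actual implementation is not shown in this diff.
    categories_str = "\n".join(
        f"{i}. {feature}" for i, feature in enumerate(features_to_extract, start=1)
    )
    print(categories_str)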
@@ -530,41 +565,80 @@ def extract_image_features(
 
     for i, img_path in enumerate(
             tqdm(image_files, desc="Categorising images"), start=0):
+        if img_path is None or not os.path.exists(img_path):
+            link1.append("Skipped NaN input or invalid path")
+            extracted_jsons.append("""{"no_valid_image": 1}""")
+            continue  # Skip the rest of the loop iteration
         # encode this specific image once
         with open(img_path, "rb") as f:
             encoded = base64.b64encode(f.read()).decode("utf-8")
         ext = Path(img_path).suffix.lstrip(".").lower()
         encoded_image = f"data:image/{ext};base64,{encoded}"
 
- [old lines 539-567 removed; their content is not shown in this diff view]
+        if model_source == "OpenAI":
+            prompt = [
+                {
+                    "type": "text",
+                    "text": (
+                        f"You are a visual question answering assistant.\n"
+                        f"Task ► Analyze the attached image and answer these specific questions:\n\n"
+                        f"Image context: {image_description}\n\n"
+                        f"Questions to answer:\n{categories_str}\n\n"
+                        f"Output format ► Return **only** a JSON object where:\n"
+                        f"- Keys are question numbers ('1', '2', ...)\n"
+                        f"- Values are concise answers (numbers, short phrases)\n\n"
+                        f"Example for 3 questions:\n"
+                        "{\n"
+                        ' "1": "4",\n'
+                        ' "2": "blue",\n'
+                        ' "3": "yes"\n'
+                        "}\n\n"
+                        f"Important rules:\n"
+                        f"1. Answer directly - no explanations\n"
+                        f"2. Use exact numerical values when possible\n"
+                        f"3. For yes/no questions, use 'yes' or 'no'\n"
+                        f"4. Never add extra keys or formatting"
+                    ),
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {"url": encoded_image, "detail": "high"},
+                },
+            ]
+        elif model_source == "Anthropic":
+            prompt = [
+                {
+                    "type": "text",
+                    "text": (
+                        f"You are a visual question answering assistant.\n"
+                        f"Task ► Analyze the attached image and answer these specific questions:\n\n"
+                        f"Image context: {image_description}\n\n"
+                        f"Questions to answer:\n{categories_str}\n\n"
+                        f"Output format ► Return **only** a JSON object where:\n"
+                        f"- Keys are question numbers ('1', '2', ...)\n"
+                        f"- Values are concise answers (numbers, short phrases)\n\n"
+                        f"Example for 3 questions:\n"
+                        "{\n"
+                        ' "1": "4",\n'
+                        ' "2": "blue",\n'
+                        ' "3": "yes"\n'
+                        "}\n\n"
+                        f"Important rules:\n"
+                        f"1. Answer directly - no explanations\n"
+                        f"2. Use exact numerical values when possible\n"
+                        f"3. For yes/no questions, use 'yes' or 'no'\n"
+                        f"4. Never add extra keys or formatting"
+                    )
+                },
+                {
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": "image/jpeg",
+                        "data": encoded
+                    }
+                }
+            ]
         if model_source == "OpenAI":
             from openai import OpenAI
             client = OpenAI(api_key=api_key)
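The guard added at the top of the loop skips None entries (e.g. NaN rows) and paths that do not exist, appending a {"no_valid_image": 1} placeholder so outputs stay index-aligned with the inputs. The same pattern in isolation, with hypothetical file names:

    import os

    image_files = ["drawings/house_01.png", None, "drawings/missing.png"]  # hypothetical inputs
    link1, extracted_jsons = [], []

    for img_path in image_files:
        # Same guard as the added lines: record a placeholder and move on
        # rather than raising on a missing or null path.
        if img_path is None or not os.path.exists(img_path):
            link1.append("Skipped NaN input or invalid path")
            extracted_jsons.append('{"no_valid_image": 1}')
            continue
        print(f"Would encode and classify {img_path}")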