cat-llm 0.0.26__tar.gz → 0.0.28__tar.gz

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: cat-llm
- Version: 0.0.26
+ Version: 0.0.28
  Summary: A tool for categorizing text data and images using LLMs and vision models
  Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
  Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
@@ -1,7 +1,7 @@
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
  #
  # SPDX-License-Identifier: MIT
- __version__ = "0.0.26"
+ __version__ = "0.0.28"
  __author__ = "Chris Soria"
  __email__ = "chrissoria@berkeley.edu"
  __title__ = "cat-llm"
@@ -1,5 +1,5 @@
  # image multi-class (binary) function
- def extract_image_multi_class(
+ def image_multi_class(
  image_description,
  image_input,
  categories,
@@ -72,29 +72,56 @@ def extract_image_multi_class(
 
  # Handle extension safely
  ext = Path(img_path).suffix.lstrip(".").lower()
- encoded_image = f"data:image/{ext};base64,{encoded}"
-
- prompt = [
- {
- "type": "text",
- "text": (
- f"You are an image-tagging assistant.\n"
- f"Task ► Examine the attached image and decide, **for each category below**, "
- f"whether it is PRESENT (1) or NOT PRESENT (0).\n\n"
- f"Image is expected to show: {image_description}\n\n"
- f"Categories:\n{categories_str}\n\n"
- f"Output format ► Respond with **only** a JSON object whose keys are the "
- f"quoted category numbers ('1', '2', …) and whose values are 1 or 0. "
- f"No additional keys, comments, or text.\n\n"
- f"Example (three categories):\n"
- f"{example_JSON}"
- ),
- },
- {
- "type": "image_url",
- "image_url": {"url": encoded_image, "detail": "high"},
- },
- ]
+ if model_source == "OpenAI":
+ encoded_image = f"data:image/{ext};base64,{encoded}"
+ prompt = [
+ {
+ "type": "text",
+ "text": (
+ f"You are an image-tagging assistant.\n"
+ f"Task ► Examine the attached image and decide, **for each category below**, "
+ f"whether it is PRESENT (1) or NOT PRESENT (0).\n\n"
+ f"Image is expected to show: {image_description}\n\n"
+ f"Categories:\n{categories_str}\n\n"
+ f"Output format ► Respond with **only** a JSON object whose keys are the "
+ f"quoted category numbers ('1', '2', …) and whose values are 1 or 0. "
+ f"No additional keys, comments, or text.\n\n"
+ f"Example (three categories):\n"
+ f"{example_JSON}"
+ ),
+ },
+ {
+ "type": "image_url",
+ "image_url": {"url": encoded_image, "detail": "high"},
+ },
+ ]
+
+ elif model_source == "Anthropic":
+ encoded_image = f"data:image/{ext};base64,{encoded}"
+ prompt = [
+ {"type": "text",
+ "text": (
+ f"You are an image-tagging assistant.\n"
+ f"Task ► Examine the attached image and decide, **for each category below**, "
+ f"whether it is PRESENT (1) or NOT PRESENT (0).\n\n"
+ f"Image is expected to show: {image_description}\n\n"
+ f"Categories:\n{categories_str}\n\n"
+ f"Output format ► Respond with **only** a JSON object whose keys are the "
+ f"quoted category numbers ('1', '2', …) and whose values are 1 or 0. "
+ f"No additional keys, comments, or text.\n\n"
+ f"Example (three categories):\n"
+ f"{example_JSON}"
+ ),
+ },
+ {
+ "type": "image",
+ "source": {
+ "type": "base64",
+ "media_type": "image/jpeg",
+ "data": encoded
+ }
+ }
+ ]
  if model_source == "OpenAI":
  from openai import OpenAI
  client = OpenAI(api_key=api_key)
@@ -111,32 +138,8 @@ def extract_image_multi_class(
  link1.append(f"Error processing input: {e}")
 
  elif model_source == "Anthropic":
- prompt = [
- {"type": "text",
- "text": (
- f"You are an image-tagging assistant.\n"
- f"Task ► Examine the attached image and decide, **for each category below**, "
- f"whether it is PRESENT (1) or NOT PRESENT (0).\n\n"
- f"Image is expected to show: {image_description}\n\n"
- f"Categories:\n{categories_str}\n\n"
- f"Output format ► Respond with **only** a JSON object whose keys are the "
- f"quoted category numbers ('1', '2', …) and whose values are 1 or 0. "
- f"No additional keys, comments, or text.\n\n"
- f"Example (three categories):\n"
- f"{example_JSON}"
- ),
- },
- {
- "type": "image",
- "source": {
- "type": "base64",
- "media_type": "image/jpeg",
- "data": encoded_image
- }
- }
- ]
-
  import anthropic
+ reply = None
  client = anthropic.Anthropic(api_key=api_key)
  try:
  message = client.messages.create(
@@ -168,7 +171,7 @@ def extract_image_multi_class(
  print(f"An error occurred: {e}")
  link1.append(f"Error processing input: {e}")
  else:
- raise ValueError("Unknown source! Choose from OpenAI, Anthropic, Perplexity, or Mistral")
+ raise ValueError("Unknown source! Choose from OpenAI, Anthropic, or Mistral")
  # in situation that no JSON is found
  if reply is not None:
  extracted_json = regex.findall(r'\{(?:[^{}]|(?R))*\}', reply, regex.DOTALL)
@@ -237,7 +240,7 @@ def extract_image_multi_class(
  return categorized_data
 
  #image score function
- def extract_image_score(
+ def image_score(
  reference_image_description,
  image_input,
  reference_image,
@@ -285,7 +288,8 @@ def extract_image_score(
  print(f"Provided a list of {len(image_input)} images.")
 
  with open(reference_image, 'rb') as f:
- reference_image = f"data:image/{reference_image.split('.')[-1]};base64,{base64.b64encode(f.read()).decode('utf-8')}"
+ reference = base64.b64encode(f.read()).decode('utf-8')
+ reference_image = f"data:image/{reference_image.split('.')[-1]};base64,{reference}"
 
  link1 = []
  extracted_jsons = []
@@ -305,40 +309,87 @@ def extract_image_score(
  ext = Path(img_path).suffix.lstrip(".").lower()
  encoded_image = f"data:image/{ext};base64,{encoded}"
 
- prompt = [
- {
- "type": "text",
- "text": (
- f"You are a visual similarity assessment system.\n"
- f"Task Compare these two images:\n"
- f"1. REFERENCE (left): {reference_image_description}\n"
- f"2. INPUT (right): User-provided drawing\n\n"
- f"Rating criteria:\n"
- f"1: No meaningful similarity (fundamentally different)\n"
- f"2: Barely recognizable similarity (25% match)\n"
- f"3: Partial match (50% key features)\n"
- f"4: Strong alignment (75% features)\n"
- f"5: Near-perfect match (90%+ similarity)\n\n"
- f"Output format Return ONLY:\n"
- "{\n"
- ' "score": [1-5],\n'
- ' "summary": "reason you scored"\n'
- "}\n\n"
- f"Critical rules:\n"
- f"- Score must reflect shape, proportions, and key details\n"
- f"- List only concrete matching elements from reference\n"
- f"- No markdown or additional text"
- ),
- },
- {"type": "image_url",
- "image_url": {"url": reference_image, "detail": "high"}
- },
- {
- "type": "image_url",
-
- "image_url": {"url": encoded_image, "detail": "high"},
- },
- ]
+ if model_source == "OpenAI":
+ prompt = [
+ {
+ "type": "text",
+ "text": (
+ f"You are a visual similarity assessment system.\n"
+ f"Task Compare these two images:\n"
+ f"1. REFERENCE (left): {reference_image_description}\n"
+ f"2. INPUT (right): User-provided drawing\n\n"
+ f"Rating criteria:\n"
+ f"1: No meaningful similarity (fundamentally different)\n"
+ f"2: Barely recognizable similarity (25% match)\n"
+ f"3: Partial match (50% key features)\n"
+ f"4: Strong alignment (75% features)\n"
+ f"5: Near-perfect match (90%+ similarity)\n\n"
+ f"Output format ► Return ONLY:\n"
+ "{\n"
+ ' "score": [1-5],\n'
+ ' "summary": "reason you scored"\n'
+ "}\n\n"
+ f"Critical rules:\n"
+ f"- Score must reflect shape, proportions, and key details\n"
+ f"- List only concrete matching elements from reference\n"
+ f"- No markdown or additional text"
+ )
+ },
+ {
+ "type": "image_url",
+ "image_url": {"url": reference_image, "detail": "high"}
+ },
+ {
+ "type": "image_url",
+ "image_url": {"url": encoded_image, "detail": "high"}
+ }
+ ]
+
+ elif model_source == "Anthropic": # Changed to elif
+ prompt = [
+ {
+ "type": "text",
+ "text": (
+ f"You are a visual similarity assessment system.\n"
+ f"Task ► Compare these two images:\n"
+ f"1. REFERENCE (left): {reference_image_description}\n"
+ f"2. INPUT (right): User-provided drawing\n\n"
+ f"Rating criteria:\n"
+ f"1: No meaningful similarity (fundamentally different)\n"
+ f"2: Barely recognizable similarity (25% match)\n"
+ f"3: Partial match (50% key features)\n"
+ f"4: Strong alignment (75% features)\n"
+ f"5: Near-perfect match (90%+ similarity)\n\n"
+ f"Output format ► Return ONLY:\n"
+ "{\n"
+ ' "score": [1-5],\n'
+ ' "summary": "reason you scored"\n'
+ "}\n\n"
+ f"Critical rules:\n"
+ f"- Score must reflect shape, proportions, and key details\n"
+ f"- List only concrete matching elements from reference\n"
+ f"- No markdown or additional text"
+ )
+ },
+ {
+ "type": "image", # Added missing type
+ "source": {
+ "type": "base64",
+ "media_type": "image/png",
+ "data": reference
+ }
+ },
+ {
+ "type": "image", # Added missing type
+ "source": {
+ "type": "base64",
+ "media_type": "image/jpeg",
+ "data": encoded
+ }
+ }
+ ]
+
+
  if model_source == "OpenAI":
  from openai import OpenAI
  client = OpenAI(api_key=api_key)
@@ -354,20 +405,6 @@ def extract_image_score(
  print(f"An error occurred: {e}")
  link1.append(f"Error processing input: {e}")
 
- elif model_source == "Perplexity":
- from openai import OpenAI
- client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
- try:
- response_obj = client.chat.completions.create(
- model=user_model,
- messages=[{'role': 'user', 'content': prompt}],
- temperature=creativity
- )
- reply = response_obj.choices[0].message.content
- link1.append(reply)
- except Exception as e:
- print(f"An error occurred: {e}")
- link1.append(f"Error processing input: {e}")
  elif model_source == "Anthropic":
  import anthropic
  client = anthropic.Anthropic(api_key=api_key)
@@ -383,6 +420,7 @@ def extract_image_score(
  except Exception as e:
  print(f"An error occurred: {e}")
  link1.append(f"Error processing input: {e}")
+
  elif model_source == "Mistral":
  from mistralai import Mistral
  client = Mistral(api_key=api_key)
@@ -465,7 +503,7 @@ def extract_image_score(
  return categorized_data
 
  # image features function
- def extract_image_features(
+ def image_features(
  image_description,
  image_input,
  features_to_extract,
@@ -527,41 +565,80 @@ def extract_image_features(
 
  for i, img_path in enumerate(
  tqdm(image_files, desc="Categorising images"), start=0):
+ if img_path is None or not os.path.exists(img_path):
+ link1.append("Skipped NaN input or invalid path")
+ extracted_jsons.append("""{"no_valid_image": 1}""")
+ continue # Skip the rest of the loop iteration
  # encode this specific image once
  with open(img_path, "rb") as f:
  encoded = base64.b64encode(f.read()).decode("utf-8")
  ext = Path(img_path).suffix.lstrip(".").lower()
  encoded_image = f"data:image/{ext};base64,{encoded}"
 
- prompt = [
- {
- "type": "text",
- "text": (
- f"You are a visual question answering assistant.\n"
- f"Task Analyze the attached image and answer these specific questions:\n\n"
- f"Image context: {image_description}\n\n"
- f"Questions to answer:\n{categories_str}\n\n"
- f"Output format ► Return **only** a JSON object where:\n"
- f"- Keys are question numbers ('1', '2', ...)\n"
- f"- Values are concise answers (numbers, short phrases)\n\n"
- f"Example for 3 questions:\n"
- "{\n"
- ' "1": "4",\n'
- ' "2": "blue",\n'
- ' "3": "yes"\n'
- "}\n\n"
- f"Important rules:\n"
- f"1. Answer directly - no explanations\n"
- f"2. Use exact numerical values when possible\n"
- f"3. For yes/no questions, use 'yes' or 'no'\n"
- f"4. Never add extra keys or formatting"
- ),
- },
- {
- "type": "image_url",
- "image_url": {"url": encoded_image, "detail": "high"},
- },
- ]
+ if model_source == "OpenAI":
+ prompt = [
+ {
+ "type": "text",
+ "text": (
+ f"You are a visual question answering assistant.\n"
+ f"Task Analyze the attached image and answer these specific questions:\n\n"
+ f"Image context: {image_description}\n\n"
+ f"Questions to answer:\n{categories_str}\n\n"
+ f"Output format Return **only** a JSON object where:\n"
+ f"- Keys are question numbers ('1', '2', ...)\n"
+ f"- Values are concise answers (numbers, short phrases)\n\n"
+ f"Example for 3 questions:\n"
+ "{\n"
+ ' "1": "4",\n'
+ ' "2": "blue",\n'
+ ' "3": "yes"\n'
+ "}\n\n"
+ f"Important rules:\n"
+ f"1. Answer directly - no explanations\n"
+ f"2. Use exact numerical values when possible\n"
+ f"3. For yes/no questions, use 'yes' or 'no'\n"
+ f"4. Never add extra keys or formatting"
+ ),
+ },
+ {
+ "type": "image_url",
+ "image_url": {"url": encoded_image, "detail": "high"},
+ },
+ ]
+ elif model_source == "Anthropic":
+ prompt = [
+ {
+ "type": "text",
+ "text": (
+ f"You are a visual question answering assistant.\n"
+ f"Task ► Analyze the attached image and answer these specific questions:\n\n"
+ f"Image context: {image_description}\n\n"
+ f"Questions to answer:\n{categories_str}\n\n"
+ f"Output format ► Return **only** a JSON object where:\n"
+ f"- Keys are question numbers ('1', '2', ...)\n"
+ f"- Values are concise answers (numbers, short phrases)\n\n"
+ f"Example for 3 questions:\n"
+ "{\n"
+ ' "1": "4",\n'
+ ' "2": "blue",\n'
+ ' "3": "yes"\n'
+ "}\n\n"
+ f"Important rules:\n"
+ f"1. Answer directly - no explanations\n"
+ f"2. Use exact numerical values when possible\n"
+ f"3. For yes/no questions, use 'yes' or 'no'\n"
+ f"4. Never add extra keys or formatting"
+ )
+ },
+ {
+ "type": "image",
+ "source": {
+ "type": "base64",
+ "media_type": "image/jpeg",
+ "data": encoded
+ }
+ }
+ ]
  if model_source == "OpenAI":
  from openai import OpenAI
  client = OpenAI(api_key=api_key)
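
Taken together, the hunks above rename the public image helpers (extract_image_multi_class → image_multi_class, extract_image_score → image_score, extract_image_features → image_features), build separate prompt payloads for the OpenAI-style and Anthropic-style vision APIs, and drop the Perplexity branch. The sketch below shows how the renamed entry point might be called after this release; it is a hypothetical example, not from the package docs — the import name catllm, the model identifier, and the keyword arguments beyond those visible in the diff (model_source, user_model, api_key) are assumptions.

    # hypothetical usage sketch for cat-llm 0.0.28
    import catllm  # assumed import name for the cat-llm package

    results = catllm.image_multi_class(              # formerly extract_image_multi_class
        image_description="a hand-drawn clock face",  # what the images are expected to show
        image_input=["drawings/p01.png", "drawings/p02.png"],
        categories=["numbers present", "two hands present", "circular outline"],
        model_source="OpenAI",                        # 0.0.28 accepts OpenAI, Anthropic, or Mistral
        user_model="gpt-4o",                          # hypothetical model identifier
        api_key="YOUR_API_KEY",
    )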