cat-llm 0.0.27__py3-none-any.whl → 0.0.28__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
cat_llm-0.0.27.dist-info/METADATA → cat_llm-0.0.28.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cat-llm
-Version: 0.0.27
+Version: 0.0.28
 Summary: A tool for categorizing text data and images using LLMs and vision models
 Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
 Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
cat_llm-0.0.28.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+catllm/CERAD_functions.py,sha256=mtHxshRWmWXMH9kkkCfbMHXgDe00EVabjFiN8s73LPI,16935
+catllm/__about__.py,sha256=ZBz_2FX253RxgXFq-1v4qkfIEZi38fNzs_Rp2e3ZdCo,404
+catllm/__init__.py,sha256=kLk180aJna1s-wU6CLr4_hKkbjoeET-11jGmC1pdhQw,330
+catllm/cat_llm.py,sha256=Rwyz93caNf0h9tfurObY6qDjtG6EKaYXR0GrVW7h2kU,16920
+catllm/image_functions.py,sha256=JLlv5qQhAQzgsRIY18rUPtM1P7x1Fw2UlWlI1dpv3dA,31272
+cat_llm-0.0.28.dist-info/METADATA,sha256=zcHC8uPOLGIr9GOeq1Qyf04WebFdYEY5p4f8ZoZAwHg,1679
+cat_llm-0.0.28.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+cat_llm-0.0.28.dist-info/licenses/LICENSE,sha256=wJLsvOr6lrFUDcoPXExa01HOKFWrS3JC9f0RudRw8uw,1075
+cat_llm-0.0.28.dist-info/RECORD,,
catllm/__about__.py CHANGED
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
 #
 # SPDX-License-Identifier: MIT
-__version__ = "0.0.27"
+__version__ = "0.0.28"
 __author__ = "Chris Soria"
 __email__ = "chrissoria@berkeley.edu"
 __title__ = "cat-llm"
catllm/image_functions.py CHANGED
@@ -1,5 +1,5 @@
 # image multi-class (binary) function
-def extract_image_multi_class(
+def image_multi_class(
     image_description,
     image_input,
     categories,
@@ -96,7 +96,7 @@ def extract_image_multi_class(
                 },
             ]

-        if model_source == "Anthropic":
+        elif model_source == "Anthropic":
             encoded_image = f"data:image/{ext};base64,{encoded}"
             prompt = [
                 {"type": "text",
@@ -171,7 +171,7 @@ def extract_image_multi_class(
                 print(f"An error occurred: {e}")
                 link1.append(f"Error processing input: {e}")
         else:
-            raise ValueError("Unknown source! Choose from OpenAI, Anthropic, Perplexity, or Mistral")
+            raise ValueError("Unknown source! Choose from OpenAI, Anthropic, or Mistral")
         # in situation that no JSON is found
         if reply is not None:
             extracted_json = regex.findall(r'\{(?:[^{}]|(?R))*\}', reply, regex.DOTALL)
@@ -240,7 +240,7 @@ def extract_image_multi_class(
     return categorized_data

 #image score function
-def extract_image_score(
+def image_score(
     reference_image_description,
     image_input,
     reference_image,
@@ -288,7 +288,8 @@ def extract_image_score(
         print(f"Provided a list of {len(image_input)} images.")

     with open(reference_image, 'rb') as f:
-        reference_image = f"data:image/{reference_image.split('.')[-1]};base64,{base64.b64encode(f.read()).decode('utf-8')}"
+        reference = base64.b64encode(f.read()).decode('utf-8')
+        reference_image = f"data:image/{reference_image.split('.')[-1]};base64,{reference}"

     link1 = []
     extracted_jsons = []
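The hunk above splits the reference-image encoding into two steps so the bare base64 string (reference) stays available alongside the data URL: OpenAI-style image_url content takes a data: URL, while the Anthropic image blocks added below take raw base64 in source.data. A standalone sketch of the pattern, with an illustrative file name:

import base64

path = "reference_clock.png"  # illustrative path, not from the diff
with open(path, "rb") as f:
    reference = base64.b64encode(f.read()).decode("utf-8")  # bare base64, reusable as Anthropic "source.data"
reference_image = f"data:image/{path.split('.')[-1]};base64,{reference}"  # data URL for OpenAI "image_url.url"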
@@ -308,40 +309,87 @@ def extract_image_score(
         ext = Path(img_path).suffix.lstrip(".").lower()
         encoded_image = f"data:image/{ext};base64,{encoded}"

-        prompt = [
-            {
-                "type": "text",
-                "text": (
-                    f"You are a visual similarity assessment system.\n"
-                    f"Task Compare these two images:\n"
-                    f"1. REFERENCE (left): {reference_image_description}\n"
-                    f"2. INPUT (right): User-provided drawing\n\n"
-                    f"Rating criteria:\n"
-                    f"1: No meaningful similarity (fundamentally different)\n"
-                    f"2: Barely recognizable similarity (25% match)\n"
-                    f"3: Partial match (50% key features)\n"
-                    f"4: Strong alignment (75% features)\n"
-                    f"5: Near-perfect match (90%+ similarity)\n\n"
-                    f"Output format Return ONLY:\n"
-                    "{\n"
-                    ' "score": [1-5],\n'
-                    ' "summary": "reason you scored"\n'
-                    "}\n\n"
-                    f"Critical rules:\n"
-                    f"- Score must reflect shape, proportions, and key details\n"
-                    f"- List only concrete matching elements from reference\n"
-                    f"- No markdown or additional text"
-                ),
-            },
-            {"type": "image_url",
-             "image_url": {"url": reference_image, "detail": "high"}
-             },
-            {
-                "type": "image_url",
-
-                "image_url": {"url": encoded_image, "detail": "high"},
-            },
-        ]
+        if model_source == "OpenAI":
+            prompt = [
+                {
+                    "type": "text",
+                    "text": (
+                        f"You are a visual similarity assessment system.\n"
+                        f"Task Compare these two images:\n"
+                        f"1. REFERENCE (left): {reference_image_description}\n"
+                        f"2. INPUT (right): User-provided drawing\n\n"
+                        f"Rating criteria:\n"
+                        f"1: No meaningful similarity (fundamentally different)\n"
+                        f"2: Barely recognizable similarity (25% match)\n"
+                        f"3: Partial match (50% key features)\n"
+                        f"4: Strong alignment (75% features)\n"
+                        f"5: Near-perfect match (90%+ similarity)\n\n"
+                        f"Output format ► Return ONLY:\n"
+                        "{\n"
+                        ' "score": [1-5],\n'
+                        ' "summary": "reason you scored"\n'
+                        "}\n\n"
+                        f"Critical rules:\n"
+                        f"- Score must reflect shape, proportions, and key details\n"
+                        f"- List only concrete matching elements from reference\n"
+                        f"- No markdown or additional text"
+                    )
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {"url": reference_image, "detail": "high"}
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {"url": encoded_image, "detail": "high"}
+                }
+            ]
+
+        elif model_source == "Anthropic":  # Changed to elif
+            prompt = [
+                {
+                    "type": "text",
+                    "text": (
+                        f"You are a visual similarity assessment system.\n"
+                        f"Task ► Compare these two images:\n"
+                        f"1. REFERENCE (left): {reference_image_description}\n"
+                        f"2. INPUT (right): User-provided drawing\n\n"
+                        f"Rating criteria:\n"
+                        f"1: No meaningful similarity (fundamentally different)\n"
+                        f"2: Barely recognizable similarity (25% match)\n"
+                        f"3: Partial match (50% key features)\n"
+                        f"4: Strong alignment (75% features)\n"
+                        f"5: Near-perfect match (90%+ similarity)\n\n"
+                        f"Output format ► Return ONLY:\n"
+                        "{\n"
+                        ' "score": [1-5],\n'
+                        ' "summary": "reason you scored"\n'
+                        "}\n\n"
+                        f"Critical rules:\n"
+                        f"- Score must reflect shape, proportions, and key details\n"
+                        f"- List only concrete matching elements from reference\n"
+                        f"- No markdown or additional text"
+                    )
+                },
+                {
+                    "type": "image",  # Added missing type
+                    "source": {
+                        "type": "base64",
+                        "media_type": "image/png",
+                        "data": reference
+                    }
+                },
+                {
+                    "type": "image",  # Added missing type
+                    "source": {
+                        "type": "base64",
+                        "media_type": "image/jpeg",
+                        "data": encoded
+                    }
+                }
+            ]
+
+
         if model_source == "OpenAI":
             from openai import OpenAI
             client = OpenAI(api_key=api_key)
@@ -357,20 +405,6 @@ def extract_image_score(
                 print(f"An error occurred: {e}")
                 link1.append(f"Error processing input: {e}")

-        elif model_source == "Perplexity":
-            from openai import OpenAI
-            client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
-            try:
-                response_obj = client.chat.completions.create(
-                    model=user_model,
-                    messages=[{'role': 'user', 'content': prompt}],
-                    temperature=creativity
-                )
-                reply = response_obj.choices[0].message.content
-                link1.append(reply)
-            except Exception as e:
-                print(f"An error occurred: {e}")
-                link1.append(f"Error processing input: {e}")
         elif model_source == "Anthropic":
             import anthropic
             client = anthropic.Anthropic(api_key=api_key)
@@ -386,6 +420,7 @@ def extract_image_score(
             except Exception as e:
                 print(f"An error occurred: {e}")
                 link1.append(f"Error processing input: {e}")
+
         elif model_source == "Mistral":
             from mistralai import Mistral
             client = Mistral(api_key=api_key)
@@ -468,7 +503,7 @@ def extract_image_score(
     return categorized_data

 # image features function
-def extract_image_features(
+def image_features(
     image_description,
     image_input,
     features_to_extract,
@@ -530,41 +565,80 @@ def extract_image_features(

     for i, img_path in enumerate(
             tqdm(image_files, desc="Categorising images"), start=0):
+        if img_path is None or not os.path.exists(img_path):
+            link1.append("Skipped NaN input or invalid path")
+            extracted_jsons.append("""{"no_valid_image": 1}""")
+            continue  # Skip the rest of the loop iteration
         # encode this specific image once
         with open(img_path, "rb") as f:
             encoded = base64.b64encode(f.read()).decode("utf-8")
         ext = Path(img_path).suffix.lstrip(".").lower()
         encoded_image = f"data:image/{ext};base64,{encoded}"

-        prompt = [
-            {
-                "type": "text",
-                "text": (
-                    f"You are a visual question answering assistant.\n"
-                    f"Task Analyze the attached image and answer these specific questions:\n\n"
-                    f"Image context: {image_description}\n\n"
-                    f"Questions to answer:\n{categories_str}\n\n"
-                    f"Output format ► Return **only** a JSON object where:\n"
-                    f"- Keys are question numbers ('1', '2', ...)\n"
-                    f"- Values are concise answers (numbers, short phrases)\n\n"
-                    f"Example for 3 questions:\n"
-                    "{\n"
-                    ' "1": "4",\n'
-                    ' "2": "blue",\n'
-                    ' "3": "yes"\n'
-                    "}\n\n"
-                    f"Important rules:\n"
-                    f"1. Answer directly - no explanations\n"
-                    f"2. Use exact numerical values when possible\n"
-                    f"3. For yes/no questions, use 'yes' or 'no'\n"
-                    f"4. Never add extra keys or formatting"
-                ),
-            },
-            {
-                "type": "image_url",
-                "image_url": {"url": encoded_image, "detail": "high"},
-            },
-        ]
+        if model_source == "OpenAI":
+            prompt = [
+                {
+                    "type": "text",
+                    "text": (
+                        f"You are a visual question answering assistant.\n"
+                        f"Task Analyze the attached image and answer these specific questions:\n\n"
+                        f"Image context: {image_description}\n\n"
+                        f"Questions to answer:\n{categories_str}\n\n"
+                        f"Output format Return **only** a JSON object where:\n"
+                        f"- Keys are question numbers ('1', '2', ...)\n"
+                        f"- Values are concise answers (numbers, short phrases)\n\n"
+                        f"Example for 3 questions:\n"
+                        "{\n"
+                        ' "1": "4",\n'
+                        ' "2": "blue",\n'
+                        ' "3": "yes"\n'
+                        "}\n\n"
+                        f"Important rules:\n"
+                        f"1. Answer directly - no explanations\n"
+                        f"2. Use exact numerical values when possible\n"
+                        f"3. For yes/no questions, use 'yes' or 'no'\n"
+                        f"4. Never add extra keys or formatting"
+                    ),
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {"url": encoded_image, "detail": "high"},
+                },
+            ]
+        elif model_source == "Anthropic":
+            prompt = [
+                {
+                    "type": "text",
+                    "text": (
+                        f"You are a visual question answering assistant.\n"
+                        f"Task ► Analyze the attached image and answer these specific questions:\n\n"
+                        f"Image context: {image_description}\n\n"
+                        f"Questions to answer:\n{categories_str}\n\n"
+                        f"Output format ► Return **only** a JSON object where:\n"
+                        f"- Keys are question numbers ('1', '2', ...)\n"
+                        f"- Values are concise answers (numbers, short phrases)\n\n"
+                        f"Example for 3 questions:\n"
+                        "{\n"
+                        ' "1": "4",\n'
+                        ' "2": "blue",\n'
+                        ' "3": "yes"\n'
+                        "}\n\n"
+                        f"Important rules:\n"
+                        f"1. Answer directly - no explanations\n"
+                        f"2. Use exact numerical values when possible\n"
+                        f"3. For yes/no questions, use 'yes' or 'no'\n"
+                        f"4. Never add extra keys or formatting"
+                    )
+                },
+                {
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": "image/jpeg",
+                        "data": encoded
+                    }
+                }
+            ]
         if model_source == "OpenAI":
             from openai import OpenAI
             client = OpenAI(api_key=api_key)
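In sum, 0.0.28 renames the public entry points in image_functions.py (extract_image_multi_class → image_multi_class, extract_image_score → image_score, extract_image_features → image_features), removes Perplexity from the accepted model_source values, builds provider-specific prompt payloads for OpenAI and Anthropic, and (in image_features) skips missing image paths instead of failing. A minimal calling sketch against the new names follows; the argument values, and the assumption that api_key, user_model, and creativity are keyword parameters of image_score, are illustrative rather than taken from the diff:

# Sketch under assumptions: parameter names match variables visible in
# the diff; the values and the exact signature are illustrative guesses.
from catllm.image_functions import image_score

results = image_score(
    reference_image_description="a clock face reading ten past eleven",
    image_input=["drawing1.png", "drawing2.png"],  # paths to drawings to score
    reference_image="reference_clock.png",
    model_source="Anthropic",  # 0.0.28 accepts OpenAI, Anthropic, or Mistral
    user_model="claude-3-5-sonnet-latest",  # hypothetical model id
    api_key="YOUR_API_KEY",
    creativity=0,  # forwarded as the sampling temperature in the diff
)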
cat_llm-0.0.27.dist-info/RECORD DELETED
@@ -1,9 +0,0 @@
-catllm/CERAD_functions.py,sha256=mtHxshRWmWXMH9kkkCfbMHXgDe00EVabjFiN8s73LPI,16935
-catllm/__about__.py,sha256=H3dYrI6XpHXpRmgCCiw8u2dIaFZWRsw7RxfRy_aIlaQ,404
-catllm/__init__.py,sha256=kLk180aJna1s-wU6CLr4_hKkbjoeET-11jGmC1pdhQw,330
-catllm/cat_llm.py,sha256=Rwyz93caNf0h9tfurObY6qDjtG6EKaYXR0GrVW7h2kU,16920
-catllm/image_functions.py,sha256=rMa7Jb565Rp75pbRKYF8Sqk_Uwuv60olMU0Mbvbq55s,27440
-cat_llm-0.0.27.dist-info/METADATA,sha256=ocT3Y5NoA9csBGpSkg0G_PCPPhbm_EgWiNicpf9Xf-I,1679
-cat_llm-0.0.27.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-cat_llm-0.0.27.dist-info/licenses/LICENSE,sha256=wJLsvOr6lrFUDcoPXExa01HOKFWrS3JC9f0RudRw8uw,1075
-cat_llm-0.0.27.dist-info/RECORD,,