PyPI - cat-llm - Versions diffs - 0.0.62__py3-none-any.whl → 0.0.64__py3-none-any.whl - Mend

cat-llm 0.0.62__py3-none-any.whl → 0.0.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

{cat_llm-0.0.62.dist-info → cat_llm-0.0.64.dist-info}/METADATA +2 -1
cat_llm-0.0.64.dist-info/RECORD +15 -0
catllm/CERAD_functions.py +8 -6
catllm/__about__.py +1 -1
catllm/build_web_research.py +309 -29
catllm/image_functions.py +22 -16
catllm/text_functions.py +14 -8
cat_llm-0.0.62.dist-info/RECORD +0 -15
{cat_llm-0.0.62.dist-info → cat_llm-0.0.64.dist-info}/WHEEL +0 -0
{cat_llm-0.0.62.dist-info → cat_llm-0.0.64.dist-info}/licenses/LICENSE +0 -0

{cat_llm-0.0.62.dist-info → cat_llm-0.0.64.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cat-llm
-Version: 0.0.62
+Version: 0.0.64
 Summary: A tool for categorizing text data and images using LLMs and vision models
 Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
 Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
@@ -22,6 +22,7 @@ Requires-Python: >=3.8
 Requires-Dist: anthropic
 Requires-Dist: openai
 Requires-Dist: pandas
+Requires-Dist: perplexity
 Requires-Dist: requests
 Requires-Dist: tqdm
 Description-Content-Type: text/markdown

cat_llm-0.0.64.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,15 @@
+catllm/CERAD_functions.py,sha256=q4HbP5e2Yu8NnZZ-2eX4sImyj6u3i8xWcq0pYU81iis,22676
+catllm/__about__.py,sha256=ef_C266qfrp7mTd1dpTp_iodPNJXT6D5pQVsfdLEmB8,408
+catllm/__init__.py,sha256=sf02zp7N0NW0mAQi7eQ4gliWR1EwoqvXkHN2HwwjcTE,372
+catllm/build_web_research.py,sha256=880dfE2bEQb-FrXP-42JoLLtyc9ox_sBULDr38xiTiQ,22655
+catllm/image_functions.py,sha256=8_FftRU285x1HT-AgNkaobefQVD-5q7ZY_t7JFdL3Sg,36177
+catllm/text_functions.py,sha256=Jf51lNaFtcS2QGnNLkhM8rFVJSD4tN0Bm_VfELvb47g,18686
+catllm/images/circle.png,sha256=JWujAWAh08-TajAoEr_TAeFNLlfbryOLw6cgIBREBuQ,86202
+catllm/images/cube.png,sha256=nFec3e5bmRe4zrBCJ8QK-HcJLrG7u7dYdKhmdMfacfE,77275
+catllm/images/diamond.png,sha256=rJDZKtsnBGRO8FPA0iHuA8FvHFGi9PkI_DWSFdw6iv0,99568
+catllm/images/overlapping_pentagons.png,sha256=VO5plI6eoVRnjfqinn1nNzsCP2WQhuQy71V0EASouW4,71208
+catllm/images/rectangles.png,sha256=2XM16HO9EYWj2yHgN4bPXaCwPfl7iYQy0tQUGaJX9xg,40692
+cat_llm-0.0.64.dist-info/METADATA,sha256=KKM4UUyBk7ty5oG7IpVjnJvbF-vRbpC0tFTsAl8UdLo,22421
+cat_llm-0.0.64.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+cat_llm-0.0.64.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
+cat_llm-0.0.64.dist-info/RECORD,,

catllm/CERAD_functions.py CHANGED Viewed

@@ -44,6 +44,8 @@ def cerad_drawn_score(
     from pathlib import Path
     import pkg_resources
+    model_source = model_source.lower() # eliminating case sensitivity
     shape = shape.lower()
     shape = "rectangles" if shape == "overlapping rectangles" else shape
     if shape == "circle":
@@ -155,7 +157,7 @@ def cerad_drawn_score(
         else:
             reference_text = f"Image is expected to show within it a drawing of a {shape}.\n\n"
-        if model_source == "OpenAI" and valid_image:
+        if model_source == "openai" and valid_image:
             prompt = [
                 {
                     "type": "text",
@@ -185,7 +187,7 @@ def cerad_drawn_score(
                 "image_url": {"url": encoded_image, "detail": "high"}
             })
-        elif model_source == "Anthropic" and valid_image:
+        elif model_source == "anthropic" and valid_image:
             prompt = [
                 {
                     "type": "text",
@@ -225,7 +227,7 @@ def cerad_drawn_score(
             }
             )
-        elif model_source == "Mistral" and valid_image:
+        elif model_source == "mistral" and valid_image:
             prompt = [
                 {
                     "type": "text",
@@ -254,7 +256,7 @@ def cerad_drawn_score(
                 "image_url": f"data:image/{ext};base64,{encoded_image}"
             })
-        if model_source == "OpenAI" and valid_image:
+        if model_source == "openai" and valid_image:
             from openai import OpenAI
             client = OpenAI(api_key=api_key)
             try:
@@ -272,7 +274,7 @@ def cerad_drawn_score(
                     print("An error occurred: {e}")
                     link1.append("Error processing input: {e}")
-        elif model_source == "Anthropic"  and valid_image:
+        elif model_source == "anthropic"  and valid_image:
             import anthropic
             client = anthropic.Anthropic(api_key=api_key)
             try:
@@ -291,7 +293,7 @@ def cerad_drawn_score(
                     print("An error occurred: {e}")
                     link1.append("Error processing input: {e}")
-        elif model_source == "Mistral"  and valid_image:
+        elif model_source == "mistral"  and valid_image:
             from mistralai import Mistral
             reply = None
             client = Mistral(api_key=api_key)

catllm/__about__.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
 #
 # SPDX-License-Identifier: MIT
-__version__ = "0.0.62"
+__version__ = "0.0.64"
 __author__ = "Chris Soria"
 __email__ = "chrissoria@berkeley.edu"
 __title__ = "cat-llm"

catllm/build_web_research.py CHANGED Viewed

@@ -5,39 +5,78 @@ def build_web_research_dataset(
     api_key,
     answer_format = "concise",
     additional_instructions = "",
-    categories = ['Answer','URL'],
+    categories = ['Answer'],
     user_model="claude-sonnet-4-20250514",
     creativity=None,
     safety=False,
     filename="categorized_data.csv",
     save_directory=None,
     model_source="Anthropic",
-    time_delay=15
+    start_date=None,
+    end_date=None,
+    search_depth="", #enables Tavily searches
+    tavily_api=None,
+    output_urls = True,
+    max_retries = 6, #API rate limit error handler retries
+    time_delay=5
 ):
     import os
+    import re
     import json
     import pandas as pd
     import regex
     from tqdm import tqdm
     import time
+    from datetime import datetime
+    #ensures proper date format
+    def _validate_date(date_str):
+        """Validates YYYY-MM-DD format"""
+        if date_str is None:
+            return True  # None is acceptable (means no date constraint)
+        if not isinstance(date_str, str):
+            return False
+        # Check pattern: YYYY_MM_DD
+        pattern = r'^\d{4}-\d{2}-\d{2}$'
+        if not re.match(pattern, date_str):
+            return False
+        # Validate actual date
+        try:
+            year, month, day = date_str.split('-')
+            datetime(int(year), int(month), int(day))
+            return True
+        except (ValueError, OverflowError):
+            return False
+    # Validate dates at the start of the function
+    if not _validate_date(start_date):
+        raise ValueError(f"start_date must be in YYYY-MM-DD format, got: {start_date}")
+    if not _validate_date(end_date):
+        raise ValueError(f"end_date must be in YYYY-MM-DD format, got: {end_date}")
     model_source = model_source.lower() # eliminating case sensitivity
+    if model_source == "perplexity" and start_date is not None:
+        start_date = datetime.strptime(start_date, "%Y-%m-%d").strftime("%m/%d/%Y")
+    if model_source == "perplexity" and end_date is not None:
+        end_date = datetime.strptime(end_date, "%Y-%m-%d").strftime("%m/%d/%Y")
+    # in case user switches to google but doesn't switch model
+    if model_source == "google" and user_model == "claude-sonnet-4-20250514":
+        user_model = "gemini-2.5-flash"
     categories_str = "\n".join(f"{i + 1}. {cat}" for i, cat in enumerate(categories))
-    print(categories_str)
     cat_num = len(categories)
     category_dict = {str(i+1): "0" for i in range(cat_num)}
     example_JSON = json.dumps(category_dict, indent=4)
-    # ensure number of categories is what user wants
-    #print("\nThe information to be extracted:")
-    #for i, cat in enumerate(categories, 1):
-        #print(f"{i}. {cat}")
     link1 = []
     extracted_jsons = []
-    max_retries = 5 #API rate limit error handler retries
+    extracted_urls = []
     for idx, item in enumerate(tqdm(search_input, desc="Building dataset")):
         if idx > 0:  # Skip delay for first item only
@@ -46,9 +85,9 @@ def build_web_research_dataset(
         if pd.isna(item):
             link1.append("Skipped NaN input")
+            extracted_urls.append([])
             default_json = example_JSON
             extracted_jsons.append(default_json)
-            #print(f"Skipped NaN input.")
         else:
             prompt = f"""<role>You are a research assistant specializing in finding current, factual information.</role>
@@ -59,7 +98,6 @@ def build_web_research_dataset(
             - Provide your answer as {answer_format}
             - Prioritize official sources when possible
             - If information is not found, state "Information not found"
-            - Include exactly one source URL where you found the information
             - Do not include any explanatory text or commentary beyond the JSON
                 {additional_instructions}
             </rules>
@@ -68,14 +106,74 @@ def build_web_research_dataset(
             Return your response as valid JSON with this exact structure:
             {{
             "answer": "Your factual answer or 'Information not found'",
-            "url": "Source URL or 'No source available'"
+            "second_best_answer": "Your second best factual answer or 'Information not found'",
+            "confidence": "confidence in response 0-5 or 'Information not found'"
         }}
         </format>"""
-            #print(prompt)
-            if model_source == "anthropic":
+            if start_date is not None and end_date is not None:
+                append_text = f"\n- Focus on webpages with a page age between {start_date} and {end_date}."
+                prompt = prompt.replace("<rules>", "<rules>" + append_text)
+            elif start_date is not None:
+                append_text = f"\n- Focus on webpages published after {start_date}."
+                prompt = prompt.replace("<rules>", "<rules>" + append_text)
+            elif end_date is not None:
+                append_text = f"\n- Focus on webpages published before {end_date}."
+                prompt = prompt.replace("<rules>", "<rules>" + append_text)
+            if search_depth == "advanced" and model_source != "perplexity":
+                try:
+                    from tavily import TavilyClient
+                    tavily_client = TavilyClient(tavily_api)
+                    tavily_response = tavily_client.search(
+                        query=f"{item}'s {search_question}",
+                        include_answer=True,
+                        max_results=15,
+                        search_depth="advanced",
+                        **({"start_date": start_date} if start_date is not None else {}),
+                        **({"end_date": end_date} if end_date is not None else {})
+                    )
+                    urls = [
+                        result['url']
+                        for result in tavily_response.get('results', [])
+                        if 'url' in result
+                    ]
+                    seen = set()
+                    urls = [u for u in urls if not (u in seen or seen.add(u))]
+                    extracted_urls.append(urls)
+                except Exception as e:
+                    error_msg = str(e).lower()
+                    if "unauthorized" in error_msg or "403" in error_msg or "401" in error_msg or "api_key" in error_msg:
+                        raise ValueError("ERROR: Invalid or missing tavily_api required for advanced search. Get one at https://app.tavily.com/home. To install: pip install tavily-python") from e
+                    else:
+                        print(f"Tavily search error: {e}")
+                        link1.append(f"Error with Tavily search: {e}")
+                        extracted_urls.append([])
+                        continue
+                #print(tavily_response)
+                advanced_prompt = f"""Based on the following search results about {item}'s {search_question}, provide your answer in this EXACT JSON format and {answer_format}:
+                If you can't find the information, respond with 'Information not found'.
+                {{"answer": "your answer here or 'Information not found'",
+                "second_best_answer": "your second best answer here or 'Information not found'",
+                "confidence": "confidence in response 0-5 or 'Information not found'"}}
+                Search results:
+                {tavily_response}
+                Additional context from sources:
+                {chr(10).join([f"- {r.get('title', '')}: {r.get('content', '')}" for r in tavily_response.get('results', [])[:3]])}
+                Return ONLY the JSON object, no other text."""
+            if model_source == "anthropic" and search_depth != "advanced":
                 import anthropic
                 client = anthropic.Anthropic(api_key=api_key)
+                #print(prompt)
                 attempt = 0
                 while attempt < max_retries:
                     try:
@@ -95,8 +193,21 @@ def build_web_research_dataset(
                             if getattr(block, "type", "") == "text"
                         ).strip()
                         link1.append(reply)
+                        urls = [
+                            item["url"]
+                            for block in message.content
+                            if getattr(block, "type", "") == "web_search_tool_result"
+                            for item in (getattr(block, "content", []) or [])
+                            if isinstance(item, dict) and item.get("type") == "web_search_result" and "url" in item
+                        ]
+                        seen = set()
+                        urls = [u for u in urls if not (u in seen or seen.add(u))]
+                        extracted_urls.append(urls)
                         break
-                    except anthropic.error.RateLimitError as e:
+                    except anthropic.RateLimitError as e:
                         wait_time = 2 ** attempt  # Exponential backoff, keeps doubling after each attempt
                         print(f"Rate limit error encountered. Retrying in {wait_time} seconds...")
                         time.sleep(wait_time) #in case user wants to try and buffer the amount of errors by adding a wait time before attemps
@@ -104,11 +215,59 @@ def build_web_research_dataset(
                     except Exception as e:
                         print(f"A Non-rate-limit error occurred: {e}")
                         link1.append(f"Error processing input: {e}")
+                        extracted_urls.append([])
                         break #stop retrying
                 else:
                     link1.append("Max retries exceeded for rate limit errors.")
+                    extracted_urls.append([])
-            elif model_source == "google":
+            elif model_source == "anthropic" and search_depth == "advanced":
+                import anthropic
+                claude_client = anthropic.Anthropic(api_key=api_key)
+                attempt = 0
+                while attempt < max_retries:
+                    try:
+                        message = claude_client.messages.create(
+                            model=user_model,
+                            max_tokens=1024,
+                            messages=[{"role": "user", "content": advanced_prompt}],
+                            **({"temperature": creativity} if creativity is not None else {})
+                            )
+                        reply = " ".join(
+                            block.text
+                            for block in message.content
+                            if getattr(block, "type", "") == "text"
+                            ).strip()
+                        try:
+                            import json
+                            json_response = json.loads(reply)
+                            final_answer = json_response.get('answer', reply)
+                            link1.append(final_answer)
+                        except json.JSONDecodeError:
+                            print(f"JSON parse error, using raw reply: {reply}")
+                            link1.append(reply)
+                        break  # Success
+                    except anthropic.RateLimitError as e:
+                        wait_time = 2 ** attempt
+                        print(f"Rate limit error encountered. Retrying in {wait_time} seconds...")
+                        time.sleep(wait_time)
+                        attempt += 1
+                    except Exception as e:
+                        print(f"A Non-rate-limit error occurred: {e}")
+                        link1.append(f"Error processing input: {e}")
+                        break
+                else:
+                    # Max retries exceeded
+                    link1.append("Max retries exceeded for rate limit errors.")
+            elif model_source == "google" and search_depth != "advanced":
                 import requests
                 url = f"https://generativelanguage.googleapis.com/v1beta/models/{user_model}:generateContent"
                 try:
@@ -121,11 +280,62 @@ def build_web_research_dataset(
                         "tools": [{"google_search": {}}],
                         **({"generationConfig": {"temperature": creativity}} if creativity is not None else {})
                     }
                     response = requests.post(url, headers=headers, json=payload)
                     response.raise_for_status()
                     result = response.json()
+                    urls = []
+                    for cand in result.get("candidates", []):
+                        rendered_html = (
+                            cand.get("groundingMetadata", {})
+                            .get("searchEntryPoint", {})
+                            .get("renderedContent", "")
+                        )
+                        if rendered_html:
+                        # regex: capture href="..."; limited to class="chip"
+                            found = re.findall(
+                                r'<a[^>]*class=["\']chip["\'][^>]*href=["\']([^"\']+)["\']',
+                                rendered_html,
+                                flags=re.IGNORECASE
+                            )
+                            urls.extend(found)
+                    seen = set()
+                    urls = [u for u in urls if not (u in seen or seen.add(u))]
+                    extracted_urls.append(urls)
+                    # extract reply from Google's response structure
+                    if "candidates" in result and result["candidates"]:
+                        reply = result["candidates"][0]["content"]["parts"][0]["text"]
+                    else:
+                        reply = "No response generated"
+                    link1.append(reply)
+                except Exception as e:
+                    print(f"An error occurred: {e}")
+                    link1.append(f"Error processing input: {e}")
+                    extracted_urls.append([])
+            elif model_source == "google" and search_depth == "advanced":
+                import requests
+                url = f"https://generativelanguage.googleapis.com/v1beta/models/{user_model}:generateContent"
+                try:
+                    headers = {
+                        "x-goog-api-key": api_key,
+                        "Content-Type": "application/json"
+                        }
+                    payload = {
+                        "contents": [{"parts": [{"text": advanced_prompt}]}],
+                        **({"generationConfig": {"temperature": creativity}} if creativity is not None else {})
+                        }
+                    response = requests.post(url, headers=headers, json=payload)
+                    response.raise_for_status()
+                    result = response.json()
                     # extract reply from Google's response structure
                     if "candidates" in result and result["candidates"]:
                         reply = result["candidates"][0]["content"]["parts"][0]["text"]
@@ -138,6 +348,54 @@ def build_web_research_dataset(
                     print(f"An error occurred: {e}")
                     link1.append(f"Error processing input: {e}")
+            elif model_source == "perplexity":
+                from perplexity import Perplexity
+                client = Perplexity(api_key=api_key)
+                try:
+                    response = client.chat.completions.create(
+                        messages=[
+                            {
+                                "role": "user",
+                                "content": prompt
+                            }
+                        ],
+                        model=user_model,
+                        max_tokens=1024,
+                        **({"temperature": creativity} if creativity is not None else {}),
+                        web_search_options={"search_context_size": "high" if search_depth == "advanced" else "medium"},
+                        **({"search_after_date_filter": start_date} if start_date else {}),
+                        **({"search_before_date_filter": end_date} if end_date else {}),
+                        response_format={ #requiring a JSON
+                        "type": "json_schema",
+                        "json_schema": {
+                            "schema": {
+                                "type": "object",
+                                "properties": {
+                                    "answer": {"type": "string"},
+                                    "second_best_answer": {"type": "string"},
+                                    "confidence": {"type": "integer"}
+                            },
+                            "required": ["answer", "second_best_answer"]
+                        }
+                    }
+                }
+            )
+                    reply = response.choices[0].message.content
+                    #print(response)
+                    link1.append(reply)
+                    urls = list(response.citations) if hasattr(response, 'citations') else []
+                    seen = set()
+                    urls = [u for u in urls if not (u in seen or seen.add(u))]
+                    extracted_urls.append(urls)
+                except Exception as e:
+                    print(f"An error occurred: {e}")
+                    link1.append(f"Error processing input: {e}")
+                    extracted_urls.append([])
             else:
                 raise ValueError("Unknown source! Currently this function only supports 'Anthropic' or 'Google' as model_source.")
             # in situation that no JSON is found
@@ -157,12 +415,12 @@ def build_web_research_dataset(
                         extracted_jsons.append(raw_json)
                 else:
                     # Use consistent schema for errors
-                    error_message = json.dumps({"answer": "e", "url": "e"})
+                    error_message = json.dumps({"answer": "e"})
                     extracted_jsons.append(error_message)
                     print(error_message)
             else:
                 # Handle None reply case
-                error_message = json.dumps({"answer": "e", "url": "e"})
+                error_message = json.dumps({"answer": "e"})
                 extracted_jsons.append(error_message)
                 #print(error_message)
@@ -170,9 +428,9 @@ def build_web_research_dataset(
         if safety:
             # Save progress so far
             temp_df = pd.DataFrame({
-                'survey_response': search_input[:idx+1],
-                'model_response': link1,
-                'json': extracted_jsons
+                'raw_response': search_input[:idx+1]
+                #'model_response': link1,
+                #'json': extracted_jsons
             })
             # Normalize processed jsons so far
             normalized_data_list = []
@@ -181,9 +439,11 @@ def build_web_research_dataset(
                     parsed_obj = json.loads(json_str)
                     normalized_data_list.append(pd.json_normalize(parsed_obj))
                 except json.JSONDecodeError:
-                    normalized_data_list.append(pd.DataFrame({"1": ["e"]}))
+                    normalized_data_list.append(pd.DataFrame({"answer": ["e"]}))
             normalized_data = pd.concat(normalized_data_list, ignore_index=True)
+            temp_urls = pd.DataFrame(extracted_urls).add_prefix("url_")
             temp_df = pd.concat([temp_df, normalized_data], axis=1)
+            temp_df = pd.concat([temp_df, temp_urls], axis=1)
             # Save to CSV
             if save_directory is None:
                 save_directory = os.getcwd()
@@ -196,17 +456,37 @@ def build_web_research_dataset(
             parsed_obj = json.loads(json_str)
             normalized_data_list.append(pd.json_normalize(parsed_obj))
         except json.JSONDecodeError:
-            normalized_data_list.append(pd.DataFrame({"1": ["e"]}))
+            normalized_data_list.append(pd.DataFrame({"answer": ["e"]}))
     normalized_data = pd.concat(normalized_data_list, ignore_index=True)
+    # converting urls to dataframe and adding prefix
+    df_urls = pd.DataFrame(extracted_urls).add_prefix("url_")
     categorized_data = pd.DataFrame({
-        'survey_response': (
+        'search_input': (
             search_input.reset_index(drop=True) if isinstance(search_input, (pd.DataFrame, pd.Series))
             else pd.Series(search_input)
         ),
-        'link1': pd.Series(link1).reset_index(drop=True),
-        'json': pd.Series(extracted_jsons).reset_index(drop=True)
+        'raw_response': pd.Series(link1).reset_index(drop=True),
+        #'json': pd.Series(extracted_jsons).reset_index(drop=True),
+        #"all_urls": pd.Series(extracted_urls).reset_index(drop=True)
     })
     categorized_data = pd.concat([categorized_data, normalized_data], axis=1)
+    categorized_data = pd.concat([categorized_data, df_urls], axis=1)
+    # drop second best answer column if it exists
+    # we only ask for the second best answer to "force" the model to think more carefully about its best answer, but we don't actually need to keep it
+    categorized_data = categorized_data.drop(columns=["second_best_answer"], errors='ignore')
+    # dropping this column for advanced searches (this column is mostly useful for basic searches to see what the model saw)
+    if search_depth == "advanced":
+        categorized_data = categorized_data.drop(columns=["raw_response"], errors='ignore')
+    #for users who don't want the urls included in the final dataframe
+    if output_urls is False:
+        categorized_data = categorized_data.drop(columns=[col for col in categorized_data.columns if col.startswith("url_")])
+    if save_directory is not None:
+        categorized_data.to_csv(os.path.join(save_directory, filename), index=False)
     return categorized_data

catllm/image_functions.py CHANGED Viewed

@@ -33,6 +33,8 @@ def image_multi_class(
     '*.psd'
     ]
+    model_source = model_source.lower() # eliminating case sensitivity
     if not isinstance(image_input, list):
         # If image_input is a filepath (string)
         image_files = []
@@ -86,7 +88,7 @@ def image_multi_class(
     # Handle extension safely
         ext = Path(img_path).suffix.lstrip(".").lower()
-        if model_source == "OpenAI" or model_source == "Mistral":
+        if model_source == "openai" or model_source == "mistral":
             encoded_image = f"data:image/{ext};base64,{encoded}"
             prompt = [
                 {
@@ -110,7 +112,7 @@ def image_multi_class(
                 },
             ]
-        elif model_source == "Anthropic":
+        elif model_source == "anthropic":
             encoded_image = f"data:image/{ext};base64,{encoded}"
             prompt = [
                 {"type": "text",
@@ -136,7 +138,7 @@ def image_multi_class(
                     }
                 }
             ]
-        if model_source == "OpenAI":
+        if model_source == "openAI":
             from openai import OpenAI
             client = OpenAI(api_key=api_key)
             try:
@@ -154,7 +156,7 @@ def image_multi_class(
                     print("An error occurred: {e}")
                     link1.append("Error processing input: {e}")
-        elif model_source == "Anthropic":
+        elif model_source == "anthropic":
             import anthropic
             reply = None
             client = anthropic.Anthropic(api_key=api_key)
@@ -174,7 +176,7 @@ def image_multi_class(
                     print("An error occurred: {e}")
                     link1.append("Error processing input: {e}")
-        elif model_source == "Mistral":
+        elif model_source == "mistral":
             from mistralai import Mistral
             client = Mistral(api_key=api_key)
             try:
@@ -305,6 +307,8 @@ def image_score_drawing(
     '*.psd'
     ]
+    model_source = model_source.lower() # eliminating case sensitivity
     if not isinstance(image_input, list):
         # If image_input is a filepath (string)
         image_files = []
@@ -354,7 +358,7 @@ def image_score_drawing(
         ext = Path(img_path).suffix.lstrip(".").lower()
         encoded_image = f"data:image/{ext};base64,{encoded}"
-        if model_source == "OpenAI" or model_source == "Mistral":
+        if model_source == "openai" or model_source == "mistral":
             prompt = [
                 {
                     "type": "text",
@@ -390,7 +394,7 @@ def image_score_drawing(
                 }
             ]
-        elif model_source == "Anthropic":  # Changed to elif
+        elif model_source == "anthropic":  # Changed to elif
             prompt = [
                 {
                     "type": "text",
@@ -435,7 +439,7 @@ def image_score_drawing(
             ]
-        if model_source == "OpenAI":
+        if model_source == "openai":
             from openai import OpenAI
             client = OpenAI(api_key=api_key)
             try:
@@ -453,7 +457,7 @@ def image_score_drawing(
                     print("An error occurred: {e}")
                     link1.append("Error processing input: {e}")
-        elif model_source == "Anthropic":
+        elif model_source == "anthropic":
             import anthropic
             client = anthropic.Anthropic(api_key=api_key)
             try:
@@ -472,7 +476,7 @@ def image_score_drawing(
                     print("An error occurred: {e}")
                     link1.append("Error processing input: {e}")
-        elif model_source == "Mistral":
+        elif model_source == "mistral":
             from mistralai import Mistral
             client = Mistral(api_key=api_key)
             try:
@@ -598,6 +602,8 @@ def image_features(
     '*.psd'
     ]
+    model_source = model_source.lower() # eliminating case sensitivity
     if not isinstance(image_input, list):
         # If image_input is a filepath (string)
         image_files = []
@@ -644,7 +650,7 @@ def image_features(
             encoded_image = f"data:image/{ext};base64,{encoded}"
             valid_image = True
-        if model_source == "OpenAI" or model_source == "Mistral":
+        if model_source == "openai" or model_source == "mistral":
             prompt = [
                 {
                     "type": "text",
@@ -674,7 +680,7 @@ def image_features(
                             "image_url": {"url": encoded_image, "detail": "high"},
                             },
             ]
-        elif model_source == "Anthropic":
+        elif model_source == "anthropic":
             prompt = [
                 {
                     "type": "text",
@@ -708,7 +714,7 @@ def image_features(
                     }
                 }
             ]
-        if model_source == "OpenAI":
+        if model_source == "openai":
             from openai import OpenAI
             client = OpenAI(api_key=api_key)
             try:
@@ -726,7 +732,7 @@ def image_features(
                     print("An error occurred: {e}")
                     link1.append("Error processing input: {e}")
-        elif model_source == "Perplexity":
+        elif model_source == "perplexity":
             from openai import OpenAI
             client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
             try:
@@ -744,7 +750,7 @@ def image_features(
                     print("An error occurred: {e}")
                     link1.append("Error processing input: {e}")
-        elif model_source == "Anthropic":
+        elif model_source == "anthropic":
             import anthropic
             client = anthropic.Anthropic(api_key=api_key)
             try:
@@ -763,7 +769,7 @@ def image_features(
                     print("An error occurred: {e}")
                     link1.append("Error processing input: {e}")
-        elif model_source == "Mistral":
+        elif model_source == "mistral":
             from mistralai import Mistral
             client = Mistral(api_key=api_key)
             try:

catllm/text_functions.py CHANGED Viewed

@@ -22,6 +22,8 @@ def explore_corpus(
     print(f"Exploring class for question: '{survey_question}'.\n          {cat_num * divisions} unique categories to be extracted.")
     print()
+    model_source = model_source.lower() # eliminating case sensitivity
     chunk_size = round(max(1, len(survey_input) / divisions),0)
     chunk_size = int(chunk_size)
@@ -46,7 +48,7 @@ Responses are each separated by a semicolon. \
 Responses are contained within triple backticks here: ```{survey_participant_chunks}``` \
 Number your categories from 1 through {cat_num} and be concise with the category labels and provide no description of the categories."""
-        if model_source == "OpenAI":
+        if model_source == "openai":
             client = OpenAI(api_key=api_key)
             try:
                 response_obj = client.chat.completions.create(
@@ -123,6 +125,8 @@ def explore_common_categories(
     print(f"Exploring class for question: '{survey_question}'.\n          {cat_num * divisions} unique categories to be extracted and {top_n} to be identified as the most common.")
     print()
+    model_source = model_source.lower() # eliminating case sensitivity
     chunk_size = round(max(1, len(survey_input) / divisions),0)
     chunk_size = int(chunk_size)
@@ -147,7 +151,7 @@ Responses are each separated by a semicolon. \
 Responses are contained within triple backticks here: ```{survey_participant_chunks}``` \
 Number your categories from 1 through {cat_num} and be concise with the category labels and provide no description of the categories."""
-        if model_source == "OpenAI":
+        if model_source == "openai":
             client = OpenAI(api_key=api_key)
             try:
                 response_obj = client.chat.completions.create(
@@ -198,7 +202,7 @@ Number your categories from 1 through {cat_num} and be concise with the category
 The categories are contained within triple backticks here: ```{df['Category'].tolist()}``` \
 Return the top {top_n} categories as a numbered list sorted from the most to least common and keep the categories {specificity}, with no additional text or explanation."""
-    if model_source == "OpenAI":
+    if model_source == "openai":
         client = OpenAI(api_key=api_key)
         response_obj = client.chat.completions.create(
             model=user_model,
@@ -237,6 +241,8 @@ def multi_class(
     import pandas as pd
     import regex
     from tqdm import tqdm
+    model_source = model_source.lower() # eliminating case sensitivity
     categories_str = "\n".join(f"{i + 1}. {cat}" for i, cat in enumerate(categories))
     cat_num = len(categories)
@@ -265,7 +271,7 @@ Categorize this survey response "{response}" into the following categories that
 {categories_str} \
 Provide your work in JSON format where the number belonging to each category is the key and a 1 if the category is present and a 0 if it is not present as key values."""
             #print(prompt)
-            if model_source == ("OpenAI"):
+            if model_source == ("openai"):
                 from openai import OpenAI
                 client = OpenAI(api_key=api_key)
                 try:
@@ -279,7 +285,7 @@ Provide your work in JSON format where the number belonging to each category is
                 except Exception as e:
                     print(f"An error occurred: {e}")
                     link1.append(f"Error processing input: {e}")
-            elif model_source == "Perplexity":
+            elif model_source == "perplexity":
                 from openai import OpenAI
                 client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
                 try:
@@ -293,7 +299,7 @@ Provide your work in JSON format where the number belonging to each category is
                 except Exception as e:
                     print(f"An error occurred: {e}")
                     link1.append(f"Error processing input: {e}")
-            elif model_source == "Anthropic":
+            elif model_source == "anthropic":
                 import anthropic
                 client = anthropic.Anthropic(api_key=api_key)
                 try:
@@ -309,7 +315,7 @@ Provide your work in JSON format where the number belonging to each category is
                     print(f"An error occurred: {e}")
                     link1.append(f"Error processing input: {e}")
-            elif model_source == "Google":
+            elif model_source == "google":
                 import requests
                 url = f"https://generativelanguage.googleapis.com/v1beta/models/{user_model}:generateContent"
                 try:
@@ -339,7 +345,7 @@ Provide your work in JSON format where the number belonging to each category is
                     print(f"An error occurred: {e}")
                     link1.append(f"Error processing input: {e}")
-            elif model_source == "Mistral":
+            elif model_source == "mistral":
                 from mistralai import Mistral
                 client = Mistral(api_key=api_key)
                 try:

cat_llm-0.0.62.dist-info/RECORD DELETED Viewed

@@ -1,15 +0,0 @@
-catllm/CERAD_functions.py,sha256=05n7h27TuAp3klkOnrH--m1wMreYqYuObM9NIab934o,22603
-catllm/__about__.py,sha256=R0Mt1NOAMAQCF7SHD4XDl2P4gF92EnfjYXaJ1Xo0vdc,408
-catllm/__init__.py,sha256=sf02zp7N0NW0mAQi7eQ4gliWR1EwoqvXkHN2HwwjcTE,372
-catllm/build_web_research.py,sha256=nAKfkg7lihjXrYrLvltsKCvpb5zRFYpNp95A-0zpDb8,9159
-catllm/image_functions.py,sha256=KDb2UxDLrioerlqKZjKAX7lqfW-S_TSQCK6YxtJRKwI,35958
-catllm/text_functions.py,sha256=xfpwAYivnPnDlsU21vp1Pma9mDR24tn1lcBZQfsyIrc,18467
-catllm/images/circle.png,sha256=JWujAWAh08-TajAoEr_TAeFNLlfbryOLw6cgIBREBuQ,86202
-catllm/images/cube.png,sha256=nFec3e5bmRe4zrBCJ8QK-HcJLrG7u7dYdKhmdMfacfE,77275
-catllm/images/diamond.png,sha256=rJDZKtsnBGRO8FPA0iHuA8FvHFGi9PkI_DWSFdw6iv0,99568
-catllm/images/overlapping_pentagons.png,sha256=VO5plI6eoVRnjfqinn1nNzsCP2WQhuQy71V0EASouW4,71208
-catllm/images/rectangles.png,sha256=2XM16HO9EYWj2yHgN4bPXaCwPfl7iYQy0tQUGaJX9xg,40692
-cat_llm-0.0.62.dist-info/METADATA,sha256=jstvau7l_g2KqYSheIcZJxC8DX2Bf_lA_wLNzPO5-qw,22395
-cat_llm-0.0.62.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-cat_llm-0.0.62.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
-cat_llm-0.0.62.dist-info/RECORD,,

{cat_llm-0.0.62.dist-info → cat_llm-0.0.64.dist-info}/WHEEL RENAMED Viewed

File without changes

{cat_llm-0.0.62.dist-info → cat_llm-0.0.64.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

cat-llm 0.0.62__py3-none-any.whl → 0.0.64__py3-none-any.whl

cat-llm 0.0.62py3-none-any.whl → 0.0.64py3-none-any.whl