cat-llm 0.0.61__tar.gz → 0.0.62__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-llm
3
- Version: 0.0.61
3
+ Version: 0.0.62
4
4
  Summary: A tool for categorizing text data and images using LLMs and vision models
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
@@ -19,6 +19,7 @@ Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Programming Language :: Python :: Implementation :: CPython
20
20
  Classifier: Programming Language :: Python :: Implementation :: PyPy
21
21
  Requires-Python: >=3.8
22
+ Requires-Dist: anthropic
22
23
  Requires-Dist: openai
23
24
  Requires-Dist: pandas
24
25
  Requires-Dist: requests
@@ -28,7 +28,8 @@ dependencies = [
28
28
  "pandas",
29
29
  "tqdm",
30
30
  "requests",
31
- "openai"
31
+ "openai",
32
+ "anthropic"
32
33
  ]
33
34
 
34
35
  [project.urls]
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.0.61"
4
+ __version__ = "0.0.62"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-llm"
@@ -20,6 +20,8 @@ def build_web_research_dataset(
20
20
  import regex
21
21
  from tqdm import tqdm
22
22
  import time
23
+
24
+ model_source = model_source.lower() # eliminating case sensitivity
23
25
 
24
26
  categories_str = "\n".join(f"{i + 1}. {cat}" for i, cat in enumerate(categories))
25
27
  print(categories_str)
@@ -35,6 +37,8 @@ def build_web_research_dataset(
35
37
  link1 = []
36
38
  extracted_jsons = []
37
39
 
40
+ max_retries = 5 #API rate limit error handler retries
41
+
38
42
  for idx, item in enumerate(tqdm(search_input, desc="Building dataset")):
39
43
  if idx > 0: # Skip delay for first item only
40
44
  time.sleep(time_delay)
@@ -68,32 +72,43 @@ def build_web_research_dataset(
68
72
  }}
69
73
  </format>"""
70
74
  #print(prompt)
71
- if model_source == "Anthropic":
75
+ if model_source == "anthropic":
72
76
  import anthropic
73
77
  client = anthropic.Anthropic(api_key=api_key)
74
- try:
75
- message = client.messages.create(
76
- model=user_model,
77
- max_tokens=1024,
78
- messages=[{"role": "user", "content": prompt}],
79
- **({"temperature": creativity} if creativity is not None else {}),
80
- tools=[{
81
- "type": "web_search_20250305",
82
- "name": "web_search"
83
- }]
84
- )
85
- reply = " ".join(
86
- block.text
87
- for block in message.content
88
- if getattr(block, "type", "") == "text"
89
- ).strip()
90
- link1.append(reply)
91
-
92
- except Exception as e:
93
- print(f"An error occurred: {e}")
94
- link1.append(f"Error processing input: {e}")
95
78
 
96
- elif model_source == "Google":
79
+ attempt = 0
80
+ while attempt < max_retries:
81
+ try:
82
+ message = client.messages.create(
83
+ model=user_model,
84
+ max_tokens=1024,
85
+ messages=[{"role": "user", "content": prompt}],
86
+ **({"temperature": creativity} if creativity is not None else {}),
87
+ tools=[{
88
+ "type": "web_search_20250305",
89
+ "name": "web_search"
90
+ }]
91
+ )
92
+ reply = " ".join(
93
+ block.text
94
+ for block in message.content
95
+ if getattr(block, "type", "") == "text"
96
+ ).strip()
97
+ link1.append(reply)
98
+ break
99
+ except anthropic.RateLimitError as e:
100
+ wait_time = 2 ** attempt # Exponential backoff, keeps doubling after each attempt
101
+ print(f"Rate limit error encountered. Retrying in {wait_time} seconds...")
102
+ time.sleep(wait_time) # back off before the next attempt to reduce repeated rate-limit errors
103
+ attempt += 1
104
+ except Exception as e:
105
+ print(f"A Non-rate-limit error occurred: {e}")
106
+ link1.append(f"Error processing input: {e}")
107
+ break #stop retrying
108
+ else:
109
+ link1.append("Max retries exceeded for rate limit errors.")
110
+
111
+ elif model_source == "google":
97
112
  import requests
98
113
  url = f"https://generativelanguage.googleapis.com/v1beta/models/{user_model}:generateContent"
99
114
  try:
File without changes
File without changes
File without changes