cat-llm 0.0.61__py3-none-any.whl → 0.0.62__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cat_llm-0.0.61.dist-info → cat_llm-0.0.62.dist-info}/METADATA +2 -1
- {cat_llm-0.0.61.dist-info → cat_llm-0.0.62.dist-info}/RECORD +6 -6
- catllm/__about__.py +1 -1
- catllm/build_web_research.py +38 -23
- {cat_llm-0.0.61.dist-info → cat_llm-0.0.62.dist-info}/WHEEL +0 -0
- {cat_llm-0.0.61.dist-info → cat_llm-0.0.62.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cat-llm
|
|
3
|
-
Version: 0.0.61
|
|
3
|
+
Version: 0.0.62
|
|
4
4
|
Summary: A tool for categorizing text data and images using LLMs and vision models
|
|
5
5
|
Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
|
|
@@ -19,6 +19,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
19
19
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
20
20
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
21
21
|
Requires-Python: >=3.8
|
|
22
|
+
Requires-Dist: anthropic
|
|
22
23
|
Requires-Dist: openai
|
|
23
24
|
Requires-Dist: pandas
|
|
24
25
|
Requires-Dist: requests
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
catllm/CERAD_functions.py,sha256=05n7h27TuAp3klkOnrH--m1wMreYqYuObM9NIab934o,22603
|
|
2
|
-
catllm/__about__.py,sha256=
|
|
2
|
+
catllm/__about__.py,sha256=R0Mt1NOAMAQCF7SHD4XDl2P4gF92EnfjYXaJ1Xo0vdc,408
|
|
3
3
|
catllm/__init__.py,sha256=sf02zp7N0NW0mAQi7eQ4gliWR1EwoqvXkHN2HwwjcTE,372
|
|
4
|
-
catllm/build_web_research.py,sha256=
|
|
4
|
+
catllm/build_web_research.py,sha256=nAKfkg7lihjXrYrLvltsKCvpb5zRFYpNp95A-0zpDb8,9159
|
|
5
5
|
catllm/image_functions.py,sha256=KDb2UxDLrioerlqKZjKAX7lqfW-S_TSQCK6YxtJRKwI,35958
|
|
6
6
|
catllm/text_functions.py,sha256=xfpwAYivnPnDlsU21vp1Pma9mDR24tn1lcBZQfsyIrc,18467
|
|
7
7
|
catllm/images/circle.png,sha256=JWujAWAh08-TajAoEr_TAeFNLlfbryOLw6cgIBREBuQ,86202
|
|
@@ -9,7 +9,7 @@ catllm/images/cube.png,sha256=nFec3e5bmRe4zrBCJ8QK-HcJLrG7u7dYdKhmdMfacfE,77275
|
|
|
9
9
|
catllm/images/diamond.png,sha256=rJDZKtsnBGRO8FPA0iHuA8FvHFGi9PkI_DWSFdw6iv0,99568
|
|
10
10
|
catllm/images/overlapping_pentagons.png,sha256=VO5plI6eoVRnjfqinn1nNzsCP2WQhuQy71V0EASouW4,71208
|
|
11
11
|
catllm/images/rectangles.png,sha256=2XM16HO9EYWj2yHgN4bPXaCwPfl7iYQy0tQUGaJX9xg,40692
|
|
12
|
-
cat_llm-0.0.
|
|
13
|
-
cat_llm-0.0.
|
|
14
|
-
cat_llm-0.0.
|
|
15
|
-
cat_llm-0.0.
|
|
12
|
+
cat_llm-0.0.62.dist-info/METADATA,sha256=jstvau7l_g2KqYSheIcZJxC8DX2Bf_lA_wLNzPO5-qw,22395
|
|
13
|
+
cat_llm-0.0.62.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
14
|
+
cat_llm-0.0.62.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
|
|
15
|
+
cat_llm-0.0.62.dist-info/RECORD,,
|
catllm/__about__.py
CHANGED
catllm/build_web_research.py
CHANGED
|
@@ -20,6 +20,8 @@ def build_web_research_dataset(
|
|
|
20
20
|
import regex
|
|
21
21
|
from tqdm import tqdm
|
|
22
22
|
import time
|
|
23
|
+
|
|
24
|
+
model_source = model_source.lower() # eliminating case sensitivity
|
|
23
25
|
|
|
24
26
|
categories_str = "\n".join(f"{i + 1}. {cat}" for i, cat in enumerate(categories))
|
|
25
27
|
print(categories_str)
|
|
@@ -35,6 +37,8 @@ def build_web_research_dataset(
|
|
|
35
37
|
link1 = []
|
|
36
38
|
extracted_jsons = []
|
|
37
39
|
|
|
40
|
+
max_retries = 5 #API rate limit error handler retries
|
|
41
|
+
|
|
38
42
|
for idx, item in enumerate(tqdm(search_input, desc="Building dataset")):
|
|
39
43
|
if idx > 0: # Skip delay for first item only
|
|
40
44
|
time.sleep(time_delay)
|
|
@@ -68,32 +72,43 @@ def build_web_research_dataset(
|
|
|
68
72
|
}}
|
|
69
73
|
</format>"""
|
|
70
74
|
#print(prompt)
|
|
71
|
-
if model_source == "
|
|
75
|
+
if model_source == "anthropic":
|
|
72
76
|
import anthropic
|
|
73
77
|
client = anthropic.Anthropic(api_key=api_key)
|
|
74
|
-
try:
|
|
75
|
-
message = client.messages.create(
|
|
76
|
-
model=user_model,
|
|
77
|
-
max_tokens=1024,
|
|
78
|
-
messages=[{"role": "user", "content": prompt}],
|
|
79
|
-
**({"temperature": creativity} if creativity is not None else {}),
|
|
80
|
-
tools=[{
|
|
81
|
-
"type": "web_search_20250305",
|
|
82
|
-
"name": "web_search"
|
|
83
|
-
}]
|
|
84
|
-
)
|
|
85
|
-
reply = " ".join(
|
|
86
|
-
block.text
|
|
87
|
-
for block in message.content
|
|
88
|
-
if getattr(block, "type", "") == "text"
|
|
89
|
-
).strip()
|
|
90
|
-
link1.append(reply)
|
|
91
|
-
|
|
92
|
-
except Exception as e:
|
|
93
|
-
print(f"An error occurred: {e}")
|
|
94
|
-
link1.append(f"Error processing input: {e}")
|
|
95
78
|
|
|
96
|
-
|
|
79
|
+
attempt = 0
|
|
80
|
+
while attempt < max_retries:
|
|
81
|
+
try:
|
|
82
|
+
message = client.messages.create(
|
|
83
|
+
model=user_model,
|
|
84
|
+
max_tokens=1024,
|
|
85
|
+
messages=[{"role": "user", "content": prompt}],
|
|
86
|
+
**({"temperature": creativity} if creativity is not None else {}),
|
|
87
|
+
tools=[{
|
|
88
|
+
"type": "web_search_20250305",
|
|
89
|
+
"name": "web_search"
|
|
90
|
+
}]
|
|
91
|
+
)
|
|
92
|
+
reply = " ".join(
|
|
93
|
+
block.text
|
|
94
|
+
for block in message.content
|
|
95
|
+
if getattr(block, "type", "") == "text"
|
|
96
|
+
).strip()
|
|
97
|
+
link1.append(reply)
|
|
98
|
+
break
|
|
99
|
+
except anthropic.error.RateLimitError as e:
|
|
100
|
+
wait_time = 2 ** attempt # Exponential backoff, keeps doubling after each attempt
|
|
101
|
+
print(f"Rate limit error encountered. Retrying in {wait_time} seconds...")
|
|
102
|
+
time.sleep(wait_time) #in case user wants to try and buffer the amount of errors by adding a wait time before attempts
|
|
103
|
+
attempt += 1
|
|
104
|
+
except Exception as e:
|
|
105
|
+
print(f"A Non-rate-limit error occurred: {e}")
|
|
106
|
+
link1.append(f"Error processing input: {e}")
|
|
107
|
+
break #stop retrying
|
|
108
|
+
else:
|
|
109
|
+
link1.append("Max retries exceeded for rate limit errors.")
|
|
110
|
+
|
|
111
|
+
elif model_source == "google":
|
|
97
112
|
import requests
|
|
98
113
|
url = f"https://generativelanguage.googleapis.com/v1beta/models/{user_model}:generateContent"
|
|
99
114
|
try:
|
|
File without changes
|
|
File without changes
|