cat-llm 0.0.58__tar.gz → 0.0.60__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cat_llm-0.0.58 → cat_llm-0.0.60}/PKG-INFO +1 -1
- {cat_llm-0.0.58 → cat_llm-0.0.60}/src/catllm/CERAD_functions.py +5 -5
- {cat_llm-0.0.58 → cat_llm-0.0.60}/src/catllm/__about__.py +2 -2
- {cat_llm-0.0.58 → cat_llm-0.0.60}/src/catllm/build_web_research.py +35 -5
- {cat_llm-0.0.58 → cat_llm-0.0.60}/src/catllm/image_functions.py +16 -16
- {cat_llm-0.0.58 → cat_llm-0.0.60}/src/catllm/text_functions.py +11 -10
- {cat_llm-0.0.58 → cat_llm-0.0.60}/.gitignore +0 -0
- {cat_llm-0.0.58 → cat_llm-0.0.60}/LICENSE +0 -0
- {cat_llm-0.0.58 → cat_llm-0.0.60}/README.md +0 -0
- {cat_llm-0.0.58 → cat_llm-0.0.60}/pyproject.toml +0 -0
- {cat_llm-0.0.58 → cat_llm-0.0.60}/src/catllm/__init__.py +0 -0
- {cat_llm-0.0.58 → cat_llm-0.0.60}/src/catllm/images/circle.png +0 -0
- {cat_llm-0.0.58 → cat_llm-0.0.60}/src/catllm/images/cube.png +0 -0
- {cat_llm-0.0.58 → cat_llm-0.0.60}/src/catllm/images/diamond.png +0 -0
- {cat_llm-0.0.58 → cat_llm-0.0.60}/src/catllm/images/overlapping_pentagons.png +0 -0
- {cat_llm-0.0.58 → cat_llm-0.0.60}/src/catllm/images/rectangles.png +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cat-llm
|
|
3
|
-
Version: 0.0.58
|
|
3
|
+
Version: 0.0.60
|
|
4
4
|
Summary: A tool for categorizing text data and images using LLMs and vision models
|
|
5
5
|
Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
|
|
@@ -27,7 +27,7 @@ def cerad_drawn_score(
|
|
|
27
27
|
image_input,
|
|
28
28
|
api_key,
|
|
29
29
|
user_model="gpt-4o",
|
|
30
|
-
creativity=
|
|
30
|
+
creativity=None,
|
|
31
31
|
reference_in_image=False,
|
|
32
32
|
provide_reference=False,
|
|
33
33
|
safety=False,
|
|
@@ -261,7 +261,7 @@ def cerad_drawn_score(
|
|
|
261
261
|
response_obj = client.chat.completions.create(
|
|
262
262
|
model=user_model,
|
|
263
263
|
messages=[{'role': 'user', 'content': prompt}],
|
|
264
|
-
temperature
|
|
264
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
265
265
|
)
|
|
266
266
|
reply = response_obj.choices[0].message.content
|
|
267
267
|
link1.append(reply)
|
|
@@ -279,8 +279,8 @@ def cerad_drawn_score(
|
|
|
279
279
|
message = client.messages.create(
|
|
280
280
|
model=user_model,
|
|
281
281
|
max_tokens=1024,
|
|
282
|
-
|
|
283
|
-
|
|
282
|
+
messages=[{"role": "user", "content": prompt}],
|
|
283
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
284
284
|
)
|
|
285
285
|
reply = message.content[0].text # Anthropic returns content as list
|
|
286
286
|
link1.append(reply)
|
|
@@ -301,7 +301,7 @@ def cerad_drawn_score(
|
|
|
301
301
|
messages=[
|
|
302
302
|
{'role': 'user', 'content': prompt}
|
|
303
303
|
],
|
|
304
|
-
temperature
|
|
304
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
305
305
|
)
|
|
306
306
|
reply = response.choices[0].message.content
|
|
307
307
|
link1.append(reply)
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: MIT
|
|
4
|
-
__version__ = "0.0.58"
|
|
4
|
+
__version__ = "0.0.60"
|
|
5
5
|
__author__ = "Chris Soria"
|
|
6
6
|
__email__ = "chrissoria@berkeley.edu"
|
|
7
7
|
__title__ = "cat-llm"
|
|
8
8
|
__description__ = "A tool for categorizing and exploring text data and images using LLMs and vision models"
|
|
9
9
|
__url__ = "https://github.com/chrissoria/cat-llm"
|
|
10
|
-
__license__ = "
|
|
10
|
+
__license__ = "GPL-3.0"
|
|
@@ -7,7 +7,7 @@ def build_web_research_dataset(
|
|
|
7
7
|
additional_instructions = "",
|
|
8
8
|
categories = ['Answer','URL'],
|
|
9
9
|
user_model="claude-sonnet-4-20250514",
|
|
10
|
-
creativity=
|
|
10
|
+
creativity=None,
|
|
11
11
|
safety=False,
|
|
12
12
|
filename="categorized_data.csv",
|
|
13
13
|
save_directory=None,
|
|
@@ -75,8 +75,8 @@ def build_web_research_dataset(
|
|
|
75
75
|
message = client.messages.create(
|
|
76
76
|
model=user_model,
|
|
77
77
|
max_tokens=1024,
|
|
78
|
-
temperature=creativity,
|
|
79
78
|
messages=[{"role": "user", "content": prompt}],
|
|
79
|
+
**({"temperature": creativity} if creativity is not None else {}),
|
|
80
80
|
tools=[{
|
|
81
81
|
"type": "web_search_20250305",
|
|
82
82
|
"name": "web_search"
|
|
@@ -88,13 +88,43 @@ def build_web_research_dataset(
|
|
|
88
88
|
if getattr(block, "type", "") == "text"
|
|
89
89
|
).strip()
|
|
90
90
|
link1.append(reply)
|
|
91
|
-
print(reply)
|
|
92
91
|
|
|
93
92
|
except Exception as e:
|
|
94
93
|
print(f"An error occurred: {e}")
|
|
95
94
|
link1.append(f"Error processing input: {e}")
|
|
95
|
+
|
|
96
|
+
elif model_source == "Google":
|
|
97
|
+
import requests
|
|
98
|
+
url = f"https://generativelanguage.googleapis.com/v1beta/models/{user_model}:generateContent"
|
|
99
|
+
try:
|
|
100
|
+
headers = {
|
|
101
|
+
"x-goog-api-key": api_key,
|
|
102
|
+
"Content-Type": "application/json"
|
|
103
|
+
}
|
|
104
|
+
payload = {
|
|
105
|
+
"contents": [{"parts": [{"text": prompt}]}],
|
|
106
|
+
"tools": [{"google_search": {}}],
|
|
107
|
+
**({"generationConfig": {"temperature": creativity}} if creativity is not None else {})
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
response = requests.post(url, headers=headers, json=payload)
|
|
111
|
+
response.raise_for_status()
|
|
112
|
+
result = response.json()
|
|
113
|
+
|
|
114
|
+
# extract reply from Google's response structure
|
|
115
|
+
if "candidates" in result and result["candidates"]:
|
|
116
|
+
reply = result["candidates"][0]["content"]["parts"][0]["text"]
|
|
117
|
+
else:
|
|
118
|
+
reply = "No response generated"
|
|
119
|
+
|
|
120
|
+
link1.append(reply)
|
|
121
|
+
|
|
122
|
+
except Exception as e:
|
|
123
|
+
print(f"An error occurred: {e}")
|
|
124
|
+
link1.append(f"Error processing input: {e}")
|
|
125
|
+
|
|
96
126
|
else:
|
|
97
|
-
raise ValueError("Unknown source! Currently this function only supports 'Anthropic' as model_source.")
|
|
127
|
+
raise ValueError("Unknown source! Currently this function only supports 'Anthropic' or 'Google' as model_source.")
|
|
98
128
|
# in situation that no JSON is found
|
|
99
129
|
if reply is not None:
|
|
100
130
|
extracted_json = regex.findall(r'\{(?:[^{}]|(?R))*\}', reply, regex.DOTALL)
|
|
@@ -126,7 +156,7 @@ def build_web_research_dataset(
|
|
|
126
156
|
# Save progress so far
|
|
127
157
|
temp_df = pd.DataFrame({
|
|
128
158
|
'survey_response': search_input[:idx+1],
|
|
129
|
-
'
|
|
159
|
+
'model_response': link1,
|
|
130
160
|
'json': extracted_jsons
|
|
131
161
|
})
|
|
132
162
|
# Normalize processed jsons so far
|
|
@@ -5,7 +5,7 @@ def image_multi_class(
|
|
|
5
5
|
categories,
|
|
6
6
|
api_key,
|
|
7
7
|
user_model="gpt-4o",
|
|
8
|
-
creativity=
|
|
8
|
+
creativity=None,
|
|
9
9
|
to_csv=False,
|
|
10
10
|
safety=False,
|
|
11
11
|
filename="categorized_data.csv",
|
|
@@ -143,7 +143,7 @@ def image_multi_class(
|
|
|
143
143
|
response_obj = client.chat.completions.create(
|
|
144
144
|
model=user_model,
|
|
145
145
|
messages=[{'role': 'user', 'content': prompt}],
|
|
146
|
-
temperature
|
|
146
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
147
147
|
)
|
|
148
148
|
reply = response_obj.choices[0].message.content
|
|
149
149
|
link1.append(reply)
|
|
@@ -162,8 +162,8 @@ def image_multi_class(
|
|
|
162
162
|
message = client.messages.create(
|
|
163
163
|
model=user_model,
|
|
164
164
|
max_tokens=1024,
|
|
165
|
-
|
|
166
|
-
|
|
165
|
+
messages=[{"role": "user", "content": prompt}],
|
|
166
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
167
167
|
)
|
|
168
168
|
reply = message.content[0].text
|
|
169
169
|
link1.append(reply)
|
|
@@ -183,7 +183,7 @@ def image_multi_class(
|
|
|
183
183
|
messages=[
|
|
184
184
|
{'role': 'user', 'content': prompt}
|
|
185
185
|
],
|
|
186
|
-
temperature
|
|
186
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
187
187
|
)
|
|
188
188
|
reply = response.choices[0].message.content
|
|
189
189
|
link1.append(reply)
|
|
@@ -277,7 +277,7 @@ def image_score_drawing(
|
|
|
277
277
|
api_key,
|
|
278
278
|
columns="numbered",
|
|
279
279
|
user_model="gpt-4o-2024-11-20",
|
|
280
|
-
creativity=
|
|
280
|
+
creativity=None,
|
|
281
281
|
to_csv=False,
|
|
282
282
|
safety=False,
|
|
283
283
|
filename="categorized_data.csv",
|
|
@@ -442,7 +442,7 @@ def image_score_drawing(
|
|
|
442
442
|
response_obj = client.chat.completions.create(
|
|
443
443
|
model=user_model,
|
|
444
444
|
messages=[{'role': 'user', 'content': prompt}],
|
|
445
|
-
temperature
|
|
445
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
446
446
|
)
|
|
447
447
|
reply = response_obj.choices[0].message.content
|
|
448
448
|
link1.append(reply)
|
|
@@ -460,8 +460,8 @@ def image_score_drawing(
|
|
|
460
460
|
message = client.messages.create(
|
|
461
461
|
model=user_model,
|
|
462
462
|
max_tokens=1024,
|
|
463
|
-
|
|
464
|
-
|
|
463
|
+
messages=[{"role": "user", "content": prompt}],
|
|
464
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
465
465
|
)
|
|
466
466
|
reply = message.content[0].text # Anthropic returns content as list
|
|
467
467
|
link1.append(reply)
|
|
@@ -481,7 +481,7 @@ def image_score_drawing(
|
|
|
481
481
|
messages=[
|
|
482
482
|
{'role': 'user', 'content': prompt}
|
|
483
483
|
],
|
|
484
|
-
temperature
|
|
484
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
485
485
|
)
|
|
486
486
|
reply = response.choices[0].message.content
|
|
487
487
|
link1.append(reply)
|
|
@@ -574,7 +574,7 @@ def image_features(
|
|
|
574
574
|
features_to_extract,
|
|
575
575
|
api_key,
|
|
576
576
|
user_model="gpt-4o-2024-11-20",
|
|
577
|
-
creativity=
|
|
577
|
+
creativity=None,
|
|
578
578
|
to_csv=False,
|
|
579
579
|
safety=False,
|
|
580
580
|
filename="categorized_data.csv",
|
|
@@ -715,7 +715,7 @@ def image_features(
|
|
|
715
715
|
response_obj = client.chat.completions.create(
|
|
716
716
|
model=user_model,
|
|
717
717
|
messages=[{'role': 'user', 'content': prompt}],
|
|
718
|
-
temperature
|
|
718
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
719
719
|
)
|
|
720
720
|
reply = response_obj.choices[0].message.content
|
|
721
721
|
link1.append(reply)
|
|
@@ -733,7 +733,7 @@ def image_features(
|
|
|
733
733
|
response_obj = client.chat.completions.create(
|
|
734
734
|
model=user_model,
|
|
735
735
|
messages=[{'role': 'user', 'content': prompt}],
|
|
736
|
-
temperature
|
|
736
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
737
737
|
)
|
|
738
738
|
reply = response_obj.choices[0].message.content
|
|
739
739
|
link1.append(reply)
|
|
@@ -751,8 +751,8 @@ def image_features(
|
|
|
751
751
|
message = client.messages.create(
|
|
752
752
|
model=user_model,
|
|
753
753
|
max_tokens=1024,
|
|
754
|
-
|
|
755
|
-
|
|
754
|
+
messages=[{"role": "user", "content": prompt}],
|
|
755
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
756
756
|
)
|
|
757
757
|
reply = message.content[0].text # Anthropic returns content as list
|
|
758
758
|
link1.append(reply)
|
|
@@ -772,7 +772,7 @@ def image_features(
|
|
|
772
772
|
messages=[
|
|
773
773
|
{'role': 'user', 'content': prompt}
|
|
774
774
|
],
|
|
775
|
-
temperature
|
|
775
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
776
776
|
)
|
|
777
777
|
reply = response.choices[0].message.content
|
|
778
778
|
link1.append(reply)
|
|
@@ -8,7 +8,7 @@ def explore_corpus(
|
|
|
8
8
|
cat_num=10,
|
|
9
9
|
divisions=5,
|
|
10
10
|
user_model="gpt-4o-2024-11-20",
|
|
11
|
-
creativity=
|
|
11
|
+
creativity=None,
|
|
12
12
|
filename="corpus_exploration.csv",
|
|
13
13
|
model_source="OpenAI"
|
|
14
14
|
):
|
|
@@ -57,7 +57,7 @@ Number your categories from 1 through {cat_num} and be concise with the category
|
|
|
57
57
|
The research question is: {research_question}""" if research_question else "You are a helpful assistant."},
|
|
58
58
|
{'role': 'user', 'content': prompt}
|
|
59
59
|
],
|
|
60
|
-
temperature
|
|
60
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
61
61
|
)
|
|
62
62
|
reply = response_obj.choices[0].message.content
|
|
63
63
|
responses.append(reply)
|
|
@@ -107,7 +107,7 @@ def explore_common_categories(
|
|
|
107
107
|
cat_num=10,
|
|
108
108
|
divisions=5,
|
|
109
109
|
user_model="gpt-4o",
|
|
110
|
-
creativity=
|
|
110
|
+
creativity=None,
|
|
111
111
|
specificity="broad",
|
|
112
112
|
research_question=None,
|
|
113
113
|
filename=None,
|
|
@@ -158,7 +158,7 @@ Number your categories from 1 through {cat_num} and be concise with the category
|
|
|
158
158
|
The research question is: {research_question}""" if research_question else "You are a helpful assistant."},
|
|
159
159
|
{'role': 'user', 'content': prompt}
|
|
160
160
|
],
|
|
161
|
-
temperature
|
|
161
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
162
162
|
)
|
|
163
163
|
reply = response_obj.choices[0].message.content
|
|
164
164
|
responses.append(reply)
|
|
@@ -225,7 +225,7 @@ def multi_class(
|
|
|
225
225
|
categories,
|
|
226
226
|
api_key,
|
|
227
227
|
user_model="gpt-4o",
|
|
228
|
-
creativity=
|
|
228
|
+
creativity=None,
|
|
229
229
|
safety=False,
|
|
230
230
|
to_csv=False,
|
|
231
231
|
filename="categorized_data.csv",
|
|
@@ -272,7 +272,7 @@ Provide your work in JSON format where the number belonging to each category is
|
|
|
272
272
|
response_obj = client.chat.completions.create(
|
|
273
273
|
model=user_model,
|
|
274
274
|
messages=[{'role': 'user', 'content': prompt}],
|
|
275
|
-
temperature
|
|
275
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
276
276
|
)
|
|
277
277
|
reply = response_obj.choices[0].message.content
|
|
278
278
|
link1.append(reply)
|
|
@@ -286,7 +286,7 @@ Provide your work in JSON format where the number belonging to each category is
|
|
|
286
286
|
response_obj = client.chat.completions.create(
|
|
287
287
|
model=user_model,
|
|
288
288
|
messages=[{'role': 'user', 'content': prompt}],
|
|
289
|
-
temperature
|
|
289
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
290
290
|
)
|
|
291
291
|
reply = response_obj.choices[0].message.content
|
|
292
292
|
link1.append(reply)
|
|
@@ -300,7 +300,7 @@ Provide your work in JSON format where the number belonging to each category is
|
|
|
300
300
|
message = client.messages.create(
|
|
301
301
|
model=user_model,
|
|
302
302
|
max_tokens=1024,
|
|
303
|
-
temperature
|
|
303
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
304
304
|
messages=[{"role": "user", "content": prompt}]
|
|
305
305
|
)
|
|
306
306
|
reply = message.content[0].text # Anthropic returns content as list
|
|
@@ -321,7 +321,8 @@ Provide your work in JSON format where the number belonging to each category is
|
|
|
321
321
|
payload = {
|
|
322
322
|
"contents": [{
|
|
323
323
|
"parts": [{"text": prompt}]
|
|
324
|
-
}]
|
|
324
|
+
}],
|
|
325
|
+
**({"generationConfig": {"temperature": creativity}} if creativity is not None else {})
|
|
325
326
|
}
|
|
326
327
|
|
|
327
328
|
response = requests.post(url, headers=headers, json=payload)
|
|
@@ -347,7 +348,7 @@ Provide your work in JSON format where the number belonging to each category is
|
|
|
347
348
|
messages=[
|
|
348
349
|
{'role': 'user', 'content': prompt}
|
|
349
350
|
],
|
|
350
|
-
temperature
|
|
351
|
+
**({"temperature": creativity} if creativity is not None else {})
|
|
351
352
|
)
|
|
352
353
|
reply = response.choices[0].message.content
|
|
353
354
|
link1.append(reply)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|