cat-llm 0.0.65__tar.gz → 0.0.67__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cat_llm-0.0.65 → cat_llm-0.0.67}/PKG-INFO +1 -1
- {cat_llm-0.0.65 → cat_llm-0.0.67}/src/catllm/__about__.py +2 -2
- {cat_llm-0.0.65 → cat_llm-0.0.67}/src/catllm/text_functions.py +29 -7
- {cat_llm-0.0.65 → cat_llm-0.0.67}/.gitignore +0 -0
- {cat_llm-0.0.65 → cat_llm-0.0.67}/LICENSE +0 -0
- {cat_llm-0.0.65 → cat_llm-0.0.67}/README.md +0 -0
- {cat_llm-0.0.65 → cat_llm-0.0.67}/pyproject.toml +0 -0
- {cat_llm-0.0.65 → cat_llm-0.0.67}/src/catllm/CERAD_functions.py +0 -0
- {cat_llm-0.0.65 → cat_llm-0.0.67}/src/catllm/__init__.py +0 -0
- {cat_llm-0.0.65 → cat_llm-0.0.67}/src/catllm/build_web_research.py +0 -0
- {cat_llm-0.0.65 → cat_llm-0.0.67}/src/catllm/image_functions.py +0 -0
- {cat_llm-0.0.65 → cat_llm-0.0.67}/src/catllm/images/circle.png +0 -0
- {cat_llm-0.0.65 → cat_llm-0.0.67}/src/catllm/images/cube.png +0 -0
- {cat_llm-0.0.65 → cat_llm-0.0.67}/src/catllm/images/diamond.png +0 -0
- {cat_llm-0.0.65 → cat_llm-0.0.67}/src/catllm/images/overlapping_pentagons.png +0 -0
- {cat_llm-0.0.65 → cat_llm-0.0.67}/src/catllm/images/rectangles.png +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cat-llm
|
3
|
-
Version: 0.0.65
|
3
|
+
Version: 0.0.67
|
4
4
|
Summary: A tool for categorizing text data and images using LLMs and vision models
|
5
5
|
Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
|
6
6
|
Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
|
@@ -1,10 +1,10 @@
|
|
1
1
|
# SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
|
2
2
|
#
|
3
3
|
# SPDX-License-Identifier: MIT
|
4
|
-
__version__ = "0.0.65"
|
4
|
+
__version__ = "0.0.67"
|
5
5
|
__author__ = "Chris Soria"
|
6
6
|
__email__ = "chrissoria@berkeley.edu"
|
7
7
|
__title__ = "cat-llm"
|
8
|
-
__description__ = "
|
8
|
+
__description__ = "An AI tool for categorizing and exploring text data, images, and building high quality datasets from the web."
|
9
9
|
__url__ = "https://github.com/chrissoria/cat-llm"
|
10
10
|
__license__ = "GPL-3.0"
|
@@ -7,7 +7,7 @@ def explore_corpus(
|
|
7
7
|
specificity="broad",
|
8
8
|
cat_num=10,
|
9
9
|
divisions=5,
|
10
|
-
user_model="gpt-
|
10
|
+
user_model="gpt-5",
|
11
11
|
creativity=None,
|
12
12
|
filename="corpus_exploration.csv",
|
13
13
|
model_source="OpenAI"
|
@@ -108,7 +108,7 @@ def explore_common_categories(
|
|
108
108
|
top_n=10,
|
109
109
|
cat_num=10,
|
110
110
|
divisions=5,
|
111
|
-
user_model="gpt-
|
111
|
+
user_model="gpt-5",
|
112
112
|
creativity=None,
|
113
113
|
specificity="broad",
|
114
114
|
research_question=None,
|
@@ -223,12 +223,27 @@ Return the top {top_n} categories as a numbered list sorted from the most to lea
|
|
223
223
|
return top_categories_final
|
224
224
|
|
225
225
|
#multi-class text classification
|
226
|
+
# what this function does:
|
227
|
+
# does context prompting, giving the model a background on the task at hand and the user's survey question
|
228
|
+
# system prompting, overall context and purpose for the language model
|
229
|
+
# role prompting, assigns a specific identity to the model
|
230
|
+
# also enables few shot prompting, allowing the user to input a few examples
|
231
|
+
# provides POSITIVE INSTRUCTIONS rather than limitations/restrictions
|
232
|
+
# GOAL: enable step-back prompting
|
233
|
+
# GOAL 2: enable self-consistency
|
226
234
|
def multi_class(
|
227
235
|
survey_question,
|
228
236
|
survey_input,
|
229
237
|
categories,
|
230
238
|
api_key,
|
231
|
-
user_model="gpt-
|
239
|
+
user_model="gpt-5",
|
240
|
+
user_prompt = None,
|
241
|
+
example1 = None,
|
242
|
+
example2 = None,
|
243
|
+
example3 = None,
|
244
|
+
example4 = None,
|
245
|
+
example5 = None,
|
246
|
+
example6 = None,
|
232
247
|
creativity=None,
|
233
248
|
safety=False,
|
234
249
|
to_csv=False,
|
@@ -256,6 +271,11 @@ def multi_class(
|
|
256
271
|
|
257
272
|
link1 = []
|
258
273
|
extracted_jsons = []
|
274
|
+
#handling example inputs
|
275
|
+
examples = [example1, example2, example3, example4, example5, example6]
|
276
|
+
examples_text = "\n".join(
|
277
|
+
f"Example {i}: {ex}" for i, ex in enumerate(examples, 1) if ex is not None
|
278
|
+
)
|
259
279
|
|
260
280
|
for idx, response in enumerate(tqdm(survey_input, desc="Categorizing responses")):
|
261
281
|
reply = None
|
@@ -266,11 +286,13 @@ def multi_class(
|
|
266
286
|
extracted_jsons.append(default_json)
|
267
287
|
#print(f"Skipped NaN input.")
|
268
288
|
else:
|
289
|
+
|
269
290
|
prompt = f"""A respondent was asked: {survey_question}. \
|
270
|
-
Categorize this survey response "{response}" into the following categories that apply: \
|
271
|
-
{categories_str}
|
272
|
-
|
273
|
-
|
291
|
+
Categorize this survey response "{response}" into the following categories that apply: \
|
292
|
+
{categories_str}
|
293
|
+
{examples_text}
|
294
|
+
Provide your work in JSON format..."""
|
295
|
+
|
274
296
|
if model_source == ("openai"):
|
275
297
|
from openai import OpenAI
|
276
298
|
client = OpenAI(api_key=api_key)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|