cat-llm 0.0.65__py3-none-any.whl → 0.0.67__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-llm
3
- Version: 0.0.65
3
+ Version: 0.0.67
4
4
  Summary: A tool for categorizing text data and images using LLMs and vision models
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
@@ -1,15 +1,15 @@
1
1
  catllm/CERAD_functions.py,sha256=q4HbP5e2Yu8NnZZ-2eX4sImyj6u3i8xWcq0pYU81iis,22676
2
- catllm/__about__.py,sha256=QeI7x2I4oYiFhztRrDnRvZOLW_kEShiCK7Y_hax8U8o,408
2
+ catllm/__about__.py,sha256=hIVt-fQydvImRffcCyiHlTFeOR5F2aEchTWXAsWTPc4,430
3
3
  catllm/__init__.py,sha256=sf02zp7N0NW0mAQi7eQ4gliWR1EwoqvXkHN2HwwjcTE,372
4
4
  catllm/build_web_research.py,sha256=880dfE2bEQb-FrXP-42JoLLtyc9ox_sBULDr38xiTiQ,22655
5
5
  catllm/image_functions.py,sha256=8_FftRU285x1HT-AgNkaobefQVD-5q7ZY_t7JFdL3Sg,36177
6
- catllm/text_functions.py,sha256=Jf51lNaFtcS2QGnNLkhM8rFVJSD4tN0Bm_VfELvb47g,18686
6
+ catllm/text_functions.py,sha256=_GMretLVBUs0ntL-wV6My0TRAvzvaPo_WU9ZqXlKxeM,19426
7
7
  catllm/images/circle.png,sha256=JWujAWAh08-TajAoEr_TAeFNLlfbryOLw6cgIBREBuQ,86202
8
8
  catllm/images/cube.png,sha256=nFec3e5bmRe4zrBCJ8QK-HcJLrG7u7dYdKhmdMfacfE,77275
9
9
  catllm/images/diamond.png,sha256=rJDZKtsnBGRO8FPA0iHuA8FvHFGi9PkI_DWSFdw6iv0,99568
10
10
  catllm/images/overlapping_pentagons.png,sha256=VO5plI6eoVRnjfqinn1nNzsCP2WQhuQy71V0EASouW4,71208
11
11
  catllm/images/rectangles.png,sha256=2XM16HO9EYWj2yHgN4bPXaCwPfl7iYQy0tQUGaJX9xg,40692
12
- cat_llm-0.0.65.dist-info/METADATA,sha256=77WCioobgfzMsP_o76XHbRncfNrXYayxFgZDrUVFv7k,22423
13
- cat_llm-0.0.65.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
- cat_llm-0.0.65.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
15
- cat_llm-0.0.65.dist-info/RECORD,,
12
+ cat_llm-0.0.67.dist-info/METADATA,sha256=UR95eJdArTmJe6A2g2hH0Q9mZ9PkEQsJ2kVSg3KfQe4,22423
13
+ cat_llm-0.0.67.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
+ cat_llm-0.0.67.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
15
+ cat_llm-0.0.67.dist-info/RECORD,,
catllm/__about__.py CHANGED
@@ -1,10 +1,10 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.0.65"
4
+ __version__ = "0.0.67"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-llm"
8
- __description__ = "A tool for categorizing and exploring text data and images using LLMs and vision models"
8
+ __description__ = "An AI tool for categorizing and exploring text data, images, and building high quality datasets from the web."
9
9
  __url__ = "https://github.com/chrissoria/cat-llm"
10
10
  __license__ = "GPL-3.0"
catllm/text_functions.py CHANGED
@@ -7,7 +7,7 @@ def explore_corpus(
7
7
  specificity="broad",
8
8
  cat_num=10,
9
9
  divisions=5,
10
- user_model="gpt-4o-2024-11-20",
10
+ user_model="gpt-5",
11
11
  creativity=None,
12
12
  filename="corpus_exploration.csv",
13
13
  model_source="OpenAI"
@@ -108,7 +108,7 @@ def explore_common_categories(
108
108
  top_n=10,
109
109
  cat_num=10,
110
110
  divisions=5,
111
- user_model="gpt-4o",
111
+ user_model="gpt-5",
112
112
  creativity=None,
113
113
  specificity="broad",
114
114
  research_question=None,
@@ -223,12 +223,27 @@ Return the top {top_n} categories as a numbered list sorted from the most to lea
223
223
  return top_categories_final
224
224
 
225
225
  #multi-class text classification
226
+ # what this function does:
227
+ # does context prompting, giving the model a background on the task at hand and the user's survey question
228
+ # system prompting, overall context and purpose for the language model
229
+ # role prompting, assigns a specific identity to the model
230
+ # also enables few shot prompting, allowing the user to input a few examples
231
+ # provides POSITIVE INSTRUCTIONS rather than limitations/restrictions
232
+ # GOAL: enable step-back prompting
233
+ # GOAL 2: enable self-consistency
226
234
  def multi_class(
227
235
  survey_question,
228
236
  survey_input,
229
237
  categories,
230
238
  api_key,
231
- user_model="gpt-4o",
239
+ user_model="gpt-5",
240
+ user_prompt = None,
241
+ example1 = None,
242
+ example2 = None,
243
+ example3 = None,
244
+ example4 = None,
245
+ example5 = None,
246
+ example6 = None,
232
247
  creativity=None,
233
248
  safety=False,
234
249
  to_csv=False,
@@ -256,6 +271,11 @@ def multi_class(
256
271
 
257
272
  link1 = []
258
273
  extracted_jsons = []
274
+ #handling example inputs
275
+ examples = [example1, example2, example3, example4, example5, example6]
276
+ examples_text = "\n".join(
277
+ f"Example {i}: {ex}" for i, ex in enumerate(examples, 1) if ex is not None
278
+ )
259
279
 
260
280
  for idx, response in enumerate(tqdm(survey_input, desc="Categorizing responses")):
261
281
  reply = None
@@ -266,11 +286,13 @@ def multi_class(
266
286
  extracted_jsons.append(default_json)
267
287
  #print(f"Skipped NaN input.")
268
288
  else:
289
+
269
290
  prompt = f"""A respondent was asked: {survey_question}. \
270
- Categorize this survey response "{response}" into the following categories that apply: \
271
- {categories_str} \
272
- Provide your work in JSON format where the number belonging to each category is the key and a 1 if the category is present and a 0 if it is not present as key values."""
273
- #print(prompt)
291
+ Categorize this survey response "{response}" into the following categories that apply: \
292
+ {categories_str}
293
+ {examples_text}
294
+ Provide your work in JSON format..."""
295
+
274
296
  if model_source == ("openai"):
275
297
  from openai import OpenAI
276
298
  client = OpenAI(api_key=api_key)