cat-llm 0.0.65__tar.gz → 0.0.66__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-llm
3
- Version: 0.0.65
3
+ Version: 0.0.66
4
4
  Summary: A tool for categorizing text data and images using LLMs and vision models
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
@@ -1,10 +1,10 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.0.65"
4
+ __version__ = "0.0.66"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-llm"
8
- __description__ = "A tool for categorizing and exploring text data and images using LLMs and vision models"
8
+ __description__ = "An AI tool for categorizing and exploring text data, images, and building high quality datasets from the web."
9
9
  __url__ = "https://github.com/chrissoria/cat-llm"
10
10
  __license__ = "GPL-3.0"
@@ -223,12 +223,27 @@ Return the top {top_n} categories as a numbered list sorted from the most to lea
223
223
  return top_categories_final
224
224
 
225
225
  #multi-class text classification
226
- def multi_class(
226
+ # what this function does:
227
+ # does context prompting, giving the model a background on the task at hand and the user's survey question
228
+ # system prompting, overall context and purpose for the language model
229
+ # role prompting, assigns a specific identity to the model
230
+ # also enables few shot prompting, allowing the user to input a few examples
231
+ # provides POSITIVE INSTRUCTIONS rather than limitations/restrictions
232
+ # GOAL: enable step-back prompting
233
+ # GOAL 2: enable self-consistency
234
+ def text_multi_class(
227
235
  survey_question,
228
236
  survey_input,
229
237
  categories,
230
238
  api_key,
231
239
  user_model="gpt-4o",
240
+ user_prompt = None,
241
+ example1 = None,
242
+ example2 = None,
243
+ example3 = None,
244
+ example4 = None,
245
+ example5 = None,
246
+ example6 = None,
232
247
  creativity=None,
233
248
  safety=False,
234
249
  to_csv=False,
@@ -256,6 +271,11 @@ def multi_class(
256
271
 
257
272
  link1 = []
258
273
  extracted_jsons = []
274
+ #handling example inputs
275
+ examples = [example1, example2, example3, example4, example5, example6]
276
+ examples_text = "\n".join(
277
+ f"Example {i}: {ex}" for i, ex in enumerate(examples, 1) if ex is not None
278
+ )
259
279
 
260
280
  for idx, response in enumerate(tqdm(survey_input, desc="Categorizing responses")):
261
281
  reply = None
@@ -266,11 +286,13 @@ def multi_class(
266
286
  extracted_jsons.append(default_json)
267
287
  #print(f"Skipped NaN input.")
268
288
  else:
289
+
269
290
  prompt = f"""A respondent was asked: {survey_question}. \
270
- Categorize this survey response "{response}" into the following categories that apply: \
271
- {categories_str} \
272
- Provide your work in JSON format where the number belonging to each category is the key and a 1 if the category is present and a 0 if it is not present as key values."""
273
- #print(prompt)
291
+ Categorize this survey response "{response}" into the following categories that apply: \
292
+ {categories_str}
293
+ {examples_text}
294
+ Provide your work in JSON format..."""
295
+
274
296
  if model_source == ("openai"):
275
297
  from openai import OpenAI
276
298
  client = OpenAI(api_key=api_key)
File without changes
File without changes
File without changes
File without changes