cat-llm 0.0.53__py3-none-any.whl → 0.0.55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-llm
3
- Version: 0.0.53
3
+ Version: 0.0.55
4
4
  Summary: A tool for categorizing text data and images using LLMs and vision models
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
7
7
  Project-URL: Source, https://github.com/chrissoria/cat-llm
8
- Author-email: Christopher Soria <chrissoria@berkeley.edu>
8
+ Author-email: Chris Soria <chrissoria@berkeley.edu>
9
9
  License-Expression: MIT
10
10
  License-File: LICENSE
11
11
  Keywords: categorizer,image classification,llm,structured output,survey data,text classification
@@ -19,7 +19,10 @@ Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Programming Language :: Python :: Implementation :: CPython
20
20
  Classifier: Programming Language :: Python :: Implementation :: PyPy
21
21
  Requires-Python: >=3.8
22
+ Requires-Dist: openai
22
23
  Requires-Dist: pandas
24
+ Requires-Dist: random
25
+ Requires-Dist: requests
23
26
  Requires-Dist: tqdm
24
27
  Description-Content-Type: text/markdown
25
28
 
@@ -1,15 +1,15 @@
1
- catllm/CERAD_functions.py,sha256=NNEu_Q10tClV7vRIVEgSQY8ujlXDbpWDzo1AbqlN7nQ,22462
2
- catllm/__about__.py,sha256=9ibCFe0aysD41qIPAN3LS-HE5Z0iMmnMJiYSMUPghak,404
1
+ catllm/CERAD_functions.py,sha256=ZCKyCiv-2eUPzJ7Yhrz4Y0OJK4iEyWMnOUI7mFDsoEI,22471
2
+ catllm/__about__.py,sha256=VaZlPOW_B39qUtTpqKKsmCDpMCtV8MCv3LFokpKtjVI,404
3
3
  catllm/__init__.py,sha256=sf02zp7N0NW0mAQi7eQ4gliWR1EwoqvXkHN2HwwjcTE,372
4
4
  catllm/build_web_research.py,sha256=CYGhxnonJLBw80ATEBkpRjOKJgCYntHTgx4s4Pb8g88,6833
5
- catllm/image_functions.py,sha256=Gz-djnXVaLT8GOR0sc8aPjjuC9L_gIT2AjUMjsjjmi0,35492
6
- catllm/text_functions.py,sha256=YK9BcpTbEo5FhkA5aiNfK8c72kyiW6AYzuILYNqGjqc,16603
5
+ catllm/image_functions.py,sha256=8dUpwHwVe4Vf06wjFNgLnh54q5upo4E-P87-TKaSECE,35519
6
+ catllm/text_functions.py,sha256=iAfd6roKxxSrrKQ4VvEc5y_ZWyIgW9fVzQOaDITyFDU,18116
7
7
  catllm/images/circle.png,sha256=JWujAWAh08-TajAoEr_TAeFNLlfbryOLw6cgIBREBuQ,86202
8
8
  catllm/images/cube.png,sha256=nFec3e5bmRe4zrBCJ8QK-HcJLrG7u7dYdKhmdMfacfE,77275
9
9
  catllm/images/diamond.png,sha256=rJDZKtsnBGRO8FPA0iHuA8FvHFGi9PkI_DWSFdw6iv0,99568
10
10
  catllm/images/overlapping_pentagons.png,sha256=VO5plI6eoVRnjfqinn1nNzsCP2WQhuQy71V0EASouW4,71208
11
11
  catllm/images/rectangles.png,sha256=2XM16HO9EYWj2yHgN4bPXaCwPfl7iYQy0tQUGaJX9xg,40692
12
- cat_llm-0.0.53.dist-info/METADATA,sha256=p9ov-gGpzkVF1APkaqozr7G4BxEX-2c17P5dWsLPdkE,21459
13
- cat_llm-0.0.53.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
- cat_llm-0.0.53.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
15
- cat_llm-0.0.53.dist-info/RECORD,,
12
+ cat_llm-0.0.55.dist-info/METADATA,sha256=rOd1FlzPT3uFmUb4vdyaBPrVaI-zyt3ZRcBsYLGY3f8,21521
13
+ cat_llm-0.0.55.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
+ cat_llm-0.0.55.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
15
+ cat_llm-0.0.55.dist-info/RECORD,,
catllm/CERAD_functions.py CHANGED
@@ -378,7 +378,7 @@ def cerad_drawn_score(
378
378
  image_files.reset_index(drop=True) if isinstance(image_files, (pd.DataFrame, pd.Series))
379
379
  else pd.Series(image_files)
380
380
  ),
381
- 'link1': pd.Series(link1).reset_index(drop=True),
381
+ 'model_response': pd.Series(link1).reset_index(drop=True),
382
382
  'json': pd.Series(extracted_jsons).reset_index(drop=True)
383
383
  })
384
384
  categorized_data = pd.concat([categorized_data, normalized_data], axis=1)
catllm/__about__.py CHANGED
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.0.53"
4
+ __version__ = "0.0.55"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-llm"
catllm/image_functions.py CHANGED
@@ -225,7 +225,7 @@ def image_multi_class(
225
225
  # Save progress so far
226
226
  temp_df = pd.DataFrame({
227
227
  'image_input': image_files[:i+1],
228
- 'link1': link1,
228
+ 'model_response': link1,
229
229
  'json': extracted_jsons
230
230
  })
231
231
  # Normalize processed jsons so far
@@ -522,7 +522,7 @@ def image_score_drawing(
522
522
  # Save progress so far
523
523
  temp_df = pd.DataFrame({
524
524
  'image_input': image_files[:i+1],
525
- 'link1': link1,
525
+ 'model_response': link1,
526
526
  'json': extracted_jsons
527
527
  })
528
528
  # Normalize processed jsons so far
@@ -844,7 +844,7 @@ def image_features(
844
844
  image_files.reset_index(drop=True) if isinstance(image_files, (pd.DataFrame, pd.Series))
845
845
  else pd.Series(image_files)
846
846
  ),
847
- 'link1': pd.Series(link1).reset_index(drop=True),
847
+ 'model_response': pd.Series(link1).reset_index(drop=True),
848
848
  'json': pd.Series(extracted_jsons).reset_index(drop=True)
849
849
  })
850
850
  categorized_data = pd.concat([categorized_data, normalized_data], axis=1)
catllm/text_functions.py CHANGED
@@ -227,6 +227,7 @@ def multi_class(
227
227
  user_model="gpt-4o",
228
228
  creativity=0,
229
229
  safety=False,
230
+ to_csv=False,
230
231
  filename="categorized_data.csv",
231
232
  save_directory=None,
232
233
  model_source="OpenAI"
@@ -307,6 +308,37 @@ Provide your work in JSON format where the number belonging to each category is
307
308
  except Exception as e:
308
309
  print(f"An error occurred: {e}")
309
310
  link1.append(f"Error processing input: {e}")
311
+
312
+ elif model_source == "Google":
313
+ import requests
314
+ url = f"https://generativelanguage.googleapis.com/v1beta/models/{user_model}:generateContent"
315
+ try:
316
+ headers = {
317
+ "x-goog-api-key": api_key,
318
+ "Content-Type": "application/json"
319
+ }
320
+
321
+ payload = {
322
+ "contents": [{
323
+ "parts": [{"text": prompt}]
324
+ }]
325
+ }
326
+
327
+ response = requests.post(url, headers=headers, json=payload)
328
+ response.raise_for_status() # Raise exception for HTTP errors
329
+ result = response.json()
330
+
331
+ if "candidates" in result and result["candidates"]:
332
+ reply = result["candidates"][0]["content"]["parts"][0]["text"]
333
+ else:
334
+ reply = "No response generated"
335
+
336
+ link1.append(reply)
337
+ print(reply)
338
+ except Exception as e:
339
+ print(f"An error occurred: {e}")
340
+ link1.append(f"Error processing input: {e}")
341
+
310
342
  elif model_source == "Mistral":
311
343
  from mistralai import Mistral
312
344
  client = Mistral(api_key=api_key)
@@ -359,7 +391,7 @@ Provide your work in JSON format where the number belonging to each category is
359
391
  normalized_data_list.append(pd.DataFrame({"1": ["e"]}))
360
392
  normalized_data = pd.concat(normalized_data_list, ignore_index=True)
361
393
  temp_df = pd.concat([temp_df, normalized_data], axis=1)
362
- # Save to CSV
394
+ # save to CSV
363
395
  if save_directory is None:
364
396
  save_directory = os.getcwd()
365
397
  temp_df.to_csv(os.path.join(save_directory, filename), index=False)
@@ -374,13 +406,18 @@ Provide your work in JSON format where the number belonging to each category is
374
406
  normalized_data_list.append(pd.DataFrame({"1": ["e"]}))
375
407
  normalized_data = pd.concat(normalized_data_list, ignore_index=True)
376
408
  categorized_data = pd.DataFrame({
377
- 'image_input': (
409
+ 'survey_input': (
378
410
  survey_input.reset_index(drop=True) if isinstance(survey_input, (pd.DataFrame, pd.Series))
379
411
  else pd.Series(survey_input)
380
412
  ),
381
- 'link1': pd.Series(link1).reset_index(drop=True),
413
+ 'model_response': pd.Series(link1).reset_index(drop=True),
382
414
  'json': pd.Series(extracted_jsons).reset_index(drop=True)
383
415
  })
384
416
  categorized_data = pd.concat([categorized_data, normalized_data], axis=1)
417
+
418
+ if to_csv:
419
+ if save_directory is None:
420
+ save_directory = os.getcwd()
421
+ categorized_data.to_csv(os.path.join(save_directory, filename), index=False)
385
422
 
386
423
  return categorized_data