cat-llm 0.0.54__tar.gz → 0.0.55__tar.gz

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cat-llm
3
- Version: 0.0.54
3
+ Version: 0.0.55
4
4
  Summary: A tool for categorizing text data and images using LLMs and vision models
5
5
  Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
6
6
  Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
@@ -21,6 +21,7 @@ Classifier: Programming Language :: Python :: Implementation :: PyPy
21
21
  Requires-Python: >=3.8
22
22
  Requires-Dist: openai
23
23
  Requires-Dist: pandas
24
+ Requires-Dist: random
24
25
  Requires-Dist: requests
25
26
  Requires-Dist: tqdm
26
27
  Description-Content-Type: text/markdown
@@ -28,7 +28,8 @@ dependencies = [
28
28
  "pandas",
29
29
  "tqdm",
30
30
  "requests",
31
- "openai"
31
+ "openai",
32
+ "random"
32
33
  ]
33
34
 
34
35
  [project.urls]
@@ -378,7 +378,7 @@ def cerad_drawn_score(
378
378
  image_files.reset_index(drop=True) if isinstance(image_files, (pd.DataFrame, pd.Series))
379
379
  else pd.Series(image_files)
380
380
  ),
381
- 'link1': pd.Series(link1).reset_index(drop=True),
381
+ 'model_response': pd.Series(link1).reset_index(drop=True),
382
382
  'json': pd.Series(extracted_jsons).reset_index(drop=True)
383
383
  })
384
384
  categorized_data = pd.concat([categorized_data, normalized_data], axis=1)
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.0.54"
4
+ __version__ = "0.0.55"
5
5
  __author__ = "Chris Soria"
6
6
  __email__ = "chrissoria@berkeley.edu"
7
7
  __title__ = "cat-llm"
@@ -225,7 +225,7 @@ def image_multi_class(
225
225
  # Save progress so far
226
226
  temp_df = pd.DataFrame({
227
227
  'image_input': image_files[:i+1],
228
- 'link1': link1,
228
+ 'model_response': link1,
229
229
  'json': extracted_jsons
230
230
  })
231
231
  # Normalize processed jsons so far
@@ -522,7 +522,7 @@ def image_score_drawing(
522
522
  # Save progress so far
523
523
  temp_df = pd.DataFrame({
524
524
  'image_input': image_files[:i+1],
525
- 'link1': link1,
525
+ 'model_response': link1,
526
526
  'json': extracted_jsons
527
527
  })
528
528
  # Normalize processed jsons so far
@@ -844,7 +844,7 @@ def image_features(
844
844
  image_files.reset_index(drop=True) if isinstance(image_files, (pd.DataFrame, pd.Series))
845
845
  else pd.Series(image_files)
846
846
  ),
847
- 'link1': pd.Series(link1).reset_index(drop=True),
847
+ 'model_response': pd.Series(link1).reset_index(drop=True),
848
848
  'json': pd.Series(extracted_jsons).reset_index(drop=True)
849
849
  })
850
850
  categorized_data = pd.concat([categorized_data, normalized_data], axis=1)
@@ -227,6 +227,7 @@ def multi_class(
227
227
  user_model="gpt-4o",
228
228
  creativity=0,
229
229
  safety=False,
230
+ to_csv=False,
230
231
  filename="categorized_data.csv",
231
232
  save_directory=None,
232
233
  model_source="OpenAI"
@@ -390,7 +391,7 @@ Provide your work in JSON format where the number belonging to each category is
390
391
  normalized_data_list.append(pd.DataFrame({"1": ["e"]}))
391
392
  normalized_data = pd.concat(normalized_data_list, ignore_index=True)
392
393
  temp_df = pd.concat([temp_df, normalized_data], axis=1)
393
- # Save to CSV
394
+ # save to CSV
394
395
  if save_directory is None:
395
396
  save_directory = os.getcwd()
396
397
  temp_df.to_csv(os.path.join(save_directory, filename), index=False)
@@ -405,13 +406,18 @@ Provide your work in JSON format where the number belonging to each category is
405
406
  normalized_data_list.append(pd.DataFrame({"1": ["e"]}))
406
407
  normalized_data = pd.concat(normalized_data_list, ignore_index=True)
407
408
  categorized_data = pd.DataFrame({
408
- 'image_input': (
409
+ 'survey_input': (
409
410
  survey_input.reset_index(drop=True) if isinstance(survey_input, (pd.DataFrame, pd.Series))
410
411
  else pd.Series(survey_input)
411
412
  ),
412
- 'link1': pd.Series(link1).reset_index(drop=True),
413
+ 'model_response': pd.Series(link1).reset_index(drop=True),
413
414
  'json': pd.Series(extracted_jsons).reset_index(drop=True)
414
415
  })
415
416
  categorized_data = pd.concat([categorized_data, normalized_data], axis=1)
417
+
418
+ if to_csv:
419
+ if save_directory is None:
420
+ save_directory = os.getcwd()
421
+ categorized_data.to_csv(os.path.join(save_directory, filename), index=False)
416
422
 
417
423
  return categorized_data
File without changes
File without changes
File without changes