cat-llm 0.0.42__py3-none-any.whl → 0.0.50__py3-none-any.whl

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: cat-llm
- Version: 0.0.42
+ Version: 0.0.50
  Summary: A tool for categorizing text data and images using LLMs and vision models
  Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
  Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
@@ -0,0 +1,15 @@
+ catllm/CERAD_functions.py,sha256=NNEu_Q10tClV7vRIVEgSQY8ujlXDbpWDzo1AbqlN7nQ,22462
+ catllm/__about__.py,sha256=IeIRRWVEawr5HDscDa0V1yID-dwAEyHrEefI_Z624JI,404
+ catllm/__init__.py,sha256=BpAG8nPhM3ZQRd0WqkubI_36-VCOs4eCYtGVgzz48Bs,337
+ catllm/build_web_research.py,sha256=gpYizrEe0ENUTZ8iyjzwvQj5kTXI15K_3rtt3yvwvUo,6927
+ catllm/image_functions.py,sha256=Gz-djnXVaLT8GOR0sc8aPjjuC9L_gIT2AjUMjsjjmi0,35492
+ catllm/text_functions.py,sha256=YK9BcpTbEo5FhkA5aiNfK8c72kyiW6AYzuILYNqGjqc,16603
+ catllm/images/circle.png,sha256=JWujAWAh08-TajAoEr_TAeFNLlfbryOLw6cgIBREBuQ,86202
+ catllm/images/cube.png,sha256=nFec3e5bmRe4zrBCJ8QK-HcJLrG7u7dYdKhmdMfacfE,77275
+ catllm/images/diamond.png,sha256=rJDZKtsnBGRO8FPA0iHuA8FvHFGi9PkI_DWSFdw6iv0,99568
+ catllm/images/overlapping_pentagons.png,sha256=VO5plI6eoVRnjfqinn1nNzsCP2WQhuQy71V0EASouW4,71208
+ catllm/images/rectangles.png,sha256=2XM16HO9EYWj2yHgN4bPXaCwPfl7iYQy0tQUGaJX9xg,40692
+ cat_llm-0.0.50.dist-info/METADATA,sha256=vi2-c-FsagiVxUROXxxBPv4knnig4OwVCx6VEOcFGb4,17514
+ cat_llm-0.0.50.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ cat_llm-0.0.50.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
+ cat_llm-0.0.50.dist-info/RECORD,,
catllm/CERAD_functions.py CHANGED
@@ -21,6 +21,7 @@ Areas for improvement:
  10. Test variety: expanding or adding functions to handle score more tests relevant for cogntive assesment, such as the MMSE.
  11. Error handling: improving error handling to better manage unexpected inputs or model failures.
  """
+
  def cerad_drawn_score(
      shape,
      image_input,
@@ -265,8 +266,11 @@ def cerad_drawn_score(
              reply = response_obj.choices[0].message.content
              link1.append(reply)
          except Exception as e:
-             print("An error occurred: {e}")
-             link1.append("Error processing input: {e}")
+             if "model" in str(e).lower():
+                 raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
+             else:
+                 print("An error occurred: {e}")
+                 link1.append("Error processing input: {e}")

      elif model_source == "Anthropic" and valid_image:
          import anthropic
@@ -281,8 +285,11 @@ def cerad_drawn_score(
              reply = message.content[0].text # Anthropic returns content as list
              link1.append(reply)
          except Exception as e:
-             print("An error occurred: {e}")
-             link1.append("Error processing input: {e}")
+             if "model" in str(e).lower():
+                 raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
+             else:
+                 print("An error occurred: {e}")
+                 link1.append("Error processing input: {e}")

      elif model_source == "Mistral" and valid_image:
          from mistralai import Mistral
@@ -299,9 +306,11 @@ def cerad_drawn_score(
              reply = response.choices[0].message.content
              link1.append(reply)
          except Exception as e:
-             reply = None
-             print("An error occurred: {e}")
-             link1.append("Error processing input: {e}")
+             if "model" in str(e).lower():
+                 raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
+             else:
+                 print("An error occurred: {e}")
+                 link1.append("Error processing input: {e}")
      #if no valid image path is provided
      elif valid_image == False:
          reply = "invalid image path"
@@ -365,7 +374,10 @@ def cerad_drawn_score(
      normalized_data = pd.concat(normalized_data_list, ignore_index=True)

      categorized_data = pd.DataFrame({
-         'image_input': image_files,
+         'image_input': (
+             image_files.reset_index(drop=True) if isinstance(image_files, (pd.DataFrame, pd.Series))
+             else pd.Series(image_files)
+         ),
          'link1': pd.Series(link1).reset_index(drop=True),
          'json': pd.Series(extracted_jsons).reset_index(drop=True)
      })
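The image_input change in the hunk above is a small pandas alignment fix: whatever the caller passed (a plain list of paths or an already-sliced Series) is coerced to a Series with a fresh zero-based index before the output DataFrame is assembled, so it lines up with the reply and JSON columns that were built from plain lists. A minimal sketch of the failure mode this avoids, using made-up toy values rather than anything from the package:

import pandas as pd

# Hypothetical input: a sliced Series keeps its original labels (2, 3),
# while replies collected in a plain Python list are indexed 0, 1.
image_files = pd.Series(["a.png", "b.png", "c.png", "d.png"]).iloc[2:]
link1 = ["reply for c.png", "reply for d.png"]

# Building the frame directly aligns on index labels and injects NaNs.
misaligned = pd.DataFrame({
    "image_input": image_files,
    "link1": pd.Series(link1).reset_index(drop=True),
})
print(len(misaligned))  # 4 rows, half of them NaN

# The guard used in the diff: normalize to a zero-based Series first.
aligned = pd.DataFrame({
    "image_input": (
        image_files.reset_index(drop=True)
        if isinstance(image_files, (pd.DataFrame, pd.Series))
        else pd.Series(image_files)
    ),
    "link1": pd.Series(link1).reset_index(drop=True),
})
print(len(aligned))  # 2 rows, paths and replies paired correctly

The same guard is applied to image_files and survey_input in the image and text functions later in this diff.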
catllm/__about__.py CHANGED
@@ -1,7 +1,7 @@
  # SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
  #
  # SPDX-License-Identifier: MIT
- __version__ = "0.0.42"
+ __version__ = "0.0.50"
  __author__ = "Chris Soria"
  __email__ = "chrissoria@berkeley.edu"
  __title__ = "cat-llm"
catllm/build_web_research.py ADDED
@@ -0,0 +1,169 @@
+ #build dataset classification
+ def build_web_research_dataset(
+     search_question,
+     search_input,
+     api_key,
+     answer_format = "concise",
+     additional_instructions = "",
+     categories = ['Answer','URL'],
+     user_model="claude-3-7-sonnet-20250219",
+     creativity=0,
+     safety=False,
+     filename="categorized_data.csv",
+     save_directory=None,
+     model_source="Anthropic",
+     time_delay=5
+ ):
+     import os
+     import json
+     import pandas as pd
+     import regex
+     from tqdm import tqdm
+     import time
+
+     categories_str = "\n".join(f"{i + 1}. {cat}" for i, cat in enumerate(categories))
+     print(categories_str)
+     cat_num = len(categories)
+     category_dict = {str(i+1): "0" for i in range(cat_num)}
+     example_JSON = json.dumps(category_dict, indent=4)
+
+     # ensure number of categories is what user wants
+     #print("\nThe information to be extracted:")
+     #for i, cat in enumerate(categories, 1):
+     #print(f"{i}. {cat}")
+
+     link1 = []
+     extracted_jsons = []
+
+     for idx, item in enumerate(tqdm(search_input, desc="Building dataset")):
+         if idx == 0: # delay the first item just to be safe
+             time.sleep(time_delay)
+         reply = None
+
+         if pd.isna(item):
+             link1.append("Skipped NaN input")
+             default_json = example_JSON
+             extracted_jsons.append(default_json)
+             #print(f"Skipped NaN input.")
+         else:
+             prompt = f"""<role>You are a research assistant specializing in finding current, factual information.</role>
+
+ <task>Find information about {item}'s {search_question}</task>
+
+ <rules>
+ - Search for the most current and authoritative information available
+ - Provide your answer as {answer_format}
+ - Prioritize official sources when possible
+ - If information is not found, state "Information not found"
+ - Include exactly one source URL where you found the information
+ - Do not include any explanatory text or commentary beyond the JSON
+ {additional_instructions}
+ </rules>
+
+ <format>
+ Return your response as valid JSON with this exact structure:
+ {{
+ "answer": "Your factual answer or 'Information not found'",
+ "url": "Source URL or 'No source available'"
+ }}
+ </format>"""
+             #print(prompt)
+             if model_source == "Anthropic":
+                 import anthropic
+                 client = anthropic.Anthropic(api_key=api_key)
+                 try:
+                     message = client.messages.create(
+                         model=user_model,
+                         max_tokens=1024,
+                         temperature=creativity,
+                         messages=[{"role": "user", "content": prompt}],
+                         tools=[{
+                             "type": "web_search_20250305",
+                             "name": "web_search"
+                         }]
+                     )
+                     reply = " ".join(
+                         block.text
+                         for block in message.content
+                         if getattr(block, "type", "") == "text"
+                     ).strip()
+                     link1.append(reply)
+                     time.sleep(time_delay)
+                     print(reply)
+
+                 except Exception as e:
+                     print(f"An error occurred: {e}")
+                     link1.append(f"Error processing input: {e}")
+                     time.sleep(time_delay)
+             else:
+                 raise ValueError("Unknown source! Currently this function only supports 'Anthropic' as model_source.")
+         # in situation that no JSON is found
+         if reply is not None:
+             extracted_json = regex.findall(r'\{(?:[^{}]|(?R))*\}', reply, regex.DOTALL)
+             if extracted_json:
+                 raw_json = extracted_json[0].strip() # Only strip leading/trailing whitespace
+                 try:
+                     # Parse to validate JSON structure
+                     parsed_obj = json.loads(raw_json)
+                     # Re-serialize for consistent formatting (optional)
+                     cleaned_json = json.dumps(parsed_obj)
+                     extracted_jsons.append(cleaned_json)
+                 except json.JSONDecodeError as e:
+                     print(f"JSON parsing error: {e}")
+                     # Fallback to raw extraction if parsing fails
+                     extracted_jsons.append(raw_json)
+             else:
+                 # Use consistent schema for errors
+                 error_message = json.dumps({"answer": "e", "url": "e"})
+                 extracted_jsons.append(error_message)
+                 print(error_message)
+         else:
+             # Handle None reply case
+             error_message = json.dumps({"answer": "e", "url": "e"})
+             extracted_jsons.append(error_message)
+             #print(error_message)
+
+         # --- Safety Save ---
+         if safety:
+             # Save progress so far
+             temp_df = pd.DataFrame({
+                 'survey_response': search_input[:idx+1],
+                 'link1': link1,
+                 'json': extracted_jsons
+             })
+             # Normalize processed jsons so far
+             normalized_data_list = []
+             for json_str in extracted_jsons:
+                 try:
+                     parsed_obj = json.loads(json_str)
+                     normalized_data_list.append(pd.json_normalize(parsed_obj))
+                 except json.JSONDecodeError:
+                     normalized_data_list.append(pd.DataFrame({"1": ["e"]}))
+             normalized_data = pd.concat(normalized_data_list, ignore_index=True)
+             temp_df = pd.concat([temp_df, normalized_data], axis=1)
+             # Save to CSV
+             if save_directory is None:
+                 save_directory = os.getcwd()
+             temp_df.to_csv(os.path.join(save_directory, filename), index=False)
+
+     # --- Final DataFrame ---
+     normalized_data_list = []
+     for json_str in extracted_jsons:
+         try:
+             parsed_obj = json.loads(json_str)
+             normalized_data_list.append(pd.json_normalize(parsed_obj))
+         except json.JSONDecodeError:
+             normalized_data_list.append(pd.DataFrame({"1": ["e"]}))
+     normalized_data = pd.concat(normalized_data_list, ignore_index=True)
+
+     categorized_data = pd.DataFrame({
+         'survey_response': (
+             search_input.reset_index(drop=True) if isinstance(search_input, (pd.DataFrame, pd.Series))
+             else pd.Series(search_input)
+         ),
+         'link1': pd.Series(link1).reset_index(drop=True),
+         'json': pd.Series(extracted_jsons).reset_index(drop=True)
+     })
+     categorized_data = pd.concat([categorized_data, normalized_data], axis=1)
+
+     return categorized_data
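build_web_research_dataset, added above, loops over search_input, queries an Anthropic model with the web_search_20250305 tool for each item, pulls the {"answer": ..., "url": ...} JSON out of the reply, and returns a DataFrame combining the raw replies, the extracted JSON, and the normalized answer/url columns. A minimal usage sketch, assuming the module is importable as catllm.build_web_research and using placeholder inputs and a placeholder API key:

from catllm.build_web_research import build_web_research_dataset

# Hypothetical example inputs; any list or pandas Series of items works.
universities = ["UC Berkeley", "Stanford University"]

df = build_web_research_dataset(
    search_question="year founded",           # what to look up for each item
    search_input=universities,
    api_key="YOUR_ANTHROPIC_API_KEY",          # placeholder, not a real key
    user_model="claude-3-7-sonnet-20250219",   # default from the signature above
    model_source="Anthropic",                  # the only source this function supports
    safety=True,                               # write partial progress to CSV after each item
    filename="founding_years.csv",
    time_delay=5,                              # seconds slept around each request
)

# The answer and url columns come from the normalized JSON when parsing succeeds.
print(df[["survey_response", "answer", "url"]])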
catllm/image_functions.py CHANGED
@@ -148,8 +148,11 @@ def image_multi_class(
              reply = response_obj.choices[0].message.content
              link1.append(reply)
          except Exception as e:
-             print(f"An error occurred: {e}")
-             link1.append(f"Error processing input: {e}")
+             if "model" in str(e).lower():
+                 raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
+             else:
+                 print("An error occurred: {e}")
+                 link1.append("Error processing input: {e}")

      elif model_source == "Anthropic":
          import anthropic
@@ -165,8 +168,11 @@ def image_multi_class(
              reply = message.content[0].text
              link1.append(reply)
          except Exception as e:
-             print(f"An error occurred: {e}")
-             link1.append(f"Error processing input: {e}")
+             if "model" in str(e).lower():
+                 raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
+             else:
+                 print("An error occurred: {e}")
+                 link1.append("Error processing input: {e}")

      elif model_source == "Mistral":
          from mistralai import Mistral
@@ -182,8 +188,11 @@ def image_multi_class(
              reply = response.choices[0].message.content
              link1.append(reply)
          except Exception as e:
-             print(f"An error occurred: {e}")
-             link1.append(f"Error processing input: {e}")
+             if "model" in str(e).lower():
+                 raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
+             else:
+                 print("An error occurred: {e}")
+                 link1.append("Error processing input: {e}")
      #if no valid image path is provided
      elif valid_image == False:
          reply = "invalid image path"
@@ -243,9 +252,11 @@ def image_multi_class(
          except json.JSONDecodeError:
              normalized_data_list.append(pd.DataFrame({"1": ["e"]}))
      normalized_data = pd.concat(normalized_data_list, ignore_index=True)
-
      categorized_data = pd.DataFrame({
-         'image_input': image_files,
+         'image_input': (
+             image_files.reset_index(drop=True) if isinstance(image_files, (pd.DataFrame, pd.Series))
+             else pd.Series(image_files)
+         ),
          'link1': pd.Series(link1).reset_index(drop=True),
          'json': pd.Series(extracted_jsons).reset_index(drop=True)
      })
@@ -436,8 +447,11 @@ def image_score_drawing(
              reply = response_obj.choices[0].message.content
              link1.append(reply)
          except Exception as e:
-             print(f"An error occurred: {e}")
-             link1.append(f"Error processing input: {e}")
+             if "model" in str(e).lower():
+                 raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
+             else:
+                 print("An error occurred: {e}")
+                 link1.append("Error processing input: {e}")

      elif model_source == "Anthropic":
          import anthropic
@@ -452,8 +466,11 @@ def image_score_drawing(
              reply = message.content[0].text # Anthropic returns content as list
              link1.append(reply)
          except Exception as e:
-             print(f"An error occurred: {e}")
-             link1.append(f"Error processing input: {e}")
+             if "model" in str(e).lower():
+                 raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
+             else:
+                 print("An error occurred: {e}")
+                 link1.append("Error processing input: {e}")

      elif model_source == "Mistral":
          from mistralai import Mistral
@@ -469,8 +486,11 @@ def image_score_drawing(
              reply = response.choices[0].message.content
              link1.append(reply)
          except Exception as e:
-             print(f"An error occurred: {e}")
-             link1.append(f"Error processing input: {e}")
+             if "model" in str(e).lower():
+                 raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
+             else:
+                 print("An error occurred: {e}")
+                 link1.append("Error processing input: {e}")
      #if no valid image path is provided
      elif valid_image == False:
          reply = "invalid image path"
@@ -531,7 +551,10 @@ def image_score_drawing(
      normalized_data = pd.concat(normalized_data_list, ignore_index=True)

      categorized_data = pd.DataFrame({
-         'image_input': image_files,
+         'image_input': (
+             image_files.reset_index(drop=True) if isinstance(image_files, (pd.DataFrame, pd.Series))
+             else pd.Series(image_files)
+         ),
          'link1': pd.Series(link1).reset_index(drop=True),
          'json': pd.Series(extracted_jsons).reset_index(drop=True)
      })
@@ -567,10 +590,6 @@ def image_features(
      import base64
      from pathlib import Path

-     if save_directory is not None and not os.path.isdir(save_directory):
-         # Directory doesn't exist - raise an exception to halt execution
-         raise FileNotFoundError(f"Directory {save_directory} doesn't exist")
-
      image_extensions = [
          '*.png', '*.jpg', '*.jpeg',
          '*.gif', '*.webp', '*.svg', '*.svgz', '*.avif', '*.apng',
@@ -595,26 +614,35 @@ def image_features(
      cat_num = len(features_to_extract)
      category_dict = {str(i+1): "0" for i in range(cat_num)}
      example_JSON = json.dumps(category_dict, indent=4)
-
-     # ensure number of categories is what user wants
-     print("\nThe image features to be extracted are:")
-     for i, cat in enumerate(features_to_extract, 1):
-         print(f"{i}. {cat}")

      link1 = []
      extracted_jsons = []

-     for i, img_path in enumerate(
-         tqdm(image_files, desc="Categorising images"), start=0):
+     for i, img_path in enumerate(tqdm(image_files, desc="Scoring images"), start=0):
+         # Check validity first
          if img_path is None or not os.path.exists(img_path):
              link1.append("Skipped NaN input or invalid path")
              extracted_jsons.append("""{"no_valid_image": 1}""")
              continue # Skip the rest of the loop iteration
-         # encode this specific image once
-         with open(img_path, "rb") as f:
-             encoded = base64.b64encode(f.read()).decode("utf-8")
-         ext = Path(img_path).suffix.lstrip(".").lower()
-         encoded_image = f"data:image/{ext};base64,{encoded}"
+
+         # Only open the file if path is valid
+         if os.path.isdir(img_path):
+             encoded = "Not a Valid Image, contains file path"
+         else:
+             try:
+                 with open(img_path, "rb") as f:
+                     encoded = base64.b64encode(f.read()).decode("utf-8")
+             except Exception as e:
+                 encoded = f"Error: {str(e)}"
+         # Handle extension safely
+         if encoded.startswith("Error:") or encoded == "Not a Valid Image, contains file path":
+             encoded_image = encoded
+             valid_image = False
+
+         else:
+             ext = Path(img_path).suffix.lstrip(".").lower()
+             encoded_image = f"data:image/{ext};base64,{encoded}"
+             valid_image = True

      if model_source == "OpenAI" or model_source == "Mistral":
          prompt = [
@@ -692,8 +720,11 @@ def image_features(
              reply = response_obj.choices[0].message.content
              link1.append(reply)
          except Exception as e:
-             print(f"An error occurred: {e}")
-             link1.append(f"Error processing input: {e}")
+             if "model" in str(e).lower():
+                 raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
+             else:
+                 print("An error occurred: {e}")
+                 link1.append("Error processing input: {e}")

      elif model_source == "Perplexity":
          from openai import OpenAI
@@ -707,8 +738,12 @@ def image_features(
              reply = response_obj.choices[0].message.content
              link1.append(reply)
          except Exception as e:
-             print(f"An error occurred: {e}")
-             link1.append(f"Error processing input: {e}")
+             if "model" in str(e).lower():
+                 raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
+             else:
+                 print("An error occurred: {e}")
+                 link1.append("Error processing input: {e}")
+
      elif model_source == "Anthropic":
          import anthropic
          client = anthropic.Anthropic(api_key=api_key)
@@ -722,8 +757,12 @@ def image_features(
              reply = message.content[0].text # Anthropic returns content as list
              link1.append(reply)
          except Exception as e:
-             print(f"An error occurred: {e}")
-             link1.append(f"Error processing input: {e}")
+             if "model" in str(e).lower():
+                 raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
+             else:
+                 print("An error occurred: {e}")
+                 link1.append("Error processing input: {e}")
+
      elif model_source == "Mistral":
          from mistralai import Mistral
          client = Mistral(api_key=api_key)
@@ -738,8 +777,12 @@ def image_features(
              reply = response.choices[0].message.content
              link1.append(reply)
          except Exception as e:
-             print(f"An error occurred: {e}")
-             link1.append(f"Error processing input: {e}")
+             if "model" in str(e).lower():
+                 raise ValueError(f"Invalid OpenAI model '{user_model}': {e}")
+             else:
+                 print("An error occurred: {e}")
+                 link1.append("Error processing input: {e}")
+
      elif valid_image == False:
          print("Skipped NaN input or invalid path")
          reply = None
@@ -797,7 +840,10 @@ def image_features(
      normalized_data = pd.concat(normalized_data_list, ignore_index=True)

      categorized_data = pd.DataFrame({
-         'image_input': image_files,
+         'image_input': (
+             image_files.reset_index(drop=True) if isinstance(image_files, (pd.DataFrame, pd.Series))
+             else pd.Series(image_files)
+         ),
          'link1': pd.Series(link1).reset_index(drop=True),
          'json': pd.Series(extracted_jsons).reset_index(drop=True)
      })
catllm/text_functions.py CHANGED
@@ -373,20 +373,14 @@ Provide your work in JSON format where the number belonging to each category is
          except json.JSONDecodeError:
              normalized_data_list.append(pd.DataFrame({"1": ["e"]}))
      normalized_data = pd.concat(normalized_data_list, ignore_index=True)
-
      categorized_data = pd.DataFrame({
-         'survey_response': survey_input.reset_index(drop=True),
+         'image_input': (
+             survey_input.reset_index(drop=True) if isinstance(survey_input, (pd.DataFrame, pd.Series))
+             else pd.Series(survey_input)
+         ),
          'link1': pd.Series(link1).reset_index(drop=True),
          'json': pd.Series(extracted_jsons).reset_index(drop=True)
      })
      categorized_data = pd.concat([categorized_data, normalized_data], axis=1)

-     if columns != "numbered": #if user wants text columns
-         categorized_data.columns = list(categorized_data.columns[:3]) + categories[:len(categorized_data.columns) - 3]
-
-     if to_csv:
-         if save_directory is None:
-             save_directory = os.getcwd()
-         categorized_data.to_csv(os.path.join(save_directory, filename), index=False)
-
      return categorized_data
@@ -1,14 +0,0 @@
- catllm/CERAD_functions.py,sha256=aV_2AU6nF7HNacu60-7lEiIzSvRpHtbPD9J2eTpgqlg,21874
- catllm/__about__.py,sha256=8nuXcj67gpeJhC7bBMhiy6wJtgXwYs11t-WyevYxitA,404
- catllm/__init__.py,sha256=BpAG8nPhM3ZQRd0WqkubI_36-VCOs4eCYtGVgzz48Bs,337
- catllm/image_functions.py,sha256=86EDccwnRVze7uhc-6p7aBxvvh8ozA7FEMtR6ywOTjY,33401
- catllm/text_functions.py,sha256=K6oetWYk25PwsllWSZP4cFrz7kyxJg0plPRvpmQkCsU,16846
- catllm/images/circle.png,sha256=JWujAWAh08-TajAoEr_TAeFNLlfbryOLw6cgIBREBuQ,86202
- catllm/images/cube.png,sha256=nFec3e5bmRe4zrBCJ8QK-HcJLrG7u7dYdKhmdMfacfE,77275
- catllm/images/diamond.png,sha256=rJDZKtsnBGRO8FPA0iHuA8FvHFGi9PkI_DWSFdw6iv0,99568
- catllm/images/overlapping_pentagons.png,sha256=VO5plI6eoVRnjfqinn1nNzsCP2WQhuQy71V0EASouW4,71208
- catllm/images/rectangles.png,sha256=2XM16HO9EYWj2yHgN4bPXaCwPfl7iYQy0tQUGaJX9xg,40692
- cat_llm-0.0.42.dist-info/METADATA,sha256=1kiY9RIAXt_tdNB1Zg-5YPG3wGbF7y-Qwsi4ZKxbmPw,17514
- cat_llm-0.0.42.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- cat_llm-0.0.42.dist-info/licenses/LICENSE,sha256=Vje2sS5WV4TnIwY5uQHrF4qnBAM3YOk1pGpdH0ot-2o,34969
- cat_llm-0.0.42.dist-info/RECORD,,