cat-llm 0.0.53__tar.gz → 0.0.55__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cat_llm-0.0.53 → cat_llm-0.0.55}/PKG-INFO +5 -2
- {cat_llm-0.0.53 → cat_llm-0.0.55}/pyproject.toml +5 -2
- {cat_llm-0.0.53 → cat_llm-0.0.55}/src/catllm/CERAD_functions.py +1 -1
- {cat_llm-0.0.53 → cat_llm-0.0.55}/src/catllm/__about__.py +1 -1
- {cat_llm-0.0.53 → cat_llm-0.0.55}/src/catllm/image_functions.py +3 -3
- {cat_llm-0.0.53 → cat_llm-0.0.55}/src/catllm/text_functions.py +40 -3
- {cat_llm-0.0.53 → cat_llm-0.0.55}/.gitignore +0 -0
- {cat_llm-0.0.53 → cat_llm-0.0.55}/LICENSE +0 -0
- {cat_llm-0.0.53 → cat_llm-0.0.55}/README.md +0 -0
- {cat_llm-0.0.53 → cat_llm-0.0.55}/src/catllm/__init__.py +0 -0
- {cat_llm-0.0.53 → cat_llm-0.0.55}/src/catllm/build_web_research.py +0 -0
- {cat_llm-0.0.53 → cat_llm-0.0.55}/src/catllm/images/circle.png +0 -0
- {cat_llm-0.0.53 → cat_llm-0.0.55}/src/catllm/images/cube.png +0 -0
- {cat_llm-0.0.53 → cat_llm-0.0.55}/src/catllm/images/diamond.png +0 -0
- {cat_llm-0.0.53 → cat_llm-0.0.55}/src/catllm/images/overlapping_pentagons.png +0 -0
- {cat_llm-0.0.53 → cat_llm-0.0.55}/src/catllm/images/rectangles.png +0 -0
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cat-llm
|
|
3
|
-
Version: 0.0.53
|
|
3
|
+
Version: 0.0.55
|
|
4
4
|
Summary: A tool for categorizing text data and images using LLMs and vision models
|
|
5
5
|
Project-URL: Documentation, https://github.com/chrissoria/cat-llm#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/chrissoria/cat-llm/issues
|
|
7
7
|
Project-URL: Source, https://github.com/chrissoria/cat-llm
|
|
8
|
-
Author-email:
|
|
8
|
+
Author-email: Chris Soria <chrissoria@berkeley.edu>
|
|
9
9
|
License-Expression: MIT
|
|
10
10
|
License-File: LICENSE
|
|
11
11
|
Keywords: categorizer,image classification,llm,structured output,survey data,text classification
|
|
@@ -19,7 +19,10 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
19
19
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
20
20
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
21
21
|
Requires-Python: >=3.8
|
|
22
|
+
Requires-Dist: openai
|
|
22
23
|
Requires-Dist: pandas
|
|
24
|
+
Requires-Dist: random
|
|
25
|
+
Requires-Dist: requests
|
|
23
26
|
Requires-Dist: tqdm
|
|
24
27
|
Description-Content-Type: text/markdown
|
|
25
28
|
|
|
@@ -11,7 +11,7 @@ requires-python = ">=3.8"
|
|
|
11
11
|
license = "MIT"
|
|
12
12
|
keywords = ["llm","categorizer","survey data", "image classification", "structured output", "text classification"]
|
|
13
13
|
authors = [
|
|
14
|
-
{ name = "
|
|
14
|
+
{ name = "Chris Soria", email = "chrissoria@berkeley.edu" },
|
|
15
15
|
]
|
|
16
16
|
classifiers = [
|
|
17
17
|
"Development Status :: 4 - Beta",
|
|
@@ -26,7 +26,10 @@ classifiers = [
|
|
|
26
26
|
]
|
|
27
27
|
dependencies = [
|
|
28
28
|
"pandas",
|
|
29
|
-
"tqdm"
|
|
29
|
+
"tqdm",
|
|
30
|
+
"requests",
|
|
31
|
+
"openai",
|
|
32
|
+
"random"
|
|
30
33
|
]
|
|
31
34
|
|
|
32
35
|
[project.urls]
|
|
@@ -378,7 +378,7 @@ def cerad_drawn_score(
|
|
|
378
378
|
image_files.reset_index(drop=True) if isinstance(image_files, (pd.DataFrame, pd.Series))
|
|
379
379
|
else pd.Series(image_files)
|
|
380
380
|
),
|
|
381
|
-
'
|
|
381
|
+
'model_response': pd.Series(link1).reset_index(drop=True),
|
|
382
382
|
'json': pd.Series(extracted_jsons).reset_index(drop=True)
|
|
383
383
|
})
|
|
384
384
|
categorized_data = pd.concat([categorized_data, normalized_data], axis=1)
|
|
@@ -225,7 +225,7 @@ def image_multi_class(
|
|
|
225
225
|
# Save progress so far
|
|
226
226
|
temp_df = pd.DataFrame({
|
|
227
227
|
'image_input': image_files[:i+1],
|
|
228
|
-
'
|
|
228
|
+
'model_response': link1,
|
|
229
229
|
'json': extracted_jsons
|
|
230
230
|
})
|
|
231
231
|
# Normalize processed jsons so far
|
|
@@ -522,7 +522,7 @@ def image_score_drawing(
|
|
|
522
522
|
# Save progress so far
|
|
523
523
|
temp_df = pd.DataFrame({
|
|
524
524
|
'image_input': image_files[:i+1],
|
|
525
|
-
'
|
|
525
|
+
'model_response': link1,
|
|
526
526
|
'json': extracted_jsons
|
|
527
527
|
})
|
|
528
528
|
# Normalize processed jsons so far
|
|
@@ -844,7 +844,7 @@ def image_features(
|
|
|
844
844
|
image_files.reset_index(drop=True) if isinstance(image_files, (pd.DataFrame, pd.Series))
|
|
845
845
|
else pd.Series(image_files)
|
|
846
846
|
),
|
|
847
|
-
'
|
|
847
|
+
'model_response': pd.Series(link1).reset_index(drop=True),
|
|
848
848
|
'json': pd.Series(extracted_jsons).reset_index(drop=True)
|
|
849
849
|
})
|
|
850
850
|
categorized_data = pd.concat([categorized_data, normalized_data], axis=1)
|
|
@@ -227,6 +227,7 @@ def multi_class(
|
|
|
227
227
|
user_model="gpt-4o",
|
|
228
228
|
creativity=0,
|
|
229
229
|
safety=False,
|
|
230
|
+
to_csv=False,
|
|
230
231
|
filename="categorized_data.csv",
|
|
231
232
|
save_directory=None,
|
|
232
233
|
model_source="OpenAI"
|
|
@@ -307,6 +308,37 @@ Provide your work in JSON format where the number belonging to each category is
|
|
|
307
308
|
except Exception as e:
|
|
308
309
|
print(f"An error occurred: {e}")
|
|
309
310
|
link1.append(f"Error processing input: {e}")
|
|
311
|
+
|
|
312
|
+
elif model_source == "Google":
|
|
313
|
+
import requests
|
|
314
|
+
url = f"https://generativelanguage.googleapis.com/v1beta/models/{user_model}:generateContent"
|
|
315
|
+
try:
|
|
316
|
+
headers = {
|
|
317
|
+
"x-goog-api-key": api_key,
|
|
318
|
+
"Content-Type": "application/json"
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
payload = {
|
|
322
|
+
"contents": [{
|
|
323
|
+
"parts": [{"text": prompt}]
|
|
324
|
+
}]
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
response = requests.post(url, headers=headers, json=payload)
|
|
328
|
+
response.raise_for_status() # Raise exception for HTTP errors
|
|
329
|
+
result = response.json()
|
|
330
|
+
|
|
331
|
+
if "candidates" in result and result["candidates"]:
|
|
332
|
+
reply = result["candidates"][0]["content"]["parts"][0]["text"]
|
|
333
|
+
else:
|
|
334
|
+
reply = "No response generated"
|
|
335
|
+
|
|
336
|
+
link1.append(reply)
|
|
337
|
+
print(reply)
|
|
338
|
+
except Exception as e:
|
|
339
|
+
print(f"An error occurred: {e}")
|
|
340
|
+
link1.append(f"Error processing input: {e}")
|
|
341
|
+
|
|
310
342
|
elif model_source == "Mistral":
|
|
311
343
|
from mistralai import Mistral
|
|
312
344
|
client = Mistral(api_key=api_key)
|
|
@@ -359,7 +391,7 @@ Provide your work in JSON format where the number belonging to each category is
|
|
|
359
391
|
normalized_data_list.append(pd.DataFrame({"1": ["e"]}))
|
|
360
392
|
normalized_data = pd.concat(normalized_data_list, ignore_index=True)
|
|
361
393
|
temp_df = pd.concat([temp_df, normalized_data], axis=1)
|
|
362
|
-
#
|
|
394
|
+
# save to CSV
|
|
363
395
|
if save_directory is None:
|
|
364
396
|
save_directory = os.getcwd()
|
|
365
397
|
temp_df.to_csv(os.path.join(save_directory, filename), index=False)
|
|
@@ -374,13 +406,18 @@ Provide your work in JSON format where the number belonging to each category is
|
|
|
374
406
|
normalized_data_list.append(pd.DataFrame({"1": ["e"]}))
|
|
375
407
|
normalized_data = pd.concat(normalized_data_list, ignore_index=True)
|
|
376
408
|
categorized_data = pd.DataFrame({
|
|
377
|
-
'
|
|
409
|
+
'survey_input': (
|
|
378
410
|
survey_input.reset_index(drop=True) if isinstance(survey_input, (pd.DataFrame, pd.Series))
|
|
379
411
|
else pd.Series(survey_input)
|
|
380
412
|
),
|
|
381
|
-
'
|
|
413
|
+
'model_response': pd.Series(link1).reset_index(drop=True),
|
|
382
414
|
'json': pd.Series(extracted_jsons).reset_index(drop=True)
|
|
383
415
|
})
|
|
384
416
|
categorized_data = pd.concat([categorized_data, normalized_data], axis=1)
|
|
417
|
+
|
|
418
|
+
if to_csv:
|
|
419
|
+
if save_directory is None:
|
|
420
|
+
save_directory = os.getcwd()
|
|
421
|
+
categorized_data.to_csv(os.path.join(save_directory, filename), index=False)
|
|
385
422
|
|
|
386
423
|
return categorized_data
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|