PgsFile 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PgsFile might be problematic. Click here for more details.
- PgsFile/PgsFile.py +42 -4
- PgsFile/__init__.py +2 -1
- PgsFile/models/dics/LIWC2015-Chinese.json +1 -0
- PgsFile/models/dics/LIWC2015-English.dic +6625 -0
- PgsFile/models/prompts/9. TextClassification prompt.txt +32 -0
- {pgsfile-0.5.3.dist-info → pgsfile-0.5.5.dist-info}/METADATA +5 -5
- {pgsfile-0.5.3.dist-info → pgsfile-0.5.5.dist-info}/RECORD +11 -8
- /PgsFile/models/prompts/{3. ICTCLAS Prompt.txt → 3. ICTCLAS prompt.txt} +0 -0
- {pgsfile-0.5.3.dist-info → pgsfile-0.5.5.dist-info}/WHEEL +0 -0
- {pgsfile-0.5.3.dist-info → pgsfile-0.5.5.dist-info}/licenses/LICENSE +0 -0
- {pgsfile-0.5.3.dist-info → pgsfile-0.5.5.dist-info}/top_level.txt +0 -0
PgsFile/PgsFile.py
CHANGED
|
@@ -3800,17 +3800,16 @@ def file_to_list_of_dicts(input_path, output_path):
|
|
|
3800
3800
|
|
|
3801
3801
|
import liwc
|
|
3802
3802
|
import json
|
|
3803
|
-
def perform_liwc_en(
|
|
3803
|
+
def perform_liwc_en(file_path, output_excel_path):
|
|
3804
3804
|
'''
|
|
3805
3805
|
Parameters
|
|
3806
3806
|
----------
|
|
3807
|
-
dic_path : str
|
|
3808
|
-
Path to the LIWC dictionary file.
|
|
3809
3807
|
file_path : str
|
|
3810
3808
|
Path to the raw text file.
|
|
3811
3809
|
output_excel_path : str
|
|
3812
3810
|
Path to the output Excel file.
|
|
3813
3811
|
'''
|
|
3812
|
+
dic_path = get_library_location("PgsFile")+"/PgsFile/models/dics/LIWC2015-English.dic"
|
|
3814
3813
|
parse, category_names = liwc.load_token_parser(dic_path)
|
|
3815
3814
|
test = get_data_text(file_path)
|
|
3816
3815
|
test_tokens = [w.lower() for w in word_tokenize2(test)]
|
|
@@ -3850,7 +3849,7 @@ def perform_liwc_en(dic_path, file_path, output_excel_path):
|
|
|
3850
3849
|
df.to_excel(output_excel_path, 'sheet1', index=False)
|
|
3851
3850
|
|
|
3852
3851
|
|
|
3853
|
-
def perform_liwc_zh(
|
|
3852
|
+
def perform_liwc_zh(file_path, output_excel_path):
|
|
3854
3853
|
'''
|
|
3855
3854
|
Parameters
|
|
3856
3855
|
----------
|
|
@@ -3861,6 +3860,7 @@ def perform_liwc_zh(dic_path, file_path, output_excel_path):
|
|
|
3861
3860
|
output_excel_path : str
|
|
3862
3861
|
Path to the output Excel file.
|
|
3863
3862
|
'''
|
|
3863
|
+
dic_path = get_library_location("PgsFile")+"/PgsFile/models/dics/LIWC2015-Chinese.json"
|
|
3864
3864
|
|
|
3865
3865
|
f=open(dic_path,"r")
|
|
3866
3866
|
dicx=json.load(f)
|
|
@@ -4322,3 +4322,41 @@ def append_result_only(prompts_dict, note=RESULT_ONLY_NOTE):
|
|
|
4322
4322
|
# Apply it
|
|
4323
4323
|
translation_prompts = append_result_only(raw_translation_prompts)
|
|
4324
4324
|
|
|
4325
|
+
|
|
4326
|
+
def csv_to_json_append(csv_path: str, json_path: str) -> None:
    """
    Read a CSV file and append each of its rows, as a dictionary, to a JSON file.

    Every data row becomes one dictionary keyed by the CSV column names, and
    each dictionary is handed to ``append_dict_to_json`` in file order.
    A CSV with no parseable content (e.g. a zero-byte file) or with only a
    header row results in zero records being appended.

    Args:
        csv_path (str): Path to the CSV file.
        json_path (str): Path to the output JSON file.
    """
    import pandas as pd

    try:
        df = pd.read_csv(csv_path)
    except pd.errors.EmptyDataError:
        # pandas raises when the file has no columns to parse at all;
        # there is simply nothing to append in that case.
        data_list = []
    else:
        # Column names are detected by pandas; one dict per row.
        # NOTE(review): empty cells arrive here as float NaN - confirm that
        # append_dict_to_json serializes them the way consumers expect.
        data_list = df.to_dict(orient='records')

    # Append each dict to the JSON file
    for record in data_list:
        append_dict_to_json(json_path, record)

    print(f"✅ Completed! Appended {len(data_list)} records to {json_path}")
|
|
4348
|
+
|
|
4349
|
+
def get_data_csv(csv_path: str) -> list[dict]:
    """
    Load a CSV file and return its rows as a list of dictionaries.

    Column names are detected by ``pandas.read_csv`` and used as the keys
    of every row dictionary.

    Args:
        csv_path (str): Path to the CSV file.

    Returns:
        list[dict]: One dictionary per data row, in file order. A file with
        no parseable content (e.g. zero bytes) or with only a header row
        yields an empty list.
    """
    import pandas as pd

    try:
        df = pd.read_csv(csv_path)
    except pd.errors.EmptyDataError:
        # pandas raises when there are no columns to parse; an empty file
        # holds no rows, so report that instead of failing.
        return []
    return df.to_dict(orient="records")
|
PgsFile/__init__.py
CHANGED
|
@@ -11,7 +11,7 @@ from .PgsFile import conda_mirror_commands
|
|
|
11
11
|
|
|
12
12
|
# 3. Text data retrieval
|
|
13
13
|
from .PgsFile import get_data_text, get_data_lines, get_json_lines, get_tsv_lines
|
|
14
|
-
from .PgsFile import get_data_excel, get_data_json, get_data_tsv, extract_misspelled_words_from_docx
|
|
14
|
+
from .PgsFile import get_data_excel, get_data_json, get_data_tsv, get_data_csv, extract_misspelled_words_from_docx
|
|
15
15
|
from .PgsFile import get_data_html_online, get_data_html_offline
|
|
16
16
|
from .PgsFile import get_data_table_url, get_data_table_html_string
|
|
17
17
|
from .PgsFile import mhtml2html
|
|
@@ -33,6 +33,7 @@ from .PgsFile import set_permanent_environment_variable
|
|
|
33
33
|
from .PgsFile import delete_permanent_environment_variable
|
|
34
34
|
from .PgsFile import get_env_variable, get_all_env_variables
|
|
35
35
|
from .PgsFile import get_system_info
|
|
36
|
+
from .PgsFile import csv_to_json_append
|
|
36
37
|
|
|
37
38
|
# 6. Data cleaning
|
|
38
39
|
from .PgsFile import BigPunctuation, StopTags, Special, yhd
|