PgsFile 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of PgsFile might be problematic. Click here for more details.

PgsFile/PgsFile.py CHANGED
@@ -3800,17 +3800,16 @@ def file_to_list_of_dicts(input_path, output_path):
3800
3800
 
3801
3801
  import liwc
3802
3802
  import json
3803
- def perform_liwc_en(dic_path, file_path, output_excel_path):
3803
+ def perform_liwc_en(file_path, output_excel_path):
3804
3804
  '''
3805
3805
  Parameters
3806
3806
  ----------
3807
- dic_path : str
3808
- Path to the LIWC dictionary file.
3809
3807
  file_path : str
3810
3808
  Path to the raw text file.
3811
3809
  output_excel_path : str
3812
3810
  Path to the output Excel file.
3813
3811
  '''
3812
+ dic_path = get_library_location("PgsFile")+"/PgsFile/models/dics/LIWC2015-English.dic"
3814
3813
  parse, category_names = liwc.load_token_parser(dic_path)
3815
3814
  test = get_data_text(file_path)
3816
3815
  test_tokens = [w.lower() for w in word_tokenize2(test)]
@@ -3850,7 +3849,7 @@ def perform_liwc_en(dic_path, file_path, output_excel_path):
3850
3849
  df.to_excel(output_excel_path, 'sheet1', index=False)
3851
3850
 
3852
3851
 
3853
- def perform_liwc_zh(dic_path, file_path, output_excel_path):
3852
+ def perform_liwc_zh(file_path, output_excel_path):
3854
3853
  '''
3855
3854
  Parameters
3856
3855
  ----------
@@ -3861,6 +3860,7 @@ def perform_liwc_zh(dic_path, file_path, output_excel_path):
3861
3860
  output_excel_path : str
3862
3861
  Path to the output Excel file.
3863
3862
  '''
3863
+ dic_path = get_library_location("PgsFile")+"/PgsFile/models/dics/LIWC2015-Chinese.json"
3864
3864
 
3865
3865
  f=open(dic_path,"r")
3866
3866
  dicx=json.load(f)
@@ -4322,3 +4322,41 @@ def append_result_only(prompts_dict, note=RESULT_ONLY_NOTE):
4322
4322
  # Apply it
4323
4323
  translation_prompts = append_result_only(raw_translation_prompts)
4324
4324
 
4325
+
4326
+ def csv_to_json_append(csv_path: str, json_path: str) -> None:
4327
+ """
4328
+ Convert a CSV file into a list of dictionaries and append them into a JSON file.
4329
+
4330
+ Args:
4331
+ csv_path (str): Path to the CSV file.
4332
+ json_path (str): Path to the output JSON file.
4333
+ """
4334
+
4335
+ import pandas as pd
4336
+
4337
+ # Load CSV into DataFrame
4338
+ df = pd.read_csv(csv_path)
4339
+
4340
+ # Automatically get all columns, convert to list of dicts
4341
+ data_list = df.to_dict(orient='records')
4342
+
4343
+ # Append each dict to JSON file
4344
+ for record in data_list:
4345
+ append_dict_to_json(json_path, record)
4346
+
4347
+ print(f"✅ Completed! Appended {len(data_list)} records to {json_path}")
4348
+
4349
+ def get_data_csv(csv_path: str) -> list[dict]:
4350
+ """
4351
+ Load a CSV file and return its rows as a list of dictionaries.
4352
+ Column names are automatically detected.
4353
+
4354
+ Args:
4355
+ csv_path (str): Path to the CSV file.
4356
+
4357
+ Returns:
4358
+ list[dict]: A list of dictionaries, where each dict represents one row.
4359
+ """
4360
+ import pandas as pd
4361
+ df = pd.read_csv(csv_path)
4362
+ return df.to_dict(orient="records")
PgsFile/__init__.py CHANGED
@@ -11,7 +11,7 @@ from .PgsFile import conda_mirror_commands
11
11
 
12
12
  # 3. Text data retrieval
13
13
  from .PgsFile import get_data_text, get_data_lines, get_json_lines, get_tsv_lines
14
- from .PgsFile import get_data_excel, get_data_json, get_data_tsv, extract_misspelled_words_from_docx
14
+ from .PgsFile import get_data_excel, get_data_json, get_data_tsv, get_data_csv, extract_misspelled_words_from_docx
15
15
  from .PgsFile import get_data_html_online, get_data_html_offline
16
16
  from .PgsFile import get_data_table_url, get_data_table_html_string
17
17
  from .PgsFile import mhtml2html
@@ -33,6 +33,7 @@ from .PgsFile import set_permanent_environment_variable
33
33
  from .PgsFile import delete_permanent_environment_variable
34
34
  from .PgsFile import get_env_variable, get_all_env_variables
35
35
  from .PgsFile import get_system_info
36
+ from .PgsFile import csv_to_json_append
36
37
 
37
38
  # 6. Data cleaning
38
39
  from .PgsFile import BigPunctuation, StopTags, Special, yhd