PgsFile 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PgsFile might be problematic. Click here for more details.
- PgsFile/PgsFile.py +29 -0
- PgsFile/__init__.py +1 -1
- {PgsFile-0.3.6.dist-info → PgsFile-0.3.7.dist-info}/METADATA +1 -1
- {PgsFile-0.3.6.dist-info → PgsFile-0.3.7.dist-info}/RECORD +7 -7
- {PgsFile-0.3.6.dist-info → PgsFile-0.3.7.dist-info}/LICENSE +0 -0
- {PgsFile-0.3.6.dist-info → PgsFile-0.3.7.dist-info}/WHEEL +0 -0
- {PgsFile-0.3.6.dist-info → PgsFile-0.3.7.dist-info}/top_level.txt +0 -0
PgsFile/PgsFile.py
CHANGED
|
@@ -3667,3 +3667,32 @@ def timeit(func):
|
|
|
3667
3667
|
return result
|
|
3668
3668
|
return wrapper
|
|
3669
3669
|
|
|
3670
|
+
def file_to_list_of_dicts(input_path, output_path):
    """
    Convert a CSV or XLSX file into a JSON file, one dictionary per input row.

    Each row of the loaded table becomes a dictionary whose keys are
    positional: "col1", "col2", "col3", etc. (the DataFrame's own column
    labels are not used as keys). Every dictionary is passed, together with
    output_path, to append_dict_to_json, and a progress line is printed
    after each row.

    Args:
        input_path (str): Path to the CSV or XLSX file. The extension is
            matched case-insensitively.
        output_path (str): Path to the output JSON file.

    Returns:
        None: The function writes the dictionaries to the JSON file specified
        by output_path.

    Raises:
        ValueError: If input_path ends with neither ".csv" nor ".xlsx".
    """
    import pandas as pd

    # Determine the file type based on the file extension (case-insensitive,
    # so "DATA.CSV" is accepted as well as "data.csv").
    lowered_path = input_path.lower()
    if lowered_path.endswith('.csv'):
        df = pd.read_csv(input_path)
    elif lowered_path.endswith('.xlsx'):
        df = pd.read_excel(input_path)
    else:
        raise ValueError("Unsupported file format. Please provide a CSV or XLSX file.")

    # len(df) is the row count; there is no need to materialise every row
    # through iterrows() just to count them.
    total = len(df)

    # Use default keys "col1, col2, etc."
    # A 1-based positional counter drives the progress report, so it does not
    # depend on the index labels and reaches 100% on the last row.
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        row_dict = {f"col{i+1}": value for i, value in enumerate(row)}
        append_dict_to_json(output_path, row_dict)
        print(f'Converting {decimal_to_percent(position/total)}')

    print("Conversion complete!")
|
PgsFile/__init__.py
CHANGED
|
@@ -17,7 +17,7 @@ from .PgsFile import get_data_table_url, get_data_table_html_string
|
|
|
17
17
|
from .PgsFile import mhtml2html
|
|
18
18
|
|
|
19
19
|
# 4. Text data storage
|
|
20
|
-
from .PgsFile import write_to_txt, write_to_excel, write_to_json, write_to_json_lines, append_dict_to_json, save_dict_to_excel
|
|
20
|
+
from .PgsFile import write_to_txt, write_to_excel, write_to_json, write_to_json_lines, append_dict_to_json, save_dict_to_excel, file_to_list_of_dicts
|
|
21
21
|
from .PgsFile import write_to_excel_normal
|
|
22
22
|
|
|
23
23
|
# 5. File/folder process
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: PgsFile
|
|
3
|
-
Version: 0.3.6
|
|
3
|
+
Version: 0.3.7
|
|
4
4
|
Summary: This module simplifies Python package management, script execution, file handling, web scraping, and multimedia downloads. The module supports LLM-based NLP tasks such as tokenization, lemmatization, POS tagging, NER, dependency parsing, MDD, WSD, and MIP analysis. It also generates word lists and plots data, aiding literary students. Ideal for scraping data, cleaning text, and analyzing language, it offers user-friendly tools to streamline workflows.
|
|
5
5
|
Home-page: https://mp.weixin.qq.com/s/12-KVLfaPszoZkCxuRd-nQ?token=1589547443&lang=zh_CN
|
|
6
6
|
Author: Pan Guisheng
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
PgsFile/PgsFile.py,sha256=
|
|
2
|
-
PgsFile/__init__.py,sha256=
|
|
1
|
+
PgsFile/PgsFile.py,sha256=GFC_G9f_g3Fxbp52bBHTk4bsLvVEDPrSOumCC6LEcQ0,150918
|
|
2
|
+
PgsFile/__init__.py,sha256=KJOCHhbfIJNSecPz_qjtX3g_uWglhxEkesR-ou8f3FI,3431
|
|
3
3
|
PgsFile/Corpora/Idioms/English_Idioms_8774.txt,sha256=qlsP0yI_XGECBRiPZuLkGZpdasc77sWSKexANu7v8_M,175905
|
|
4
4
|
PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000000.txt,sha256=SLGGSMSb7Ff1RoBstsTW3yX2wNZpqEUchFNpcI-mrR4,1513
|
|
5
5
|
PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000001.txt,sha256=imOa6UoCOIZoPXT4_HNHgCUJtd4FTIdk2FZNHNBgJyg,3372
|
|
@@ -2585,8 +2585,8 @@ PgsFile/models/fonts/博洋行书3500.TTF,sha256=VrgeHr8cgOL6JD05QyuD9ZSyw4J2aIV
|
|
|
2585
2585
|
PgsFile/models/fonts/陆柬之行书字体.ttf,sha256=Zpd4Z7E9w-Qy74yklXHk4vM7HOtHuQgllvygxZZ1Hvs,1247288
|
|
2586
2586
|
PgsFile/models/prompts/1. MIP prompt.txt,sha256=4lHlHmleayRytqr1n9jtt6vn1rQvyf4BKeThpbwI8o8,1638
|
|
2587
2587
|
PgsFile/models/prompts/2. WSD prompt.txt,sha256=o-ZFtCRUCDrXgm040WTQch9v2Y_r2SIlrZaquilJjgQ,2348
|
|
2588
|
-
PgsFile-0.3.6.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
|
|
2589
|
-
PgsFile-0.3.
|
|
2590
|
-
PgsFile-0.3.6.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
|
2591
|
-
PgsFile-0.3.6.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
|
|
2592
|
-
PgsFile-0.3.6.dist-info/RECORD,,
|
|
2588
|
+
PgsFile-0.3.7.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
|
|
2589
|
+
PgsFile-0.3.7.dist-info/METADATA,sha256=fVMCPnAFQ5CjWOZxowrODAE3FGNN7HdNr3ONd5G7tFs,2892
|
|
2590
|
+
PgsFile-0.3.7.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
|
2591
|
+
PgsFile-0.3.7.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
|
|
2592
|
+
PgsFile-0.3.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|