PgsFile 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PgsFile might be problematic. Click here for more details.
- PgsFile/Corpora/Stopwords/NLPIR.user +0 -0
- PgsFile/PgsFile.py +53 -2
- PgsFile/__init__.py +1 -1
- {PgsFile-0.2.2.dist-info → PgsFile-0.2.3.dist-info}/METADATA +1 -1
- {PgsFile-0.2.2.dist-info → PgsFile-0.2.3.dist-info}/RECORD +8 -7
- {PgsFile-0.2.2.dist-info → PgsFile-0.2.3.dist-info}/LICENSE +0 -0
- {PgsFile-0.2.2.dist-info → PgsFile-0.2.3.dist-info}/WHEEL +0 -0
- {PgsFile-0.2.2.dist-info → PgsFile-0.2.3.dist-info}/top_level.txt +0 -0
|
Binary file
|
PgsFile/PgsFile.py
CHANGED
|
@@ -700,6 +700,21 @@ def find_txt_files_with_keyword(root_folder, keyword, case_sensitive=None):
|
|
|
700
700
|
matches.append(os.path.join(root, filename))
|
|
701
701
|
return matches
|
|
702
702
|
|
|
703
|
+
import fnmatch
|
|
704
|
+
def find_user_files_in_upper_folder(directory, user_file_name):
|
|
705
|
+
# Get the direct upper folder path
|
|
706
|
+
upper_folder = os.path.dirname(os.path.abspath(directory))
|
|
707
|
+
|
|
708
|
+
# List to store matching file paths
|
|
709
|
+
matching_files = []
|
|
710
|
+
|
|
711
|
+
# Walk through the upper folder
|
|
712
|
+
for root, dirs, files in os.walk(upper_folder):
|
|
713
|
+
for filename in fnmatch.filter(files, f'{user_file_name}.user'):
|
|
714
|
+
matching_files.append(os.path.join(root, filename))
|
|
715
|
+
|
|
716
|
+
return matching_files
|
|
717
|
+
|
|
703
718
|
# Standard sentence tokenizer.
|
|
704
719
|
def sent_tokenize(text, lang=None):
|
|
705
720
|
import pysbd
|
|
@@ -782,6 +797,7 @@ def cs1(text):
|
|
|
782
797
|
sentences=sentences
|
|
783
798
|
return sentences
|
|
784
799
|
|
|
800
|
+
|
|
785
801
|
def word_tokenize(text, pos_tagged=False):
|
|
786
802
|
'''
|
|
787
803
|
Parameters
|
|
@@ -816,10 +832,22 @@ def word_tokenize(text, pos_tagged=False):
|
|
|
816
832
|
try:
|
|
817
833
|
from nlpir import tools
|
|
818
834
|
tools.update_license()
|
|
835
|
+
print("\n\nThe user file is ready. Please restart your kernel and run the Python script!")
|
|
819
836
|
except Exception as err2:
|
|
820
|
-
print("
|
|
837
|
+
print("\n*****SOLUTION WARNING! \nYOU MAY NEED A VPN TO TRY THIS SERVICE!*****\n\n", err2)
|
|
821
838
|
else:
|
|
822
|
-
|
|
839
|
+
try:
|
|
840
|
+
if "Can not open" in str(err):
|
|
841
|
+
user_folder=get_library_location("PgsFile")+"/PgsFile/Corpora/Stopwords"
|
|
842
|
+
destination_folder=get_library_location("nlpir-python")+"/nlpir/Data"
|
|
843
|
+
source_file=find_user_files_in_upper_folder(user_folder, "NLPIR")[0]
|
|
844
|
+
copy_file(source_file, destination_folder)
|
|
845
|
+
print("The user file is ready. Please restart your kernel and run the Python script!")
|
|
846
|
+
else:
|
|
847
|
+
print(err)
|
|
848
|
+
except Exception as rer:
|
|
849
|
+
print(rer)
|
|
850
|
+
|
|
823
851
|
return words
|
|
824
852
|
|
|
825
853
|
def pad_sequence(
|
|
@@ -1547,6 +1575,29 @@ def move_file(source_file, destination_folder, new_file_name=None):
|
|
|
1547
1575
|
shutil.move(source_file, destination_file)
|
|
1548
1576
|
|
|
1549
1577
|
print(f"File moved from {source_file} to {destination_file}")
|
|
1578
|
+
|
|
1579
|
+
def copy_file(source_file, destination_folder, new_file_name=None):
|
|
1580
|
+
"""
|
|
1581
|
+
Copy a file to another folder.
|
|
1582
|
+
|
|
1583
|
+
Parameters:
|
|
1584
|
+
source_file (str): The path to the source file.
|
|
1585
|
+
destination_folder (str): The path to the destination folder.
|
|
1586
|
+
new_file_name (str, optional): The new name for the file in the destination folder. Defaults to None.
|
|
1587
|
+
"""
|
|
1588
|
+
# Ensure the destination folder exists
|
|
1589
|
+
if not os.path.exists(destination_folder):
|
|
1590
|
+
os.makedirs(destination_folder)
|
|
1591
|
+
|
|
1592
|
+
# Construct the destination file path
|
|
1593
|
+
if new_file_name:
|
|
1594
|
+
destination_file = os.path.join(destination_folder, new_file_name)
|
|
1595
|
+
else:
|
|
1596
|
+
destination_file = os.path.join(destination_folder, os.path.basename(source_file))
|
|
1597
|
+
|
|
1598
|
+
# Copy the file to the destination folder
|
|
1599
|
+
shutil.copy2(source_file, destination_file)
|
|
1600
|
+
|
|
1550
1601
|
|
|
1551
1602
|
def check_empty_cells(file_path):
|
|
1552
1603
|
"""
|
PgsFile/__init__.py
CHANGED
|
@@ -22,7 +22,7 @@ from .PgsFile import FilePath, FileName, DirList
|
|
|
22
22
|
from .PgsFile import get_subfolder_path
|
|
23
23
|
from .PgsFile import makedirec, makefile
|
|
24
24
|
from .PgsFile import source_path, next_folder_names, get_directory_tree_with_meta, find_txt_files_with_keyword
|
|
25
|
-
from .PgsFile import remove_empty_folders, remove_empty_txts, remove_empty_lines, remove_empty_last_line, move_file
|
|
25
|
+
from .PgsFile import remove_empty_folders, remove_empty_txts, remove_empty_lines, remove_empty_last_line, move_file, copy_file
|
|
26
26
|
from .PgsFile import concatenate_excel_files
|
|
27
27
|
|
|
28
28
|
# 6. Data cleaning
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: PgsFile
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: This module aims to simplify Python package management, script execution, file handling, web scraping, multimedia download, data cleaning, NLP tasks like Chinese word tokenization and POS tagging, and word list generation for literary students, making it more accessible and convenient to use.
|
|
5
5
|
Home-page: https://mp.weixin.qq.com/s/12-KVLfaPszoZkCxuRd-nQ?token=1589547443&lang=zh_CN
|
|
6
6
|
Author: Pan Guisheng
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
PgsFile/PgsFile.py,sha256=
|
|
2
|
-
PgsFile/__init__.py,sha256
|
|
1
|
+
PgsFile/PgsFile.py,sha256=MpXQK6MLMBh1JMAcBw5sRiRof--x4OyARcCsWwn7Z4A,85828
|
|
2
|
+
PgsFile/__init__.py,sha256=E4VfPu1BxCBcZ5WXi5E6faPaNt_Shpvgh9LvBlg7eA0,2389
|
|
3
3
|
PgsFile/Corpora/Idioms/English_Idioms_8774.txt,sha256=qlsP0yI_XGECBRiPZuLkGZpdasc77sWSKexANu7v8_M,175905
|
|
4
4
|
PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000000.txt,sha256=SLGGSMSb7Ff1RoBstsTW3yX2wNZpqEUchFNpcI-mrR4,1513
|
|
5
5
|
PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000001.txt,sha256=imOa6UoCOIZoPXT4_HNHgCUJtd4FTIdk2FZNHNBgJyg,3372
|
|
@@ -2569,6 +2569,7 @@ PgsFile/Corpora/Parallel/TED_EC_2017-2020/YvetteAlberdingkThijm_2017X公民影
|
|
|
2569
2569
|
PgsFile/Corpora/Parallel/TED_EC_2017-2020/ZacharyRWood_2018为什么持有不同意见的人值得被聆听..txt,sha256=4SFYMhlFSHP2aEVvNS1CBeogq0D2lPTE5VhFsZjlZnM,9546
|
|
2570
2570
|
PgsFile/Corpora/Parallel/TED_EC_2017-2020/ZeynepTufekci_2017G为了让人们点击广告_我们正在建造一个反乌托邦..txt,sha256=S3BSXKsNAX0ugVqBPhmJyaRF8MYAHapDMR12DoBYZgc,32353
|
|
2571
2571
|
PgsFile/Corpora/Parallel/Xi's Speech_CE_2021/Speech at a Ceremony Marking the Centenary of the CPC.txt,sha256=3suCjs2LF2_Endg2i_hc3GX1N8lTBORlqpMWEKsXFeM,54282
|
|
2572
|
+
PgsFile/Corpora/Stopwords/NLPIR.user,sha256=DykLJdr8_cVHrdCnDJES1O5dgmnYqfaSO1_dtAVKYJk,3356
|
|
2572
2573
|
PgsFile/Corpora/Stopwords/arabic.txt,sha256=yL9id0vdNF20WEvM0buRnRt1ByEeRGJuGDiY3jE7tlQ,1287
|
|
2573
2574
|
PgsFile/Corpora/Stopwords/bulgarian.txt,sha256=eiIwYk1TU8YcYYPbMPjUzZSZlgd7gl5o7d0LIthzqHQ,2409
|
|
2574
2575
|
PgsFile/Corpora/Stopwords/catalan.txt,sha256=8OyAOBHfWsEvKuLEphCfdiWhuxyVg1sOWV5gi2DJLwY,699
|
|
@@ -2618,8 +2619,8 @@ PgsFile/models/slovene.pickle,sha256=faxlAhKzeHs5mWwBvSCEEVST5vbsOQurYfdnUlsIuOo
|
|
|
2618
2619
|
PgsFile/models/spanish.pickle,sha256=Jx3GAnxKrgVvcqm_q1ZFz2fhmL9PlyiVhE5A9ZiczcM,597831
|
|
2619
2620
|
PgsFile/models/swedish.pickle,sha256=QNUOva1sqodxXy4wCxIX7JLELeIFpUPMSlaQO9LJrPo,1034496
|
|
2620
2621
|
PgsFile/models/turkish.pickle,sha256=065H12UB0CdpiAnRLnUpLJw5KRBIhUM0KAL5Xbl2XMw,1225013
|
|
2621
|
-
PgsFile-0.2.2.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
|
|
2622
|
-
PgsFile-0.2.
|
|
2623
|
-
PgsFile-0.2.2.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
|
2624
|
-
PgsFile-0.2.2.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
|
|
2625
|
-
PgsFile-0.2.2.dist-info/RECORD,,
|
|
2622
|
+
PgsFile-0.2.3.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
|
|
2623
|
+
PgsFile-0.2.3.dist-info/METADATA,sha256=a9KMN6LpC2raZYhWwrFhWCXKl7nWneiXT7KtvA74ruY,5070
|
|
2624
|
+
PgsFile-0.2.3.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
|
2625
|
+
PgsFile-0.2.3.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
|
|
2626
|
+
PgsFile-0.2.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|