PgsFile 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of PgsFile might be problematic. Click here for more details.

Binary file
PgsFile/PgsFile.py CHANGED
@@ -700,6 +700,21 @@ def find_txt_files_with_keyword(root_folder, keyword, case_sensitive=None):
700
700
  matches.append(os.path.join(root, filename))
701
701
  return matches
702
702
 
703
+ import fnmatch
704
def find_user_files_in_upper_folder(directory, user_file_name):
    """
    Find every ``<user_file_name>.user`` file under the parent folder of ``directory``.

    Parameters:
    directory (str): A folder path; the search starts one level above it.
    user_file_name (str): The file name without the ``.user`` extension. It is
        handed to ``fnmatch``, so shell-style wildcards are honoured.

    Returns:
    list: Full paths of the matching files in sorted traversal order
        (an empty list when nothing matches).
    """
    # The search root is the direct parent of ``directory``.
    upper_folder = os.path.dirname(os.path.abspath(directory))
    pattern = f'{user_file_name}.user'

    matching_files = []
    for root, dirs, files in os.walk(upper_folder):
        # os.walk lists entries in arbitrary filesystem order. Sorting ``dirs``
        # in place fixes the traversal order and sorting the matches fixes the
        # order within a folder, so the first element of the result is stable.
        dirs.sort()
        matching_files.extend(
            os.path.join(root, filename)
            for filename in sorted(fnmatch.filter(files, pattern))
        )

    return matching_files
717
+
703
718
  # Standard sentence tokenizer.
704
719
  def sent_tokenize(text, lang=None):
705
720
  import pysbd
@@ -782,6 +797,7 @@ def cs1(text):
782
797
  sentences=sentences
783
798
  return sentences
784
799
 
800
+
785
801
  def word_tokenize(text, pos_tagged=False):
786
802
  '''
787
803
  Parameters
@@ -816,10 +832,22 @@ def word_tokenize(text, pos_tagged=False):
816
832
  try:
817
833
  from nlpir import tools
818
834
  tools.update_license()
835
+ print("\n\nThe user file is ready. Please restart your kernel and run the Python script!")
819
836
  except Exception as err2:
820
- print("You need a VPN to try this service!", err2)
837
+ print("\n*****SOLUTION WARNING! \nYOU MAY NEED A VPN TO TRY THIS SERVICE!*****\n\n", err2)
821
838
  else:
822
- print(err)
839
+ try:
840
+ if "Can not open" in str(err):
841
+ user_folder=get_library_location("PgsFile")+"/PgsFile/Corpora/Stopwords"
842
+ destination_folder=get_library_location("nlpir-python")+"/nlpir/Data"
843
+ source_file=find_user_files_in_upper_folder(user_folder, "NLPIR")[0]
844
+ copy_file(source_file, destination_folder)
845
+ print("The user file is ready. Please restart your kernel and run the Python script!")
846
+ else:
847
+ print(err)
848
+ except Exception as rer:
849
+ print(rer)
850
+
823
851
  return words
824
852
 
825
853
  def pad_sequence(
@@ -1547,6 +1575,29 @@ def move_file(source_file, destination_folder, new_file_name=None):
1547
1575
  shutil.move(source_file, destination_file)
1548
1576
 
1549
1577
  print(f"File moved from {source_file} to {destination_file}")
1578
+
1579
def copy_file(source_file, destination_folder, new_file_name=None):
    """
    Copy a file to another folder.

    Parameters:
    source_file (str): The path to the source file.
    destination_folder (str): The path to the destination folder. It is created,
        including missing parent folders, when it does not exist yet.
    new_file_name (str, optional): The new name for the file in the destination
        folder. Defaults to None, which keeps the source file's own name.

    Returns:
    str: The path of the copied file inside the destination folder.
    """
    # exist_ok=True creates the folder only when needed and avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(destination_folder, exist_ok=True)

    # Keep the original file name unless the caller supplied a new one.
    target_name = new_file_name if new_file_name else os.path.basename(source_file)
    destination_file = os.path.join(destination_folder, target_name)

    # copy2 copies the content and, where possible, the file metadata.
    shutil.copy2(source_file, destination_file)

    return destination_file
1600
+
1550
1601
 
1551
1602
  def check_empty_cells(file_path):
1552
1603
  """
PgsFile/__init__.py CHANGED
@@ -22,7 +22,7 @@ from .PgsFile import FilePath, FileName, DirList
22
22
  from .PgsFile import get_subfolder_path
23
23
  from .PgsFile import makedirec, makefile
24
24
  from .PgsFile import source_path, next_folder_names, get_directory_tree_with_meta, find_txt_files_with_keyword
25
- from .PgsFile import remove_empty_folders, remove_empty_txts, remove_empty_lines, remove_empty_last_line, move_file
25
+ from .PgsFile import remove_empty_folders, remove_empty_txts, remove_empty_lines, remove_empty_last_line, move_file, copy_file
26
26
  from .PgsFile import concatenate_excel_files
27
27
 
28
28
  # 6. Data cleaning
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: PgsFile
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: This module aims to simplify Python package management, script execution, file handling, web scraping, multimedia download, data cleaning, NLP tasks like Chinese word tokenization and POS tagging, and word list generation for literary students, making it more accessible and convenient to use.
5
5
  Home-page: https://mp.weixin.qq.com/s/12-KVLfaPszoZkCxuRd-nQ?token=1589547443&lang=zh_CN
6
6
  Author: Pan Guisheng
@@ -1,5 +1,5 @@
1
- PgsFile/PgsFile.py,sha256=BQNZBrBYgyB_4TVxD0CJ6cMpiaaDzL9b7c7kcYtxmwQ,83682
2
- PgsFile/__init__.py,sha256=-Vy1SIh-BYopiEan-EjBtwqZsNteNrOqkws7hUj1d2w,2378
1
+ PgsFile/PgsFile.py,sha256=MpXQK6MLMBh1JMAcBw5sRiRof--x4OyARcCsWwn7Z4A,85828
2
+ PgsFile/__init__.py,sha256=E4VfPu1BxCBcZ5WXi5E6faPaNt_Shpvgh9LvBlg7eA0,2389
3
3
  PgsFile/Corpora/Idioms/English_Idioms_8774.txt,sha256=qlsP0yI_XGECBRiPZuLkGZpdasc77sWSKexANu7v8_M,175905
4
4
  PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000000.txt,sha256=SLGGSMSb7Ff1RoBstsTW3yX2wNZpqEUchFNpcI-mrR4,1513
5
5
  PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000001.txt,sha256=imOa6UoCOIZoPXT4_HNHgCUJtd4FTIdk2FZNHNBgJyg,3372
@@ -2569,6 +2569,7 @@ PgsFile/Corpora/Parallel/TED_EC_2017-2020/YvetteAlberdingkThijm_2017X公民影
2569
2569
  PgsFile/Corpora/Parallel/TED_EC_2017-2020/ZacharyRWood_2018为什么持有不同意见的人值得被聆听..txt,sha256=4SFYMhlFSHP2aEVvNS1CBeogq0D2lPTE5VhFsZjlZnM,9546
2570
2570
  PgsFile/Corpora/Parallel/TED_EC_2017-2020/ZeynepTufekci_2017G为了让人们点击广告_我们正在建造一个反乌托邦..txt,sha256=S3BSXKsNAX0ugVqBPhmJyaRF8MYAHapDMR12DoBYZgc,32353
2571
2571
  PgsFile/Corpora/Parallel/Xi's Speech_CE_2021/Speech at a Ceremony Marking the Centenary of the CPC.txt,sha256=3suCjs2LF2_Endg2i_hc3GX1N8lTBORlqpMWEKsXFeM,54282
2572
+ PgsFile/Corpora/Stopwords/NLPIR.user,sha256=DykLJdr8_cVHrdCnDJES1O5dgmnYqfaSO1_dtAVKYJk,3356
2572
2573
  PgsFile/Corpora/Stopwords/arabic.txt,sha256=yL9id0vdNF20WEvM0buRnRt1ByEeRGJuGDiY3jE7tlQ,1287
2573
2574
  PgsFile/Corpora/Stopwords/bulgarian.txt,sha256=eiIwYk1TU8YcYYPbMPjUzZSZlgd7gl5o7d0LIthzqHQ,2409
2574
2575
  PgsFile/Corpora/Stopwords/catalan.txt,sha256=8OyAOBHfWsEvKuLEphCfdiWhuxyVg1sOWV5gi2DJLwY,699
@@ -2618,8 +2619,8 @@ PgsFile/models/slovene.pickle,sha256=faxlAhKzeHs5mWwBvSCEEVST5vbsOQurYfdnUlsIuOo
2618
2619
  PgsFile/models/spanish.pickle,sha256=Jx3GAnxKrgVvcqm_q1ZFz2fhmL9PlyiVhE5A9ZiczcM,597831
2619
2620
  PgsFile/models/swedish.pickle,sha256=QNUOva1sqodxXy4wCxIX7JLELeIFpUPMSlaQO9LJrPo,1034496
2620
2621
  PgsFile/models/turkish.pickle,sha256=065H12UB0CdpiAnRLnUpLJw5KRBIhUM0KAL5Xbl2XMw,1225013
2621
- PgsFile-0.2.2.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
2622
- PgsFile-0.2.2.dist-info/METADATA,sha256=1fm2uh-uYKgDe26DvUGCmj2LbMcjwDum113nbmW-MIA,5070
2623
- PgsFile-0.2.2.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
2624
- PgsFile-0.2.2.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
2625
- PgsFile-0.2.2.dist-info/RECORD,,
2622
+ PgsFile-0.2.3.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
2623
+ PgsFile-0.2.3.dist-info/METADATA,sha256=a9KMN6LpC2raZYhWwrFhWCXKl7nWneiXT7KtvA74ruY,5070
2624
+ PgsFile-0.2.3.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
2625
+ PgsFile-0.2.3.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
2626
+ PgsFile-0.2.3.dist-info/RECORD,,