PyPI - PgsFile - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl - Mend

PgsFile 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of PgsFile might be problematic. Click here for more details.

Files changed (7) hide show

PgsFile/PgsFile.py CHANGED Viewed

@@ -591,6 +591,24 @@ def remove_empty_folders(folder_path):
     print(delet_root)
     print("Folders removed: ",len(delet_root))
+def concatenate_excel_files(directory_path, output_file):
+    '''
+    Combine every Excel workbook in a folder into a single Excel file.
+
+    Parameters
+    ----------
+    directory_path : str
+        Folder whose ``.xlsx``/``.xls`` files are read. Only the folder itself
+        is listed (sub-folders are not visited) and each workbook is read with
+        the ``pd.read_excel`` defaults.
+    output_file : str
+        Path of the combined workbook to write.
+
+    Raises
+    ------
+    ValueError
+        If the folder contains no Excel file to combine.
+    '''
+    # Resolve the output path so that a combined file left in the same folder
+    # by an earlier run is not read back in and duplicated.
+    output_path = None
+    if isinstance(output_file, (str, os.PathLike)):
+        output_path = os.path.normcase(os.path.abspath(output_file))
+    dataframes = []
+    # Sort the names: os.listdir() returns them in arbitrary order, which would
+    # make the row order of the result differ between runs and machines.
+    for filename in sorted(os.listdir(directory_path)):
+        # Skip the "~$" lock files Excel creates next to an open workbook;
+        # they carry an Excel extension but are not readable workbooks.
+        if filename.startswith('~$'):
+            continue
+        # Match the extension case-insensitively so "DATA.XLSX" is included.
+        if not filename.lower().endswith(('.xlsx', '.xls')):
+            continue
+        file_path = os.path.join(directory_path, filename)
+        if output_path is not None and os.path.normcase(os.path.abspath(file_path)) == output_path:
+            continue
+        dataframes.append(pd.read_excel(file_path))
+    if not dataframes:
+        # pd.concat([]) fails with "No objects to concatenate"; report the
+        # actual cause instead (same exception type as before).
+        raise ValueError(f"No Excel files (.xlsx/.xls) found in {directory_path}")
+    # Stack all sheets row-wise and renumber the index from 0
+    combined_df = pd.concat(dataframes, ignore_index=True)
+    combined_df.to_excel(output_file, index=False)
+    print(f"Combined Excel file saved as {output_file}")
 def remove_empty_lines(folder_path):
     files=FilePath(folder_path)
     for file in files:
@@ -735,6 +753,46 @@ def cs1(text):
         sentences=sentences
     return sentences
+def word_tokenize(text, pos_tagged=False):
+    '''
+    Segment Chinese text into words with the NLPIR/ICTCLAS tokenizer.
+
+    Parameters
+    ----------
+    text : str
+        Text to segment, e.g. "无独有偶，这个消息如晴天霹雳，霍尔姆斯听到后不知所措。中国电影家协会和中国作家协会，中国翻译协会是做慈善的。"
+    pos_tagged : bool, optional
+        Forwarded to ``ictclas.segment``. Any value other than the ``False``
+        object itself is passed on as True. The default is False.
+    Returns
+    -------
+    words : list or None
+        Whatever ``ictclas.segment`` returns; with ``pos_tagged=False`` a list like: ['无独有偶', '，', '这个', '消息', '如', '晴天霹雳', '，', '霍尔姆斯', '听到', '后', '不知所措', '。', '中国', '电影', '家', '协会', '和', '中国', '作家', '协会', '，', '中国', '翻译', '协会', '是', '做', '慈善', '的', '。', '']
+        None when importing or running the tokenizer fails: the error is
+        printed (or a licence update is attempted) instead of being raised.
+    '''
+    result = None
+    try:
+        try:
+            from nlpir import ictclas  # ICTCLAS segmenter from the Chinese Academy of Sciences
+        except Exception:
+            # Import failed: try to install the package, then import again.
+            print("installing nlpir/ICTCLAS...")
+            from PgsFile import install_package as installer
+            installer("nlpir-python")
+            from nlpir import ictclas
+        # Only the literal False disables tagging, exactly as before.
+        want_tags = pos_tagged is not False
+        result = ictclas.segment(text, pos_tagged=want_tags)
+    except Exception as failure:
+        if "expired" not in str(failure):
+            print(failure)
+        else:
+            # The error message mentions an expired licence: try to refresh it.
+            # The segmentation is not retried, so this call still returns None.
+            try:
+                from nlpir import tools
+                tools.update_license()
+            except Exception as update_error:
+                print("You need a VPN to try this service!", update_error)
+    return result
 def pad_sequence(
     sequence,
     n,

PgsFile/__init__.py CHANGED Viewed

@@ -23,6 +23,7 @@ from .PgsFile import get_subfolder_path
 from .PgsFile import makedirec, makefile
 from .PgsFile import source_path, next_folder_names, get_directory_tree_with_meta, find_txt_files_with_keyword
 from .PgsFile import remove_empty_folders, remove_empty_txts, remove_empty_lines, remove_empty_last_line, move_file
+from .PgsFile import concatenate_excel_files
 # 6. Data cleaning
 from .PgsFile import BigPunctuation, StopTags, Special, yhd
@@ -38,7 +39,7 @@ from .PgsFile import extract_chinese_punctuation, generate_password, sort_string
 from .PgsFile import strQ2B_raw, strQ2B_words
 from .PgsFile import ngrams, bigrams, trigrams, everygrams, compute_similarity
 from .PgsFile import word_list, batch_word_list
-from .PgsFile import cs, cs1, sent_tokenize
+from .PgsFile import cs, cs1, sent_tokenize, word_tokenize
 # 8. Maths
 from .PgsFile import len_rows, check_empty_cells

{PgsFile-0.2.0.dist-info → PgsFile-0.2.1.dist-info}/METADATA RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.1
 Name: PgsFile
-Version: 0.2.0
-Summary: This module aims to simplify Python package management, script execution, file handling, web scraping, multimedia download, data cleaning, and word list generation for literary students, making it more accessible and convenient to use.
+Version: 0.2.1
+Summary: This module aims to simplify Python package management, script execution, file handling, web scraping, multimedia download, data cleaning, NLP tasks, and word list generation for literary students, making it more accessible and convenient to use.
 Home-page: https://mp.weixin.qq.com/s/12-KVLfaPszoZkCxuRd-nQ?token=1589547443&lang=zh_CN
 Author: Pan Guisheng
 Author-email: 895284504@qq.com
@@ -20,6 +20,7 @@ Requires-Dist: fake-useragent
 Requires-Dist: lxml
 Requires-Dist: pimht
 Requires-Dist: pysbd
+Requires-Dist: nlpir-python
 Purpose: This module aims to assist Python beginners, particularly instructors and students of foreign languages and literature, by providing a convenient way to manage Python packages, run Python scripts, and perform operations on various file types such as txt, xlsx, json, tsv, html, mhtml, and docx. It also includes functionality for data scraping, cleaning and generating word lists.

{PgsFile-0.2.0.dist-info → PgsFile-0.2.1.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
-PgsFile/PgsFile.py,sha256=1-PR4NO2FF7lO8_lQmSP_VLVs6pV8jC_5_nOdGvwuhk,80684
-PgsFile/__init__.py,sha256=Tbr3MaFP7ZqhwVaYAnBJx7UBJhM4c884F8sFMQjfzXU,2317
+PgsFile/PgsFile.py,sha256=o6J3tipdCBkA0JvUz6vckZH_YlAgQmlAGQOMKdXb95M,82975
+PgsFile/__init__.py,sha256=-Vy1SIh-BYopiEan-EjBtwqZsNteNrOqkws7hUj1d2w,2378
 PgsFile/Corpora/Idioms/English_Idioms_8774.txt,sha256=qlsP0yI_XGECBRiPZuLkGZpdasc77sWSKexANu7v8_M,175905
 PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000000.txt,sha256=SLGGSMSb7Ff1RoBstsTW3yX2wNZpqEUchFNpcI-mrR4,1513
 PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000001.txt,sha256=imOa6UoCOIZoPXT4_HNHgCUJtd4FTIdk2FZNHNBgJyg,3372
@@ -2618,8 +2618,8 @@ PgsFile/models/slovene.pickle,sha256=faxlAhKzeHs5mWwBvSCEEVST5vbsOQurYfdnUlsIuOo
 PgsFile/models/spanish.pickle,sha256=Jx3GAnxKrgVvcqm_q1ZFz2fhmL9PlyiVhE5A9ZiczcM,597831
 PgsFile/models/swedish.pickle,sha256=QNUOva1sqodxXy4wCxIX7JLELeIFpUPMSlaQO9LJrPo,1034496
 PgsFile/models/turkish.pickle,sha256=065H12UB0CdpiAnRLnUpLJw5KRBIhUM0KAL5Xbl2XMw,1225013
-PgsFile-0.2.0.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
-PgsFile-0.2.0.dist-info/METADATA,sha256=u-nzDLhOIJYZ-nOp9FpE5EFWsjW3683viOehOfqQIvs,4959
-PgsFile-0.2.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-PgsFile-0.2.0.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
-PgsFile-0.2.0.dist-info/RECORD,,
+PgsFile-0.2.1.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
+PgsFile-0.2.1.dist-info/METADATA,sha256=PCrjMATNQrsqPfsVVC15cmOinp-o3HYR88kLMcsn2lA,4999
+PgsFile-0.2.1.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+PgsFile-0.2.1.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
+PgsFile-0.2.1.dist-info/RECORD,,

{PgsFile-0.2.0.dist-info → PgsFile-0.2.1.dist-info}/LICENSE RENAMED Viewed

File without changes

{PgsFile-0.2.0.dist-info → PgsFile-0.2.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{PgsFile-0.2.0.dist-info → PgsFile-0.2.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

PgsFile 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

Potentially problematic release.

PgsFile 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl