PgsFile 0.5.6__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of PgsFile might be problematic. Click here for more details.

PgsFile/PgsFile.py CHANGED
@@ -5,7 +5,7 @@ Created on Wed Apr 28 23:44:04 2021
5
5
  @author: Petercusin
6
6
  """
7
7
 
8
- import os, sys
8
+ import os, sys, re
9
9
  def check_contain_chinese(check_str):
10
10
  for ch in check_str:
11
11
  if u'\u4e00' <=ch<= u'\u9fff':
@@ -1774,7 +1774,8 @@ def remove_empty_last_line(folder_path):
1774
1774
  f2.close()
1775
1775
  print(end_empty_files,str(len(end_empty_files))+" files found with last line empty!")
1776
1776
 
1777
- def find_txt_files_with_keyword(root_folder, keyword, case_sensitive=None):
1777
+
1778
+ def find_txt_files_with_keyword(root_folder, keyword, case_sensitive=False):
1778
1779
  """
1779
1780
  Find all .txt files whose names contain the specified keyword in a multi-level folder structure.
1780
1781
 
@@ -1784,21 +1785,22 @@ def find_txt_files_with_keyword(root_folder, keyword, case_sensitive=None):
1784
1785
 
1785
1786
  Returns:
1786
1787
  A list of file paths that match the search criteria.
1787
- """
1788
- import fnmatch,os
1789
- matches=[]
1790
- if case_sensitive is None or case_sensitive==False:
1791
- for root, dirnames, filenames in os.walk(root_folder):
1792
- for filename in fnmatch.filter(filenames, '*' + keyword + '*.txt'):
1793
- matches.append(os.path.join(root, filename))
1794
- elif case_sensitive==True:
1795
- import re
1796
- pattern=re.compile(r'.*' + re.escape(keyword) + r'.*\.txt')
1797
- for root, dirnames, filenames in os.walk(root_folder):
1788
+ """
1789
+ import fnmatch
1790
+ matches = []
1791
+ if not case_sensitive:
1792
+ keyword_lower = keyword.lower()
1793
+ for root, _, filenames in os.walk(root_folder):
1794
+ for filename in filenames:
1795
+ if filename.lower().endswith(".txt") and keyword_lower in filename.lower():
1796
+ matches.append(os.path.join(root, filename))
1797
+ else:
1798
+ pattern = re.compile(r'.*' + re.escape(keyword) + r'.*\.txt')
1799
+ for root, _, filenames in os.walk(root_folder):
1798
1800
  for filename in filenames:
1799
1801
  if pattern.match(filename):
1800
1802
  matches.append(os.path.join(root, filename))
1801
- return matches
1803
+ return matches
1802
1804
 
1803
1805
  import fnmatch
1804
1806
  def find_user_files_in_upper_folder(directory, user_file_name):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PgsFile
3
- Version: 0.5.6
3
+ Version: 0.5.7
4
4
  Summary: This module streamlines Python package management, script execution, file handling, web scraping, and multimedia downloads. It supports LLM-based NLP tasks like OCR, tokenization, lemmatization, POS tagging, NER, ATE, dependency parsing, MDD, WSD, LIWC, MIP analysis, text classification, and Chinese-English sentence alignment. Additionally, it generates word lists and data visualizations, making it a practical tool for data scraping and analysis—ideal for literary students and researchers.
5
5
  Home-page: https://github.com/Petercusin/PgsFile
6
6
  Author: Pan Guisheng
@@ -1,4 +1,4 @@
1
- PgsFile/PgsFile.py,sha256=OGbj8eYkXdAyWs2Or7TtHYrBgCiaeT1wwxcNUvLMUnA,180569
1
+ PgsFile/PgsFile.py,sha256=BiXU3odbUt8m9nq8yCT662l8tzM3W--drzLcyYacjCc,180591
2
2
  PgsFile/__init__.py,sha256=4AMmYVzDAr2Zy1r8WuN32lqgCxPaERTu5g6lnax4B9o,3840
3
3
  PgsFile/Corpora/Idioms/English_Idioms_8774.txt,sha256=qlsP0yI_XGECBRiPZuLkGZpdasc77sWSKexANu7v8_M,175905
4
4
  PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000000.txt,sha256=SLGGSMSb7Ff1RoBstsTW3yX2wNZpqEUchFNpcI-mrR4,1513
@@ -2596,8 +2596,8 @@ PgsFile/models/prompts/6. ATE3 prompt.txt,sha256=VnaXpPa6BgZHUcm8PxmP_qgU-8xEoTB
2596
2596
  PgsFile/models/prompts/7. SentAlign prompt.txt,sha256=hXpqqC-CAgo8EytkJ0MaLhevLefALazWriY-ew39jxs,1537
2597
2597
  PgsFile/models/prompts/8. TitleCase prompt.txt,sha256=4p-LfGy0xAj2uPi9amyMm41T6Z17VNpFFsGZOgWhROs,1136
2598
2598
  PgsFile/models/prompts/9. TextClassification prompt.txt,sha256=JhQJu3rQSstNtkIkxPR1K-QmH9sGBEhbVKHAi7ItMUA,1066
2599
- pgsfile-0.5.6.dist-info/licenses/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
2600
- pgsfile-0.5.6.dist-info/METADATA,sha256=mpdG5cKeYpf0jkEc0yKcRi-j_pLnL2s_52skr0SI2u0,3399
2601
- pgsfile-0.5.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2602
- pgsfile-0.5.6.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
2603
- pgsfile-0.5.6.dist-info/RECORD,,
2599
+ pgsfile-0.5.7.dist-info/licenses/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
2600
+ pgsfile-0.5.7.dist-info/METADATA,sha256=8DzYHkM73KVmx7rC7bZ5kCa4eGarABRZ1UszbuhqA_A,3399
2601
+ pgsfile-0.5.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2602
+ pgsfile-0.5.7.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
2603
+ pgsfile-0.5.7.dist-info/RECORD,,