PgsFile 0.5.6__py3-none-any.whl → 0.5.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PgsFile might be problematic. Click here for more details.
- PgsFile/PgsFile.py +16 -14
- PgsFile/models/prompts/6. ATE3 prompt.txt +4 -6
- {pgsfile-0.5.6.dist-info → pgsfile-0.5.8.dist-info}/METADATA +1 -1
- {pgsfile-0.5.6.dist-info → pgsfile-0.5.8.dist-info}/RECORD +7 -7
- {pgsfile-0.5.6.dist-info → pgsfile-0.5.8.dist-info}/WHEEL +0 -0
- {pgsfile-0.5.6.dist-info → pgsfile-0.5.8.dist-info}/licenses/LICENSE +0 -0
- {pgsfile-0.5.6.dist-info → pgsfile-0.5.8.dist-info}/top_level.txt +0 -0
PgsFile/PgsFile.py
CHANGED
|
@@ -5,7 +5,7 @@ Created on Wed Apr 28 23:44:04 2021
|
|
|
5
5
|
@author: Petercusin
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
import os, sys
|
|
8
|
+
import os, sys, re
|
|
9
9
|
def check_contain_chinese(check_str):
|
|
10
10
|
for ch in check_str:
|
|
11
11
|
if u'\u4e00' <=ch<= u'\u9fff':
|
|
@@ -1774,7 +1774,8 @@ def remove_empty_last_line(folder_path):
|
|
|
1774
1774
|
f2.close()
|
|
1775
1775
|
print(end_empty_files,str(len(end_empty_files))+" files found with last line empty!")
|
|
1776
1776
|
|
|
1777
|
-
|
|
1777
|
+
|
|
1778
|
+
def find_txt_files_with_keyword(root_folder, keyword, case_sensitive=False):
|
|
1778
1779
|
"""
|
|
1779
1780
|
Find all .txt files whose names contain the specified keyword in a multi-level folder structure.
|
|
1780
1781
|
|
|
@@ -1784,21 +1785,22 @@ def find_txt_files_with_keyword(root_folder, keyword, case_sensitive=None):
|
|
|
1784
1785
|
|
|
1785
1786
|
Returns:
|
|
1786
1787
|
A list of file paths that match the search criteria.
|
|
1787
|
-
"""
|
|
1788
|
-
import fnmatch
|
|
1789
|
-
matches=[]
|
|
1790
|
-
if
|
|
1791
|
-
|
|
1792
|
-
|
|
1793
|
-
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1788
|
+
"""
|
|
1789
|
+
import fnmatch
|
|
1790
|
+
matches = []
|
|
1791
|
+
if not case_sensitive:
|
|
1792
|
+
keyword_lower = keyword.lower()
|
|
1793
|
+
for root, _, filenames in os.walk(root_folder):
|
|
1794
|
+
for filename in filenames:
|
|
1795
|
+
if filename.lower().endswith(".txt") and keyword_lower in filename.lower():
|
|
1796
|
+
matches.append(os.path.join(root, filename))
|
|
1797
|
+
else:
|
|
1798
|
+
pattern = re.compile(r'.*' + re.escape(keyword) + r'.*\.txt')
|
|
1799
|
+
for root, _, filenames in os.walk(root_folder):
|
|
1798
1800
|
for filename in filenames:
|
|
1799
1801
|
if pattern.match(filename):
|
|
1800
1802
|
matches.append(os.path.join(root, filename))
|
|
1801
|
-
return matches
|
|
1803
|
+
return matches
|
|
1802
1804
|
|
|
1803
1805
|
import fnmatch
|
|
1804
1806
|
def find_user_files_in_upper_folder(directory, user_file_name):
|
|
@@ -3,8 +3,8 @@ Extract terms from the input text and classify each term into one of the followi
|
|
|
3
3
|
• Common_Term: Domain-specific but not lexicon-specific (known by laypersons, still relevant to the domain)
|
|
4
4
|
• OOD_Term: Lexicon-specific but not domain-specific (known mainly by experts, but not relevant to the domain)
|
|
5
5
|
|
|
6
|
-
Terms should not include named entities.
|
|
7
|
-
Output Format (a list of python-list): [["2-vessel cad", "Specific_Term"], ["aortic valve", "Common_Term"], ["p-value", "OOD_Term"]]
|
|
6
|
+
Terms offered should not include named entities.
|
|
7
|
+
Return Output Format (a list of python-list): [["2-vessel cad", "Specific_Term"], ["aortic valve", "Common_Term"], ["p-value", "OOD_Term"]]
|
|
8
8
|
If no terms are presented, keep it an empty list: []
|
|
9
9
|
Do not include any explanations or additional text in the output.
|
|
10
10
|
|
|
@@ -13,7 +13,5 @@ Examples (in the domain of heart failure):
|
|
|
13
13
|
• "heart" → "Common_Term": relevant to the domain and understandable by the general public.
|
|
14
14
|
• "p-value" → "OOD_Term": expert-level term, but not domain-specific to heart failure.
|
|
15
15
|
|
|
16
|
-
Now, please extract the terms from the following sentence:
|
|
17
|
-
|
|
18
|
-
Sentence: "{sentence}"
|
|
19
|
-
Output: [[]]
|
|
16
|
+
Now, please extract the terms from the following sentence: "{sentence}"
|
|
17
|
+
Output: [["list of terms present"]]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: PgsFile
|
|
3
|
-
Version: 0.5.6
|
|
3
|
+
Version: 0.5.8
|
|
4
4
|
Summary: This module streamlines Python package management, script execution, file handling, web scraping, and multimedia downloads. It supports LLM-based NLP tasks like OCR, tokenization, lemmatization, POS tagging, NER, ATE, dependency parsing, MDD, WSD, LIWC, MIP analysis, text classification, and Chinese-English sentence alignment. Additionally, it generates word lists and data visualizations, making it a practical tool for data scraping and analysis—ideal for literary students and researchers.
|
|
5
5
|
Home-page: https://github.com/Petercusin/PgsFile
|
|
6
6
|
Author: Pan Guisheng
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
PgsFile/PgsFile.py,sha256=
|
|
1
|
+
PgsFile/PgsFile.py,sha256=BiXU3odbUt8m9nq8yCT662l8tzM3W--drzLcyYacjCc,180591
|
|
2
2
|
PgsFile/__init__.py,sha256=4AMmYVzDAr2Zy1r8WuN32lqgCxPaERTu5g6lnax4B9o,3840
|
|
3
3
|
PgsFile/Corpora/Idioms/English_Idioms_8774.txt,sha256=qlsP0yI_XGECBRiPZuLkGZpdasc77sWSKexANu7v8_M,175905
|
|
4
4
|
PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000000.txt,sha256=SLGGSMSb7Ff1RoBstsTW3yX2wNZpqEUchFNpcI-mrR4,1513
|
|
@@ -2592,12 +2592,12 @@ PgsFile/models/prompts/2. WSD prompt.txt,sha256=o-ZFtCRUCDrXgm040WTQch9v2Y_r2SIl
|
|
|
2592
2592
|
PgsFile/models/prompts/3. ICTCLAS prompt.txt,sha256=VFn6N_JViAbyy9NazA8gjX6SGo5mgBcZOf95aC9JB84,592
|
|
2593
2593
|
PgsFile/models/prompts/4. OCR prompt.txt,sha256=YxUQ2IlE52k0fcBnGsuOHqWAmfiEmIu6iRz5zecQ8dk,260
|
|
2594
2594
|
PgsFile/models/prompts/5. ATE prompt.txt,sha256=5wu0gGlsV7DI0LruYM3-uAC6brppyYD0IoiFVjMqm5Y,1553
|
|
2595
|
-
PgsFile/models/prompts/6. ATE3 prompt.txt,sha256=
|
|
2595
|
+
PgsFile/models/prompts/6. ATE3 prompt.txt,sha256=xn5HRkzIkrbdUP2I-huTd0KpuiQdRYFZZO02yJiOguM,1279
|
|
2596
2596
|
PgsFile/models/prompts/7. SentAlign prompt.txt,sha256=hXpqqC-CAgo8EytkJ0MaLhevLefALazWriY-ew39jxs,1537
|
|
2597
2597
|
PgsFile/models/prompts/8. TitleCase prompt.txt,sha256=4p-LfGy0xAj2uPi9amyMm41T6Z17VNpFFsGZOgWhROs,1136
|
|
2598
2598
|
PgsFile/models/prompts/9. TextClassification prompt.txt,sha256=JhQJu3rQSstNtkIkxPR1K-QmH9sGBEhbVKHAi7ItMUA,1066
|
|
2599
|
-
pgsfile-0.5.
|
|
2600
|
-
pgsfile-0.5.
|
|
2601
|
-
pgsfile-0.5.
|
|
2602
|
-
pgsfile-0.5.
|
|
2603
|
-
pgsfile-0.5.
|
|
2599
|
+
pgsfile-0.5.8.dist-info/licenses/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
|
|
2600
|
+
pgsfile-0.5.8.dist-info/METADATA,sha256=a3zd1hCPn2eQv49UsxmCPDpg6ADBVnl4sNCzfcd-Ba0,3399
|
|
2601
|
+
pgsfile-0.5.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
2602
|
+
pgsfile-0.5.8.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
|
|
2603
|
+
pgsfile-0.5.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|