PgsFile 0.3.5__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PgsFile might be problematic. Click here for more details.
- PgsFile/PgsFile.py +261 -0
- PgsFile/__init__.py +3 -2
- {PgsFile-0.3.5.dist-info → PgsFile-0.3.6.dist-info}/METADATA +1 -1
- {PgsFile-0.3.5.dist-info → PgsFile-0.3.6.dist-info}/RECORD +7 -7
- {PgsFile-0.3.5.dist-info → PgsFile-0.3.6.dist-info}/LICENSE +0 -0
- {PgsFile-0.3.5.dist-info → PgsFile-0.3.6.dist-info}/WHEEL +0 -0
- {PgsFile-0.3.5.dist-info → PgsFile-0.3.6.dist-info}/top_level.txt +0 -0
PgsFile/PgsFile.py
CHANGED
|
@@ -1237,6 +1237,259 @@ claws_c7_tags = {
|
|
|
1237
1237
|
}
|
|
1238
1238
|
}
|
|
1239
1239
|
|
|
1240
|
+
# Lookup table for fine-grained part-of-speech tag labels.
# Each key is a tag string; each value holds an English "description",
# a "chinese_translation" of that description, and a list of "examples".
# NOTE(review): presumably these are the `token.tag_` labels emitted by
# spaCy's English pipelines -- confirm against the tagger labels of the
# model actually used.
spacy_pos_tags = {
    "$": {
        "description": "Dollar sign",
        "chinese_translation": "美元符号",
        "examples": ["$"]
    },
    "''": {
        "description": "Closing quotation mark",
        "chinese_translation": "闭合引号",
        "examples": ["'"]
    },
    ",": {
        "description": "Comma",
        "chinese_translation": "逗号",
        "examples": [","]
    },
    "-LRB-": {
        "description": "Left round bracket (i.e., '(')",
        "chinese_translation": "左圆括号",
        "examples": ["("]
    },
    "-RRB-": {
        "description": "Right round bracket (i.e., ')')",
        "chinese_translation": "右圆括号",
        "examples": [")"]
    },
    ".": {
        "description": "Sentence-final punctuation",
        "chinese_translation": "句末标点",
        "examples": ["."]
    },
    ":": {
        "description": "Colon, semi-colon, or dash",
        "chinese_translation": "冒号、分号或破折号",
        "examples": [":", ";", "-"]
    },
    "ADD": {
        "description": "Email address",
        "chinese_translation": "电子邮件地址",
        "examples": ["example@example.com"]
    },
    "AFX": {
        "description": "Affix",
        "chinese_translation": "词缀",
        "examples": ["un-", "re-", "-ing"]
    },
    "CC": {
        "description": "Coordinating conjunction",
        "chinese_translation": "并列连词",
        "examples": ["and", "but", "or"]
    },
    "CD": {
        "description": "Cardinal number",
        "chinese_translation": "基数",
        "examples": ["one", "two", "three"]
    },
    "DT": {
        "description": "Determiner",
        "chinese_translation": "限定词",
        "examples": ["the", "a", "an"]
    },
    "EX": {
        "description": "Existential 'there'",
        "chinese_translation": "存在句中的there",
        "examples": ["there"]
    },
    "FW": {
        "description": "Foreign word",
        "chinese_translation": "外来词",
        "examples": ["rendezvous", "schadenfreude"]
    },
    "HYPH": {
        "description": "Hyphen",
        "chinese_translation": "连字符",
        "examples": ["-"]
    },
    "IN": {
        "description": "Preposition or subordinating conjunction",
        "chinese_translation": "介词或从属连词",
        "examples": ["in", "on", "at", "if", "because"]
    },
    "JJ": {
        "description": "Adjective",
        "chinese_translation": "形容词",
        "examples": ["happy", "sad", "big"]
    },
    "JJR": {
        "description": "Adjective, comparative",
        "chinese_translation": "形容词比较级",
        "examples": ["happier", "sadder", "bigger"]
    },
    "JJS": {
        "description": "Adjective, superlative",
        "chinese_translation": "形容词最高级",
        "examples": ["happiest", "saddest", "biggest"]
    },
    "LS": {
        "description": "List item marker",
        "chinese_translation": "列表项标记",
        "examples": ["1.", "2.", "3."]
    },
    "MD": {
        "description": "Modal",
        "chinese_translation": "情态动词",
        "examples": ["can", "could", "may"]
    },
    "NFP": {
        "description": "Superfluous punctuation",
        "chinese_translation": "多余的标点符号",
        "examples": ["..."]
    },
    "NN": {
        "description": "Noun, singular or mass",
        # "mass" here means a mass (uncountable) noun, not physical mass.
        "chinese_translation": "单数或不可数名词",
        "examples": ["cat", "water", "sand"]
    },
    "NNP": {
        "description": "Proper noun, singular",
        "chinese_translation": "单数专有名词",
        "examples": ["John", "London", "Everest"]
    },
    "NNPS": {
        "description": "Proper noun, plural",
        "chinese_translation": "复数专有名词",
        "examples": ["Smiths", "Alps"]
    },
    "NNS": {
        "description": "Noun, plural",
        "chinese_translation": "复数名词",
        "examples": ["cats", "dogs", "houses"]
    },
    "PDT": {
        "description": "Predeterminer",
        "chinese_translation": "前位限定词",
        "examples": ["all", "both", "half"]
    },
    "POS": {
        "description": "Possessive ending",
        "chinese_translation": "所有格结尾",
        "examples": ["'s"]
    },
    "PRP": {
        "description": "Personal pronoun",
        "chinese_translation": "人称代词",
        "examples": ["I", "you", "he"]
    },
    "PRP$": {
        "description": "Possessive pronoun",
        "chinese_translation": "所有格代词",
        "examples": ["my", "your", "his"]
    },
    "RB": {
        "description": "Adverb",
        "chinese_translation": "副词",
        "examples": ["quickly", "happily", "sadly"]
    },
    "RBR": {
        "description": "Adverb, comparative",
        "chinese_translation": "副词比较级",
        # Single-token comparative adverbs only; adjective forms such as
        # "happier" belong under JJR.
        "examples": ["faster", "harder", "better"]
    },
    "RBS": {
        "description": "Adverb, superlative",
        "chinese_translation": "副词最高级",
        # Single-token superlative adverbs only; adjective forms such as
        # "happiest" belong under JJS.
        "examples": ["fastest", "hardest", "best"]
    },
    "RP": {
        "description": "Particle",
        "chinese_translation": "小品词",
        "examples": ["up", "down", "off"]
    },
    "SYM": {
        "description": "Symbol",
        "chinese_translation": "符号",
        "examples": ["+", "=", "<"]
    },
    "TO": {
        "description": "'to'",
        "chinese_translation": "'to'",
        "examples": ["to"]
    },
    "UH": {
        "description": "Interjection",
        "chinese_translation": "感叹词",
        "examples": ["oh", "ah", "wow"]
    },
    "VB": {
        "description": "Verb, base form",
        "chinese_translation": "动词原形",
        "examples": ["run", "jump", "eat"]
    },
    "VBD": {
        "description": "Verb, past tense",
        "chinese_translation": "动词过去式",
        "examples": ["ran", "jumped", "ate"]
    },
    "VBG": {
        "description": "Verb, gerund or present participle",
        "chinese_translation": "动词动名词或现在分词",
        "examples": ["running", "jumping", "eating"]
    },
    "VBN": {
        "description": "Verb, past participle",
        "chinese_translation": "动词过去分词",
        "examples": ["run", "jumped", "eaten"]
    },
    "VBP": {
        "description": "Verb, non-3rd person singular present",
        "chinese_translation": "动词非第三人称单数现在式",
        "examples": ["run", "jump", "eat"]
    },
    "VBZ": {
        "description": "Verb, 3rd person singular present",
        "chinese_translation": "动词第三人称单数现在式",
        "examples": ["runs", "jumps", "eats"]
    },
    "WDT": {
        "description": "Wh-determiner",
        "chinese_translation": "Wh限定词",
        "examples": ["which", "that", "what"]
    },
    "WP": {
        "description": "Wh-pronoun",
        "chinese_translation": "Wh代词",
        "examples": ["who", "whom", "what"]
    },
    "WP$": {
        "description": "Possessive wh-pronoun",
        "chinese_translation": "所有格Wh代词",
        "examples": ["whose"]
    },
    "WRB": {
        "description": "Wh-adverb",
        "chinese_translation": "Wh副词",
        "examples": ["where", "when", "why"]
    },
    "XX": {
        "description": "Unknown",
        "chinese_translation": "未知",
        "examples": []
    },
    "_SP": {
        "description": "Space",
        "chinese_translation": "空格",
        "examples": [" "]
    },
    "``": {
        "description": "Opening quotation mark",
        "chinese_translation": "开放引号",
        "examples": ["`"]
    }
}
|
|
1492
|
+
|
|
1240
1493
|
|
|
1241
1494
|
def word_list(split_words):
|
|
1242
1495
|
"""
|
|
@@ -1359,6 +1612,14 @@ def remove_empty_folders(folder_path):
|
|
|
1359
1612
|
print(delet_root)
|
|
1360
1613
|
print("Folders removed: ",len(delet_root))
|
|
1361
1614
|
|
|
1615
|
+
def remove_file(file_path):
    """
    Delete the file at ``file_path`` and print what happened.

    Parameters
    ----------
    file_path : str or path-like
        Path of the file (or symbolic link) to delete.

    Returns
    -------
    None
        The outcome is only reported on stdout: ``"<path> removed!"`` on
        success, ``"<path> doesn't exist"`` when nothing is there.

    Raises
    ------
    OSError
        Whatever ``os.remove`` raises for an existing path it cannot delete
        (for example a directory, or insufficient permissions).
    """
    import os
    # lexists() does not follow symlinks, so a dangling link counts as
    # present and gets removed instead of being reported as missing.
    if not os.path.lexists(file_path):
        print(f"{file_path} doesn't exist")
        return
    try:
        os.remove(file_path)
    except FileNotFoundError:
        # The path vanished between the check and the removal; treat this
        # like any other missing file rather than crashing.
        print(f"{file_path} doesn't exist")
        return
    print(f'{file_path} removed!')
|
|
1622
|
+
|
|
1362
1623
|
def concatenate_excel_files(directory_path, output_file):
|
|
1363
1624
|
# List to hold DataFrames
|
|
1364
1625
|
dataframes = []
|
PgsFile/__init__.py
CHANGED
|
@@ -25,7 +25,8 @@ from .PgsFile import FilePath, FileName, DirList
|
|
|
25
25
|
from .PgsFile import get_subfolder_path, get_full_path
|
|
26
26
|
from .PgsFile import makedirec, makefile
|
|
27
27
|
from .PgsFile import source_path, next_folder_names, get_directory_tree_with_meta, find_txt_files_with_keyword
|
|
28
|
-
from .PgsFile import remove_empty_folders, remove_empty_txts, remove_empty_lines, remove_empty_last_line
|
|
28
|
+
from .PgsFile import remove_empty_folders, remove_empty_txts, remove_empty_lines, remove_empty_last_line
|
|
29
|
+
from .PgsFile import move_file, copy_file, remove_file
|
|
29
30
|
from .PgsFile import concatenate_excel_files
|
|
30
31
|
from .PgsFile import set_permanent_environment_variable
|
|
31
32
|
from .PgsFile import delete_permanent_environment_variable
|
|
@@ -35,7 +36,7 @@ from .PgsFile import get_system_info
|
|
|
35
36
|
# 6. Data cleaning
|
|
36
37
|
from .PgsFile import BigPunctuation, StopTags, Special, yhd
|
|
37
38
|
from .PgsFile import ZhStopWords, EnPunctuation, get_stopwords, get_CET_dics, get_BNC_dic
|
|
38
|
-
from .PgsFile import nltk_en_tags, nltk_tag_mapping, thulac_tags, ICTCLAS2008, LangCodes, pgs_abbres_words, usua_tag_set, claws_c7_tags
|
|
39
|
+
from .PgsFile import nltk_en_tags, nltk_tag_mapping, thulac_tags, ICTCLAS2008, LangCodes, pgs_abbres_words, usua_tag_set, claws_c7_tags, spacy_pos_tags
|
|
39
40
|
from .PgsFile import check_contain_chinese, check_contain_number
|
|
40
41
|
from .PgsFile import replace_chinese_punctuation_with_english
|
|
41
42
|
from .PgsFile import replace_english_punctuation_with_chinese
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: PgsFile
|
|
3
|
-
Version: 0.3.5
|
|
3
|
+
Version: 0.3.6
|
|
4
4
|
Summary: This module simplifies Python package management, script execution, file handling, web scraping, and multimedia downloads. The module supports LLM-based NLP tasks such as tokenization, lemmatization, POS tagging, NER, dependency parsing, MDD, WSD, and MIP analysis. It also generates word lists and plots data, aiding literary students. Ideal for scraping data, cleaning text, and analyzing language, it offers user-friendly tools to streamline workflows.
|
|
5
5
|
Home-page: https://mp.weixin.qq.com/s/12-KVLfaPszoZkCxuRd-nQ?token=1589547443&lang=zh_CN
|
|
6
6
|
Author: Pan Guisheng
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
PgsFile/PgsFile.py,sha256=
|
|
2
|
-
PgsFile/__init__.py,sha256=
|
|
1
|
+
PgsFile/PgsFile.py,sha256=52Uxj1gii1F1J9rvWnR1cFffIumeqBDsJRaN_uLoZUg,149704
|
|
2
|
+
PgsFile/__init__.py,sha256=9vTeHtnxXaf_Qo36pes9o5_MU_M5M7MSMUddDWvkoDA,3408
|
|
3
3
|
PgsFile/Corpora/Idioms/English_Idioms_8774.txt,sha256=qlsP0yI_XGECBRiPZuLkGZpdasc77sWSKexANu7v8_M,175905
|
|
4
4
|
PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000000.txt,sha256=SLGGSMSb7Ff1RoBstsTW3yX2wNZpqEUchFNpcI-mrR4,1513
|
|
5
5
|
PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000001.txt,sha256=imOa6UoCOIZoPXT4_HNHgCUJtd4FTIdk2FZNHNBgJyg,3372
|
|
@@ -2585,8 +2585,8 @@ PgsFile/models/fonts/博洋行书3500.TTF,sha256=VrgeHr8cgOL6JD05QyuD9ZSyw4J2aIV
|
|
|
2585
2585
|
PgsFile/models/fonts/陆柬之行书字体.ttf,sha256=Zpd4Z7E9w-Qy74yklXHk4vM7HOtHuQgllvygxZZ1Hvs,1247288
|
|
2586
2586
|
PgsFile/models/prompts/1. MIP prompt.txt,sha256=4lHlHmleayRytqr1n9jtt6vn1rQvyf4BKeThpbwI8o8,1638
|
|
2587
2587
|
PgsFile/models/prompts/2. WSD prompt.txt,sha256=o-ZFtCRUCDrXgm040WTQch9v2Y_r2SIlrZaquilJjgQ,2348
|
|
2588
|
-
PgsFile-0.3.
|
|
2589
|
-
PgsFile-0.3.
|
|
2590
|
-
PgsFile-0.3.
|
|
2591
|
-
PgsFile-0.3.
|
|
2592
|
-
PgsFile-0.3.
|
|
2588
|
+
PgsFile-0.3.6.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
|
|
2589
|
+
PgsFile-0.3.6.dist-info/METADATA,sha256=HkRDJ8CCZoGrkKIuwlSpju61tF5bdn-2Hbrwt4B6zd8,2892
|
|
2590
|
+
PgsFile-0.3.6.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
|
2591
|
+
PgsFile-0.3.6.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
|
|
2592
|
+
PgsFile-0.3.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|