PgsFile 0.3.5__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Potentially problematic release.


This version of PgsFile might be problematic. Click here for more details.

PgsFile/PgsFile.py CHANGED
@@ -1237,6 +1237,259 @@ claws_c7_tags = {
1237
1237
  }
1238
1238
  }
1239
1239
 
1240
+ spacy_pos_tags = {
1241
+ "$": {
1242
+ "description": "Dollar sign",
1243
+ "chinese_translation": "美元符号",
1244
+ "examples": ["$"]
1245
+ },
1246
+ "''": {
1247
+ "description": "Closing quotation mark",
1248
+ "chinese_translation": "闭合引号",
1249
+ "examples": ["'"]
1250
+ },
1251
+ ",": {
1252
+ "description": "Comma",
1253
+ "chinese_translation": "逗号",
1254
+ "examples": [","]
1255
+ },
1256
+ "-LRB-": {
1257
+ "description": "Left round bracket (i.e., '(')",
1258
+ "chinese_translation": "左圆括号",
1259
+ "examples": ["("]
1260
+ },
1261
+ "-RRB-": {
1262
+ "description": "Right round bracket (i.e., ')')",
1263
+ "chinese_translation": "右圆括号",
1264
+ "examples": [")"]
1265
+ },
1266
+ ".": {
1267
+ "description": "Sentence-final punctuation",
1268
+ "chinese_translation": "句末标点",
1269
+ "examples": ["."]
1270
+ },
1271
+ ":": {
1272
+ "description": "Colon, semi-colon, or dash",
1273
+ "chinese_translation": "冒号、分号或破折号",
1274
+ "examples": [":", ";", "-"]
1275
+ },
1276
+ "ADD": {
1277
+ "description": "Email address",
1278
+ "chinese_translation": "电子邮件地址",
1279
+ "examples": ["example@example.com"]
1280
+ },
1281
+ "AFX": {
1282
+ "description": "Affix",
1283
+ "chinese_translation": "词缀",
1284
+ "examples": ["un-", "re-", "-ing"]
1285
+ },
1286
+ "CC": {
1287
+ "description": "Coordinating conjunction",
1288
+ "chinese_translation": "并列连词",
1289
+ "examples": ["and", "but", "or"]
1290
+ },
1291
+ "CD": {
1292
+ "description": "Cardinal number",
1293
+ "chinese_translation": "基数",
1294
+ "examples": ["one", "two", "three"]
1295
+ },
1296
+ "DT": {
1297
+ "description": "Determiner",
1298
+ "chinese_translation": "限定词",
1299
+ "examples": ["the", "a", "an"]
1300
+ },
1301
+ "EX": {
1302
+ "description": "Existential 'there'",
1303
+ "chinese_translation": "存在句中的there",
1304
+ "examples": ["there"]
1305
+ },
1306
+ "FW": {
1307
+ "description": "Foreign word",
1308
+ "chinese_translation": "外来词",
1309
+ "examples": ["rendezvous", "schadenfreude"]
1310
+ },
1311
+ "HYPH": {
1312
+ "description": "Hyphen",
1313
+ "chinese_translation": "连字符",
1314
+ "examples": ["-"]
1315
+ },
1316
+ "IN": {
1317
+ "description": "Preposition or subordinating conjunction",
1318
+ "chinese_translation": "介词或从属连词",
1319
+ "examples": ["in", "on", "at", "if", "because"]
1320
+ },
1321
+ "JJ": {
1322
+ "description": "Adjective",
1323
+ "chinese_translation": "形容词",
1324
+ "examples": ["happy", "sad", "big"]
1325
+ },
1326
+ "JJR": {
1327
+ "description": "Adjective, comparative",
1328
+ "chinese_translation": "形容词比较级",
1329
+ "examples": ["happier", "sadder", "bigger"]
1330
+ },
1331
+ "JJS": {
1332
+ "description": "Adjective, superlative",
1333
+ "chinese_translation": "形容词最高级",
1334
+ "examples": ["happiest", "saddest", "biggest"]
1335
+ },
1336
+ "LS": {
1337
+ "description": "List item marker",
1338
+ "chinese_translation": "列表项标记",
1339
+ "examples": ["1.", "2.", "3."]
1340
+ },
1341
+ "MD": {
1342
+ "description": "Modal",
1343
+ "chinese_translation": "情态动词",
1344
+ "examples": ["can", "could", "may"]
1345
+ },
1346
+ "NFP": {
1347
+ "description": "Superfluous punctuation",
1348
+ "chinese_translation": "多余的标点符号",
1349
+ "examples": ["..."]
1350
+ },
1351
+ "NN": {
1352
+ "description": "Noun, singular or mass",
1353
+ "chinese_translation": "单数或质量名词",
1354
+ "examples": ["cat", "water", "sand"]
1355
+ },
1356
+ "NNP": {
1357
+ "description": "Proper noun, singular",
1358
+ "chinese_translation": "单数专有名词",
1359
+ "examples": ["John", "London", "Everest"]
1360
+ },
1361
+ "NNPS": {
1362
+ "description": "Proper noun, plural",
1363
+ "chinese_translation": "复数专有名词",
1364
+ "examples": ["Smiths", "Alps"]
1365
+ },
1366
+ "NNS": {
1367
+ "description": "Noun, plural",
1368
+ "chinese_translation": "复数名词",
1369
+ "examples": ["cats", "dogs", "houses"]
1370
+ },
1371
+ "PDT": {
1372
+ "description": "Predeterminer",
1373
+ "chinese_translation": "前位限定词",
1374
+ "examples": ["all", "both", "half"]
1375
+ },
1376
+ "POS": {
1377
+ "description": "Possessive ending",
1378
+ "chinese_translation": "所有格结尾",
1379
+ "examples": ["'s"]
1380
+ },
1381
+ "PRP": {
1382
+ "description": "Personal pronoun",
1383
+ "chinese_translation": "人称代词",
1384
+ "examples": ["I", "you", "he"]
1385
+ },
1386
+ "PRP$": {
1387
+ "description": "Possessive pronoun",
1388
+ "chinese_translation": "所有格代词",
1389
+ "examples": ["my", "your", "his"]
1390
+ },
1391
+ "RB": {
1392
+ "description": "Adverb",
1393
+ "chinese_translation": "副词",
1394
+ "examples": ["quickly", "happily", "sadly"]
1395
+ },
1396
+ "RBR": {
1397
+ "description": "Adverb, comparative",
1398
+ "chinese_translation": "副词比较级",
1399
+ "examples": ["faster", "happier", "more quickly"]
1400
+ },
1401
+ "RBS": {
1402
+ "description": "Adverb, superlative",
1403
+ "chinese_translation": "副词最高级",
1404
+ "examples": ["fastest", "happiest", "most quickly"]
1405
+ },
1406
+ "RP": {
1407
+ "description": "Particle",
1408
+ "chinese_translation": "小品词",
1409
+ "examples": ["up", "down", "off"]
1410
+ },
1411
+ "SYM": {
1412
+ "description": "Symbol",
1413
+ "chinese_translation": "符号",
1414
+ "examples": ["+", "=", "<"]
1415
+ },
1416
+ "TO": {
1417
+ "description": "'to'",
1418
+ "chinese_translation": "'to'",
1419
+ "examples": ["to"]
1420
+ },
1421
+ "UH": {
1422
+ "description": "Interjection",
1423
+ "chinese_translation": "感叹词",
1424
+ "examples": ["oh", "ah", "wow"]
1425
+ },
1426
+ "VB": {
1427
+ "description": "Verb, base form",
1428
+ "chinese_translation": "动词原形",
1429
+ "examples": ["run", "jump", "eat"]
1430
+ },
1431
+ "VBD": {
1432
+ "description": "Verb, past tense",
1433
+ "chinese_translation": "动词过去式",
1434
+ "examples": ["ran", "jumped", "ate"]
1435
+ },
1436
+ "VBG": {
1437
+ "description": "Verb, gerund or present participle",
1438
+ "chinese_translation": "动词动名词或现在分词",
1439
+ "examples": ["running", "jumping", "eating"]
1440
+ },
1441
+ "VBN": {
1442
+ "description": "Verb, past participle",
1443
+ "chinese_translation": "动词过去分词",
1444
+ "examples": ["run", "jumped", "eaten"]
1445
+ },
1446
+ "VBP": {
1447
+ "description": "Verb, non-3rd person singular present",
1448
+ "chinese_translation": "动词非第三人称单数现在式",
1449
+ "examples": ["run", "jump", "eat"]
1450
+ },
1451
+ "VBZ": {
1452
+ "description": "Verb, 3rd person singular present",
1453
+ "chinese_translation": "动词第三人称单数现在式",
1454
+ "examples": ["runs", "jumps", "eats"]
1455
+ },
1456
+ "WDT": {
1457
+ "description": "Wh-determiner",
1458
+ "chinese_translation": "Wh限定词",
1459
+ "examples": ["which", "that", "what"]
1460
+ },
1461
+ "WP": {
1462
+ "description": "Wh-pronoun",
1463
+ "chinese_translation": "Wh代词",
1464
+ "examples": ["who", "whom", "what"]
1465
+ },
1466
+ "WP$": {
1467
+ "description": "Possessive wh-pronoun",
1468
+ "chinese_translation": "所有格Wh代词",
1469
+ "examples": ["whose"]
1470
+ },
1471
+ "WRB": {
1472
+ "description": "Wh-adverb",
1473
+ "chinese_translation": "Wh副词",
1474
+ "examples": ["where", "when", "why"]
1475
+ },
1476
+ "XX": {
1477
+ "description": "Unknown",
1478
+ "chinese_translation": "未知",
1479
+ "examples": []
1480
+ },
1481
+ "_SP": {
1482
+ "description": "Space",
1483
+ "chinese_translation": "空格",
1484
+ "examples": [" "]
1485
+ },
1486
+ "``": {
1487
+ "description": "Opening quotation mark",
1488
+ "chinese_translation": "开放引号",
1489
+ "examples": ["`"]
1490
+ }
1491
+ }
1492
+
1240
1493
 
1241
1494
  def word_list(split_words):
1242
1495
  """
@@ -1359,6 +1612,14 @@ def remove_empty_folders(folder_path):
1359
1612
  print(delet_root)
1360
1613
  print("Folders removed: ",len(delet_root))
1361
1614
 
1615
+ def remove_file(file_path):
1616
+ import os
1617
+ if os.path.exists(file_path):
1618
+ os.remove(file_path)
1619
+ print(f'{file_path} removed!')
1620
+ else:
1621
+ print(f"{file_path} doesn't exist")
1622
+
1362
1623
  def concatenate_excel_files(directory_path, output_file):
1363
1624
  # List to hold DataFrames
1364
1625
  dataframes = []
PgsFile/__init__.py CHANGED
@@ -25,7 +25,8 @@ from .PgsFile import FilePath, FileName, DirList
25
25
  from .PgsFile import get_subfolder_path, get_full_path
26
26
  from .PgsFile import makedirec, makefile
27
27
  from .PgsFile import source_path, next_folder_names, get_directory_tree_with_meta, find_txt_files_with_keyword
28
- from .PgsFile import remove_empty_folders, remove_empty_txts, remove_empty_lines, remove_empty_last_line, move_file, copy_file
28
+ from .PgsFile import remove_empty_folders, remove_empty_txts, remove_empty_lines, remove_empty_last_line
29
+ from .PgsFile import move_file, copy_file, remove_file
29
30
  from .PgsFile import concatenate_excel_files
30
31
  from .PgsFile import set_permanent_environment_variable
31
32
  from .PgsFile import delete_permanent_environment_variable
@@ -35,7 +36,7 @@ from .PgsFile import get_system_info
35
36
  # 6. Data cleaning
36
37
  from .PgsFile import BigPunctuation, StopTags, Special, yhd
37
38
  from .PgsFile import ZhStopWords, EnPunctuation, get_stopwords, get_CET_dics, get_BNC_dic
38
- from .PgsFile import nltk_en_tags, nltk_tag_mapping, thulac_tags, ICTCLAS2008, LangCodes, pgs_abbres_words, usua_tag_set, claws_c7_tags
39
+ from .PgsFile import nltk_en_tags, nltk_tag_mapping, thulac_tags, ICTCLAS2008, LangCodes, pgs_abbres_words, usua_tag_set, claws_c7_tags, spacy_pos_tags
39
40
  from .PgsFile import check_contain_chinese, check_contain_number
40
41
  from .PgsFile import replace_chinese_punctuation_with_english
41
42
  from .PgsFile import replace_english_punctuation_with_chinese
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: PgsFile
3
- Version: 0.3.5
3
+ Version: 0.3.6
4
4
  Summary: This module simplifies Python package management, script execution, file handling, web scraping, and multimedia downloads. The module supports LLM-based NLP tasks such as tokenization, lemmatization, POS tagging, NER, dependency parsing, MDD, WSD, and MIP analysis. It also generates word lists and plots data, aiding literary students. Ideal for scraping data, cleaning text, and analyzing language, it offers user-friendly tools to streamline workflows.
5
5
  Home-page: https://mp.weixin.qq.com/s/12-KVLfaPszoZkCxuRd-nQ?token=1589547443&lang=zh_CN
6
6
  Author: Pan Guisheng
@@ -1,5 +1,5 @@
1
- PgsFile/PgsFile.py,sha256=qKo8b6fgwBwhuPi4sEDKh4inr_1SoietOUIGbVLoToM,141592
2
- PgsFile/__init__.py,sha256=cS9OjLOCwguaFasoazpcqsCvBvJFCZijxFfzQACMJjE,3358
1
+ PgsFile/PgsFile.py,sha256=52Uxj1gii1F1J9rvWnR1cFffIumeqBDsJRaN_uLoZUg,149704
2
+ PgsFile/__init__.py,sha256=9vTeHtnxXaf_Qo36pes9o5_MU_M5M7MSMUddDWvkoDA,3408
3
3
  PgsFile/Corpora/Idioms/English_Idioms_8774.txt,sha256=qlsP0yI_XGECBRiPZuLkGZpdasc77sWSKexANu7v8_M,175905
4
4
  PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000000.txt,sha256=SLGGSMSb7Ff1RoBstsTW3yX2wNZpqEUchFNpcI-mrR4,1513
5
5
  PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000001.txt,sha256=imOa6UoCOIZoPXT4_HNHgCUJtd4FTIdk2FZNHNBgJyg,3372
@@ -2585,8 +2585,8 @@ PgsFile/models/fonts/博洋行书3500.TTF,sha256=VrgeHr8cgOL6JD05QyuD9ZSyw4J2aIV
2585
2585
  PgsFile/models/fonts/陆柬之行书字体.ttf,sha256=Zpd4Z7E9w-Qy74yklXHk4vM7HOtHuQgllvygxZZ1Hvs,1247288
2586
2586
  PgsFile/models/prompts/1. MIP prompt.txt,sha256=4lHlHmleayRytqr1n9jtt6vn1rQvyf4BKeThpbwI8o8,1638
2587
2587
  PgsFile/models/prompts/2. WSD prompt.txt,sha256=o-ZFtCRUCDrXgm040WTQch9v2Y_r2SIlrZaquilJjgQ,2348
2588
- PgsFile-0.3.5.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
2589
- PgsFile-0.3.5.dist-info/METADATA,sha256=Hi45rI7as2xrOLm4nKIIbnGuGfmQEnxgGDHYqHPDqrI,2892
2590
- PgsFile-0.3.5.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
2591
- PgsFile-0.3.5.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
2592
- PgsFile-0.3.5.dist-info/RECORD,,
2588
+ PgsFile-0.3.6.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
2589
+ PgsFile-0.3.6.dist-info/METADATA,sha256=HkRDJ8CCZoGrkKIuwlSpju61tF5bdn-2Hbrwt4B6zd8,2892
2590
+ PgsFile-0.3.6.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
2591
+ PgsFile-0.3.6.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
2592
+ PgsFile-0.3.6.dist-info/RECORD,,