PgsFile 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PgsFile might be problematic. Click here for more details.
- PgsFile/PgsFile.py +46 -0
- PgsFile/__init__.py +1 -0
- {PgsFile-0.1.4.dist-info → PgsFile-0.1.5.dist-info}/METADATA +3 -3
- {PgsFile-0.1.4.dist-info → PgsFile-0.1.5.dist-info}/RECORD +7 -7
- {PgsFile-0.1.4.dist-info → PgsFile-0.1.5.dist-info}/WHEEL +1 -1
- {PgsFile-0.1.4.dist-info → PgsFile-0.1.5.dist-info}/LICENSE +0 -0
- {PgsFile-0.1.4.dist-info → PgsFile-0.1.5.dist-info}/top_level.txt +0 -0
PgsFile/PgsFile.py
CHANGED
|
@@ -1406,3 +1406,49 @@ class PGScraper(object):
|
|
|
1406
1406
|
else:
|
|
1407
1407
|
print(r.status_code,"invalid url",url)
|
|
1408
1408
|
return all_want_list
|
|
1409
|
+
|
|
1410
|
+
|
|
1411
|
+
# -*- coding: utf-8 -*-
|
|
1412
|
+
"""
|
|
1413
|
+
Created on Thu Sep 17 16:11:45 2020
|
|
1414
|
+
Showing download progress and speed when audio-visual files like MP4, MP3, JPG etc are downloading!
|
|
1415
|
+
@author: Petercusin
|
|
1416
|
+
"""
|
|
1417
|
+
|
|
1418
|
+
import time
|
|
1419
|
+
from contextlib import closing
|
|
1420
|
+
|
|
1421
|
+
def audiovisual_downloader(url, path):
    """Stream the resource at *url* to the local file *path*.

    The response body is fetched in 10 KiB chunks and written to *path* in
    binary mode. When the server reports a positive ``Content-Length``, a
    progress/speed line is printed after every chunk via ``ProgressData``.

    Args:
        url: Address of the file to download (video, audio, image, ...).
        path: Destination file path; an existing file is overwritten.

    Raises:
        Whatever ``requests.get`` or ``open`` raise for network or
        filesystem errors; they are not caught here.
    """
    # NOTE(review): the HTTP status code is not checked, so an error page
    # returned by the server is saved to *path* like any other body.
    with closing(requests.get(url, stream=True, headers=my_headers)) as r:
        chunk_size = 1024 * 10

        # Content-Length is optional (it is absent with chunked transfer
        # encoding), so a missing or malformed header must not abort the
        # download; the total size is then treated as unknown.
        raw_length = r.headers.get('content-length')
        try:
            content_size = int(raw_length) if raw_length is not None else 0
        except ValueError:
            content_size = 0

        print('Initiating download...')
        with open(path, "wb") as f:
            # Progress can only be reported against a known, positive total.
            p = None
            if content_size > 0:
                p = ProgressData(size=content_size, unit='Kb', block=chunk_size)
            for chunk in r.iter_content(chunk_size=chunk_size):
                if not chunk:
                    # Skip empty chunks so they are not counted as progress.
                    continue
                f.write(chunk)
                if p is not None:
                    p.output()
|
|
1431
|
+
|
|
1432
|
+
class ProgressData(object):
    """Console reporter for download progress and per-chunk speed.

    Call :meth:`output` once for every block that has been received. Each
    call counts as one full block, so the reported amount is an estimate
    based on the block size rather than on the bytes actually written.
    """

    def __init__(self, block, size, unit, file_name='', ):
        """Store the transfer parameters and start the speed timer.

        Args:
            block: Size of one block in bytes; stored divided by 1000.
            size: Total expected size in bytes; stored divided by 1000.
            unit: Label printed after every amount (e.g. ``'Kb'``).
            file_name: Optional prefix printed in front of every message.
        """
        self.file_name = file_name
        self.block = block / 1000.0
        self.size = size / 1000.0
        self.unit = unit
        self.count = 0  # number of blocks reported so far
        self.start = time.time()  # start of the block currently timed

    def output(self):
        """Record one more received block and print the current status.

        Prints a completion message once the estimated loaded amount
        reaches the total size; otherwise prints a progress/speed line
        followed by a bar of ``/`` characters for the remaining share.
        """
        self.end = time.time()
        self.count += 1
        elapsed = self.end - self.start
        # Two calls can land on the same clock tick; report 0 instead of
        # dividing by zero.
        speed = self.block / elapsed if elapsed > 0 else 0
        # Restart the timer so the next call measures only the next block.
        self.start = time.time()
        loaded = self.count * self.block
        if loaded >= self.size:
            print(u'%sYour download has finished successfully.\r\n' % self.file_name)
        else:
            # The ratio is computed only here, where size > loaded >= 0, so
            # a total size of zero can no longer raise ZeroDivisionError.
            progress = round(loaded / self.size, 4)
            print(u'{0}Download Progress: {1:.2f}{2}/{3:.2f}{4} {5:.2%} Download Speed: {6:.2f}{7}/s'.
                  format(self.file_name, loaded, self.unit,
                         self.size, self.unit, progress, speed, self.unit))
            # Right-aligned bar whose length shrinks as the download advances.
            print('%50s' % ('/' * int((1 - progress) * 50)))
|
|
1454
|
+
|
PgsFile/__init__.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: PgsFile
|
|
3
|
-
Version: 0.1.4
|
|
4
|
-
Summary: This module aims to simplify Python package management, script execution, file handling, web scraping, data cleaning, and word list generation for literary students, making it more accessible and convenient to use.
|
|
3
|
+
Version: 0.1.5
|
|
4
|
+
Summary: This module aims to simplify Python package management, script execution, file handling, web scraping, multimedia download, data cleaning, and word list generation for literary students, making it more accessible and convenient to use.
|
|
5
5
|
Home-page: https://mp.weixin.qq.com/s/F94jyCBOQ3VmiPmSjv6ZAw
|
|
6
6
|
Author: Pan Guisheng
|
|
7
7
|
Author-email: 895284504@qq.com
|
|
@@ -34,7 +34,7 @@ Function 5: This library provides support for common text cleaning tasks, such a
|
|
|
34
34
|
|
|
35
35
|
Function 6: It also manages Python package installations and uninstallations, and allows running scripts and commands in Python interactive command lines instead of Windows command prompt.
|
|
36
36
|
|
|
37
|
-
Function 7:
|
|
37
|
+
Function 7: Download audiovisual files like videos, images, and audio using audiovisual_downloader, which is extremely useful and efficient. Additionally, scrape newspaper data with PGScraper, a highly efficient tool for this purpose.
|
|
38
38
|
|
|
39
39
|
Table 1: The directory and size of Pgs-Corpora
|
|
40
40
|
├── Idioms (1, 171.78 KB)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
PgsFile/PgsFile.py,sha256=
|
|
2
|
-
PgsFile/__init__.py,sha256=
|
|
1
|
+
PgsFile/PgsFile.py,sha256=T7O755N1kyi5OTJpjH8bsPqMBZrtsg5iPAqgK11l7mA,61528
|
|
2
|
+
PgsFile/__init__.py,sha256=nLukVrf9DO6wblmBsgfsPU90xULZ6AQrFOmjMC8Skio,1591
|
|
3
3
|
PgsFile/Corpora/Idioms/English_Idioms_8774.txt,sha256=qlsP0yI_XGECBRiPZuLkGZpdasc77sWSKexANu7v8_M,175905
|
|
4
4
|
PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000000.txt,sha256=SLGGSMSb7Ff1RoBstsTW3yX2wNZpqEUchFNpcI-mrR4,1513
|
|
5
5
|
PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000001.txt,sha256=imOa6UoCOIZoPXT4_HNHgCUJtd4FTIdk2FZNHNBgJyg,3372
|
|
@@ -2618,8 +2618,8 @@ PgsFile/models/slovene.pickle,sha256=faxlAhKzeHs5mWwBvSCEEVST5vbsOQurYfdnUlsIuOo
|
|
|
2618
2618
|
PgsFile/models/spanish.pickle,sha256=Jx3GAnxKrgVvcqm_q1ZFz2fhmL9PlyiVhE5A9ZiczcM,597831
|
|
2619
2619
|
PgsFile/models/swedish.pickle,sha256=QNUOva1sqodxXy4wCxIX7JLELeIFpUPMSlaQO9LJrPo,1034496
|
|
2620
2620
|
PgsFile/models/turkish.pickle,sha256=065H12UB0CdpiAnRLnUpLJw5KRBIhUM0KAL5Xbl2XMw,1225013
|
|
2621
|
-
PgsFile-0.1.
|
|
2622
|
-
PgsFile-0.1.
|
|
2623
|
-
PgsFile-0.1.
|
|
2624
|
-
PgsFile-0.1.
|
|
2625
|
-
PgsFile-0.1.
|
|
2621
|
+
PgsFile-0.1.5.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
|
|
2622
|
+
PgsFile-0.1.5.dist-info/METADATA,sha256=U1aOqNmJ9hsgSsRGkAniXwAaYggDedGf1YI6V-YZtx0,4867
|
|
2623
|
+
PgsFile-0.1.5.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
|
2624
|
+
PgsFile-0.1.5.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
|
|
2625
|
+
PgsFile-0.1.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|