PgsFile 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of PgsFile might be problematic. Click here for more details.

PgsFile/PgsFile.py CHANGED
@@ -1406,3 +1406,49 @@ class PGScraper(object):
1406
1406
  else:
1407
1407
  print(r.status_code,"invalid url",url)
1408
1408
  return all_want_list
1409
+
1410
+
1411
+ # -*- coding: utf-8 -*-
1412
+ """
1413
+ Created on Thu Sep 17 16:11:45 2020
1414
+ Showing download progress and speed when audio-visual files like MP4, MP3, JPG etc are downloading!
1415
+ @author: Petercusin
1416
+ """
1417
+
1418
+ import time
1419
+ from contextlib import closing
1420
+
1421
def audiovisual_downloader(url, path):
    """Stream the resource at *url* into the local file *path*, printing progress.

    The response body is fetched in 10 KiB chunks so the whole file is never
    held in memory. When the server reports a positive ``Content-Length``, a
    ``ProgressData`` reporter prints one progress line per chunk written.

    Args:
        url: Address of the file to download.
        path: Destination file path; opened in binary write mode, so an
            existing file is overwritten.

    Note:
        ``requests`` and ``my_headers`` are module-level names defined outside
        this block (``my_headers`` is presumably the default request headers --
        confirm against the rest of the module).
    """
    chunk_size = 1024 * 10
    with closing(requests.get(url, stream=True, headers=my_headers)) as r:
        # Content-Length is optional (it is absent with chunked transfer
        # encoding), so a missing or malformed header must not abort the
        # download; fall back to "size unknown".
        try:
            content_size = int(r.headers.get('content-length', 0))
        except (TypeError, ValueError):
            content_size = 0
        print('Initiating download...')
        with open(path, "wb") as f:
            # Progress can only be expressed relative to a known total size.
            p = None
            if content_size > 0:
                p = ProgressData(size=content_size, unit='Kb', block=chunk_size)
            for chunk in r.iter_content(chunk_size=chunk_size):
                if not chunk:
                    # Nothing to write and nothing to count for an empty chunk.
                    continue
                f.write(chunk)
                if p is not None:
                    p.output()
1431
+
1432
class ProgressData(object):
    """Console progress reporter for a transfer processed in fixed-size blocks.

    Every call to :meth:`output` counts one more block as transferred and
    prints either a progress line (amount loaded, total, percentage, speed of
    the latest block, and a bar of ``/`` characters for the remaining share)
    or a completion message once the counted amount reaches the total.

    Args:
        block: Size of one block; stored divided by 1000.
        size: Total size of the transfer; stored divided by 1000.
        unit: Label printed after every amount (e.g. ``'Kb'``).
        file_name: Optional prefix printed at the start of each message.
    """

    def __init__(self, block, size, unit, file_name='', ):
        self.file_name = file_name
        self.block = block / 1000.0
        self.size = size / 1000.0
        self.unit = unit
        self.count = 0
        self.start = time.time()

    def output(self):
        """Register one more block and print the current progress.

        The amount loaded is estimated as ``count * block``; it is not the
        exact number of bytes received.
        """
        self.end = time.time()
        self.count += 1
        elapsed = self.end - self.start
        # A zero (or negative, after a clock adjustment) interval cannot
        # yield a meaningful speed, so report 0 instead of dividing.
        speed = self.block / elapsed if elapsed > 0 else 0
        # The next block's interval starts where this one ended.
        self.start = self.end
        loaded = self.count * self.block
        if loaded >= self.size:
            print(u'%sYour download has finished successfully.\r\n' % self.file_name)
            return
        # Only reached while loaded < size, so the ratio is computed here
        # rather than before the check: a total size of 0 never divides.
        progress = round(loaded / self.size, 4)
        print(u'{0}Download Progress: {1:.2f}{2}/{3:.2f}{4} {5:.2%} Download Speed: {6:.2f}{7}/s'.
              format(self.file_name, loaded, self.unit,
                     self.size, self.unit, progress, speed, self.unit))
        # Right-aligned bar whose length shrinks as the download advances.
        print('%50s' % ('/' * int((1 - progress) * 50)))
1454
+
PgsFile/__init__.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from .PgsFile import PGScraper
2
+ from .PgsFile import audiovisual_downloader
2
3
 
3
4
  from .PgsFile import install_package, uninstall_package
4
5
  from .PgsFile import run_script, run_command
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: PgsFile
3
- Version: 0.1.4
4
- Summary: This module aims to simplify Python package management, script execution, file handling, web scraping, data cleaning, and word list generation for literary students, making it more accessible and convenient to use.
3
+ Version: 0.1.5
4
+ Summary: This module aims to simplify Python package management, script execution, file handling, web scraping, multimedia download, data cleaning, and word list generation for literary students, making it more accessible and convenient to use.
5
5
  Home-page: https://mp.weixin.qq.com/s/F94jyCBOQ3VmiPmSjv6ZAw
6
6
  Author: Pan Guisheng
7
7
  Author-email: 895284504@qq.com
@@ -34,7 +34,7 @@ Function 5: This library provides support for common text cleaning tasks, such a
34
34
 
35
35
  Function 6: It also manages Python package installations and uninstallations, and allows running scripts and commands in Python interactive command lines instead of Windows command prompt.
36
36
 
37
- Function 7: This is extremely beneficial for scraping newspaper data using PGScraper.
37
+ Function 7: Download audiovisual files like videos, images, and audio using audiovisual_downloader, which is extremely useful and efficient. Additionally, scrape newspaper data with PGScraper, a highly efficient tool for this purpose.
38
38
 
39
39
  Table 1: The directory and size of Pgs-Corpora
40
40
  ├── Idioms (1, 171.78 KB)
@@ -1,5 +1,5 @@
1
- PgsFile/PgsFile.py,sha256=MJDO16jtUglvGNCosdfeILa96UGPiShCp52VAJ0PsVg,59834
2
- PgsFile/__init__.py,sha256=bu0BNgzYNRNpbZAFhvLo-S3npg8kasLam1n9VkiCQNQ,1546
1
+ PgsFile/PgsFile.py,sha256=T7O755N1kyi5OTJpjH8bsPqMBZrtsg5iPAqgK11l7mA,61528
2
+ PgsFile/__init__.py,sha256=nLukVrf9DO6wblmBsgfsPU90xULZ6AQrFOmjMC8Skio,1591
3
3
  PgsFile/Corpora/Idioms/English_Idioms_8774.txt,sha256=qlsP0yI_XGECBRiPZuLkGZpdasc77sWSKexANu7v8_M,175905
4
4
  PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000000.txt,sha256=SLGGSMSb7Ff1RoBstsTW3yX2wNZpqEUchFNpcI-mrR4,1513
5
5
  PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000001.txt,sha256=imOa6UoCOIZoPXT4_HNHgCUJtd4FTIdk2FZNHNBgJyg,3372
@@ -2618,8 +2618,8 @@ PgsFile/models/slovene.pickle,sha256=faxlAhKzeHs5mWwBvSCEEVST5vbsOQurYfdnUlsIuOo
2618
2618
  PgsFile/models/spanish.pickle,sha256=Jx3GAnxKrgVvcqm_q1ZFz2fhmL9PlyiVhE5A9ZiczcM,597831
2619
2619
  PgsFile/models/swedish.pickle,sha256=QNUOva1sqodxXy4wCxIX7JLELeIFpUPMSlaQO9LJrPo,1034496
2620
2620
  PgsFile/models/turkish.pickle,sha256=065H12UB0CdpiAnRLnUpLJw5KRBIhUM0KAL5Xbl2XMw,1225013
2621
- PgsFile-0.1.4.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
2622
- PgsFile-0.1.4.dist-info/METADATA,sha256=nF-Qqjy5N7mXswO_HrASmMFEBXjmI_jNjr5Z3OcxemI,4697
2623
- PgsFile-0.1.4.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
2624
- PgsFile-0.1.4.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
2625
- PgsFile-0.1.4.dist-info/RECORD,,
2621
+ PgsFile-0.1.5.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
2622
+ PgsFile-0.1.5.dist-info/METADATA,sha256=U1aOqNmJ9hsgSsRGkAniXwAaYggDedGf1YI6V-YZtx0,4867
2623
+ PgsFile-0.1.5.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
2624
+ PgsFile-0.1.5.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
2625
+ PgsFile-0.1.5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.40.0)
2
+ Generator: bdist_wheel (0.44.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5