pywaybackup 4.1.2__tar.gz → 4.1.3__tar.gz

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in that public registry.
Files changed (25)
  1. {pywaybackup-4.1.2/pywaybackup.egg-info → pywaybackup-4.1.3}/PKG-INFO +2 -1
  2. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pyproject.toml +18 -10
  3. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup/Arguments.py +1 -1
  4. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup/PyWayBackup.py +6 -2
  5. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup/archive_download.py +11 -7
  6. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup/files.py +9 -1
  7. {pywaybackup-4.1.2 → pywaybackup-4.1.3/pywaybackup.egg-info}/PKG-INFO +2 -1
  8. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup.egg-info/requires.txt +1 -0
  9. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/LICENSE +0 -0
  10. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/README.md +0 -0
  11. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup/Exception.py +0 -0
  12. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup/Snapshot.py +0 -0
  13. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup/SnapshotCollection.py +0 -0
  14. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup/Verbosity.py +0 -0
  15. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup/Worker.py +0 -0
  16. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup/__init__.py +0 -0
  17. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup/archive_save.py +0 -0
  18. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup/db.py +0 -0
  19. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup/helper.py +0 -0
  20. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup/main.py +0 -0
  21. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup.egg-info/SOURCES.txt +0 -0
  22. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup.egg-info/dependency_links.txt +0 -0
  23. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup.egg-info/entry_points.txt +0 -0
  24. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/pywaybackup.egg-info/top_level.txt +0 -0
  25. {pywaybackup-4.1.2 → pywaybackup-4.1.3}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pywaybackup
3
- Version: 4.1.2
3
+ Version: 4.1.3
4
4
  Summary: Query and download archive.org as simple as possible.
5
5
  Author-email: bitdruid <bitdruid@outlook.com>
6
6
  License: MIT License
@@ -29,6 +29,7 @@ Project-URL: homepage, https://github.com/bitdruid/python-wayback-machine-downlo
29
29
  Requires-Python: >=3.8
30
30
  Description-Content-Type: text/markdown
31
31
  License-File: LICENSE
32
+ Requires-Dist: ruff
32
33
  Requires-Dist: SQLAlchemy==2.0.43
33
34
  Requires-Dist: requests==2.32.3
34
35
  Requires-Dist: tqdm==4.67.1
@@ -2,20 +2,14 @@
2
2
  requires = ["setuptools", "wheel"]
3
3
  build-backend = "setuptools.build_meta"
4
4
 
5
+
5
6
  [tool.setuptools]
6
7
  packages = ["pywaybackup"]
7
8
 
9
+
8
10
  [project]
9
- name = "pywaybackup"
10
- version = "4.1.2"
11
- description = "Query and download archive.org as simple as possible."
12
- authors = [
13
- { name = "bitdruid", email = "bitdruid@outlook.com" }
14
- ]
15
- license = { file = "LICENSE" }
16
- readme = "README.md"
17
- requires-python = ">=3.8"
18
11
  dependencies = [
12
+ "ruff",
19
13
  "SQLAlchemy==2.0.43",
20
14
  "requests==2.32.3",
21
15
  "tqdm==4.67.1",
@@ -23,9 +17,23 @@ dependencies = [
23
17
  "python-magic==0.4.27; sys_platform == 'linux'",
24
18
  "python-magic-bin==0.4.14; sys_platform == 'win32' or sys_platform == 'darwin'",
25
19
  ]
20
+ name = "pywaybackup"
21
+ version = "4.1.3"
22
+ description = "Query and download archive.org as simple as possible."
23
+ authors = [{ name = "bitdruid", email = "bitdruid@outlook.com" }]
24
+ license = { file = "LICENSE" }
25
+ readme = "README.md"
26
+ requires-python = ">=3.8"
27
+
26
28
 
27
29
  [project.scripts]
28
30
  waybackup = "pywaybackup.main:cli"
29
31
 
32
+
30
33
  [project.urls]
31
- homepage = "https://github.com/bitdruid/python-wayback-machine-downloader"
34
+ homepage = "https://github.com/bitdruid/python-wayback-machine-downloader"
35
+
36
+
37
+ [tool.ruff]
38
+ line-length = 120
39
+ exclude = ["pywaybackup/Arguments.py"]
@@ -40,6 +40,7 @@ class Arguments:
40
40
  behavior.add_argument("--retry", type=int, default=0, metavar="", help="retry failed downloads (opt tries as int, else infinite)")
41
41
  behavior.add_argument("--workers", type=int, default=1, metavar="", help="number of workers (simultaneous downloads)")
42
42
  behavior.add_argument("--delay", type=int, default=0, metavar="", help="delay between each download in seconds")
43
+ behavior.add_argument("--wait", type=int, default=15, metavar="", help="seconds to wait before renewing connection after HTTP errors or snapshot download errors (default: 15)",)
43
44
 
44
45
  special = parser.add_argument_group("special")
45
46
  special.add_argument("--reset", action="store_true", help="reset the job and ignore existing cdx/db/csv files")
@@ -55,4 +56,3 @@ class Arguments:
55
56
  def get_args(self) -> dict:
56
57
  """Returns the parsed arguments as a dictionary."""
57
58
  return vars(self.args)
58
-
@@ -9,7 +9,7 @@ import pywaybackup.archive_save as archive_save
9
9
  from pywaybackup.archive_download import DownloadArchive
10
10
  from pywaybackup.db import Database as db
11
11
  from pywaybackup.Exception import Exception as ex
12
- from pywaybackup.files import CDXfile, CDXquery, CSVfile, File
12
+ from pywaybackup.files import CDXfile, CDXquery, CSVfile
13
13
  from pywaybackup.helper import sanitize_filename, url_split
14
14
  from pywaybackup.SnapshotCollection import SnapshotCollection
15
15
  from pywaybackup.Verbosity import Verbosity as vb
@@ -23,7 +23,7 @@ class _Status:
23
23
 
24
24
  Attributes:
25
25
  sc (SnapshotCollection): The current snapshot collection being processed.
26
- task (str): The current task being performed (e.g., 'initializing', 'downloading cdx', 'preparing snapshots', 'downloading snapshots', 'done').
26
+ task (str): The current task being performed (e.g., 'initializing', 'downloading cdx', ...).
27
27
  handled (int): The number of snapshots that have been processed so far.
28
28
  total (int): The total number of snapshots to be processed.
29
29
  progress (float): The progress of the backup process as a percentage.
@@ -129,6 +129,7 @@ class PyWayBackup:
129
129
  retry: int = 0,
130
130
  workers: int = 1,
131
131
  delay: int = 0,
132
+ wait: int = 15,
132
133
  reset: bool = False,
133
134
  keep: bool = False,
134
135
  silent: bool = True,
@@ -156,6 +157,8 @@ class PyWayBackup:
156
157
  self._retry = retry
157
158
  self._workers = workers
158
159
  self._delay = delay
160
+ self._wait = wait
161
+
159
162
  self._reset = reset
160
163
  self._keep = keep
161
164
 
@@ -344,6 +347,7 @@ class PyWayBackup:
344
347
  retry=self._retry,
345
348
  no_redirect=self._no_redirect,
346
349
  delay=self._delay,
350
+ wait=self._wait,
347
351
  workers=self._workers,
348
352
  )
349
353
  downloader.run(SnapshotCollection=collection)
@@ -81,7 +81,7 @@ class DownloadArchive:
81
81
  sc (SnapshotCollection): The snapshot collection being processed.
82
82
  """
83
83
 
84
- def __init__(self, mode: str, output: str, retry: int, no_redirect: bool, delay: int, workers: int):
84
+ def __init__(self, mode: str, output: str, retry: int, no_redirect: bool, delay: int, wait: int, workers: int):
85
85
  """
86
86
  Initialize the download manager with configuration options.
87
87
 
@@ -98,8 +98,8 @@ class DownloadArchive:
98
98
  self.retry = retry
99
99
  self.no_redirect = no_redirect
100
100
  self.delay = delay
101
+ self.wait = wait
101
102
  self.workers = workers
102
- self.no_redirect = no_redirect
103
103
  self.sc = None
104
104
 
105
105
  def run(self, SnapshotCollection: SnapshotCollection):
@@ -208,7 +208,7 @@ class DownloadArchive:
208
208
  f"\n-----> Worker: {worker.id}"
209
209
  f" - Attempt: [{worker.attempt}/{retry_max_attempt}]"
210
210
  f" Snapshot ID: [{worker.snapshot.counter}/{self.sc._snapshot_total}]"
211
- f" - {e.__class__.__name__} - renewing connection in 15 seconds..."
211
+ f" - {e.__class__.__name__} - renewing connection in {self.wait * download_attempt} seconds..."
212
212
  ),
213
213
  )
214
214
  vb.write(
@@ -216,10 +216,10 @@ class DownloadArchive:
216
216
  content=(
217
217
  f"Worker: {worker.id}"
218
218
  f" - Snapshot {worker.snapshot.counter}/{self.sc._snapshot_total}"
219
- f" - renewing connection in 15 seconds..."
219
+ f" - renewing connection in {self.wait * download_attempt} seconds..."
220
220
  ),
221
221
  )
222
- time.sleep(15)
222
+ time.sleep(self.wait * download_attempt)
223
223
  worker.refresh_connection()
224
224
  continue
225
225
  else:
@@ -244,9 +244,13 @@ class DownloadArchive:
244
244
 
245
245
  # depends on user - retries after timeout or proceed to next snapshot
246
246
  if self.retry > 0:
247
- worker.message.store(verbose=True, result="FAILED", content="retry timeout: 15 seconds...")
247
+ worker.message.store(
248
+ verbose=True,
249
+ result="FAILED",
250
+ content=f"retry timeout: {self.wait * worker.attempt} seconds...",
251
+ )
248
252
  worker.message.write()
249
- time.sleep(15)
253
+ time.sleep(self.wait * worker.attempt)
250
254
  else:
251
255
  worker.message.store(verbose=None, result="FAILED", content="no attempt left")
252
256
  worker.message.write()
@@ -59,7 +59,15 @@ class CDXquery:
59
59
  )
60
60
  filter_filetype = f"&filter=original:.*\\.({'|'.join(self.filter_filetype)})$" if self.filter_filetype else ""
61
61
 
62
- return f"https://web.archive.org/cdx/search/cdx?output=json&url={cdx_url}{period}&fl=timestamp,digest,mimetype,statuscode,original{limit}{filter_filetype}{filter_statuscode}"
62
+ return (
63
+ f"https://web.archive.org/cdx/search/cdx?"
64
+ f"output=json"
65
+ f"&url={cdx_url}{period}"
66
+ f"&fl=timestamp,digest,mimetype,statuscode,original"
67
+ f"{limit}"
68
+ f"{filter_filetype}"
69
+ f"{filter_statuscode}"
70
+ )
63
71
 
64
72
 
65
73
  class File:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pywaybackup
3
- Version: 4.1.2
3
+ Version: 4.1.3
4
4
  Summary: Query and download archive.org as simple as possible.
5
5
  Author-email: bitdruid <bitdruid@outlook.com>
6
6
  License: MIT License
@@ -29,6 +29,7 @@ Project-URL: homepage, https://github.com/bitdruid/python-wayback-machine-downlo
29
29
  Requires-Python: >=3.8
30
30
  Description-Content-Type: text/markdown
31
31
  License-File: LICENSE
32
+ Requires-Dist: ruff
32
33
  Requires-Dist: SQLAlchemy==2.0.43
33
34
  Requires-Dist: requests==2.32.3
34
35
  Requires-Dist: tqdm==4.67.1
@@ -1,3 +1,4 @@
1
+ ruff
1
2
  SQLAlchemy==2.0.43
2
3
  requests==2.32.3
3
4
  tqdm==4.67.1
File without changes
File without changes
File without changes