pywaybackup 4.1.2__tar.gz → 4.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. {pywaybackup-4.1.2/pywaybackup.egg-info → pywaybackup-4.1.4}/PKG-INFO +12 -3
  2. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/README.md +10 -2
  3. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pyproject.toml +18 -10
  4. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup/Arguments.py +15 -2
  5. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup/PyWayBackup.py +8 -3
  6. pywaybackup-4.1.4/pywaybackup/Verbosity.py +221 -0
  7. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup/archive_download.py +16 -10
  8. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup/files.py +9 -1
  9. {pywaybackup-4.1.2 → pywaybackup-4.1.4/pywaybackup.egg-info}/PKG-INFO +12 -3
  10. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup.egg-info/requires.txt +1 -0
  11. pywaybackup-4.1.2/pywaybackup/Verbosity.py +0 -136
  12. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/LICENSE +0 -0
  13. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup/Exception.py +0 -0
  14. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup/Snapshot.py +0 -0
  15. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup/SnapshotCollection.py +0 -0
  16. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup/Worker.py +0 -0
  17. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup/__init__.py +0 -0
  18. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup/archive_save.py +0 -0
  19. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup/db.py +0 -0
  20. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup/helper.py +0 -0
  21. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup/main.py +0 -0
  22. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup.egg-info/SOURCES.txt +0 -0
  23. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup.egg-info/dependency_links.txt +0 -0
  24. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup.egg-info/entry_points.txt +0 -0
  25. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/pywaybackup.egg-info/top_level.txt +0 -0
  26. {pywaybackup-4.1.2 → pywaybackup-4.1.4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pywaybackup
3
- Version: 4.1.2
3
+ Version: 4.1.4
4
4
  Summary: Query and download archive.org as simple as possible.
5
5
  Author-email: bitdruid <bitdruid@outlook.com>
6
6
  License: MIT License
@@ -29,6 +29,7 @@ Project-URL: homepage, https://github.com/bitdruid/python-wayback-machine-downlo
29
29
  Requires-Python: >=3.8
30
30
  Description-Content-Type: text/markdown
31
31
  License-File: LICENSE
32
+ Requires-Dist: ruff
32
33
  Requires-Dist: SQLAlchemy==2.0.43
33
34
  Requires-Dist: requests==2.32.3
34
35
  Requires-Dist: tqdm==4.67.1
@@ -216,8 +217,13 @@ Parameters will change the download behavior for snapshots.
216
217
  - **`-m`**, **`--metadata`**<br>
217
218
  Folder where metadata will be saved (`cdx`/`db`/`csv`/`log`). If you are downloading into a network share, you SHOULD set this to a local path because sqlite locking mechanism may cause issues with network shares.
218
219
 
219
- - **`--verbose`**:<br>
220
- Increase output verbosity.
220
+ - **`-v`**, **`--verbose`** `[level]`:<br>
221
+ Set verbosity level. Available levels:
222
+ - `low` (or `quiet`, `minimal`, `min`): Essential output only (same as no flag)
223
+ - `default` (or `normal`, `verbose`): Standard verbose output (default when flag is set)
224
+ - `high` (or `debug`, `detailed`, `max`): Detailed verbose output
225
+
226
+ Examples: `--verbose`, `--verbose default`, `--verbose high`, `-v high`
221
227
 
222
228
  - **`--log`** <!-- `<path>` -->:<br>
223
229
  Saves a log file into the output-dir. `waybackup_<sanitized_url>.log`.
@@ -237,6 +243,9 @@ Parameters will change the download behavior for snapshots.
237
243
  - **`--delay`** `<seconds>`:<br>
238
244
  Delay between download requests in seconds. Default is no delay (0).
239
245
 
246
+ - **`--wait`** `<seconds>`:<br>
247
+ Base number of seconds to wait before renewing the connection after HTTP errors or snapshot download errors. The actual wait is this value multiplied by the current attempt number. Default is 15 seconds.
248
+
240
249
  #### Job Handling:
241
250
 
242
251
  - **`--reset`**:
@@ -178,8 +178,13 @@ Parameters will change the download behavior for snapshots.
178
178
  - **`-m`**, **`--metadata`**<br>
179
179
  Folder where metadata will be saved (`cdx`/`db`/`csv`/`log`). If you are downloading into a network share, you SHOULD set this to a local path because sqlite locking mechanism may cause issues with network shares.
180
180
 
181
- - **`--verbose`**:<br>
182
- Increase output verbosity.
181
+ - **`-v`**, **`--verbose`** `[level]`:<br>
182
+ Set verbosity level. Available levels:
183
+ - `low` (or `quiet`, `minimal`, `min`): Essential output only (same as no flag)
184
+ - `default` (or `normal`, `verbose`): Standard verbose output (default when flag is set)
185
+ - `high` (or `debug`, `detailed`, `max`): Detailed verbose output
186
+
187
+ Examples: `--verbose`, `--verbose default`, `--verbose high`, `-v high`
183
188
 
184
189
  - **`--log`** <!-- `<path>` -->:<br>
185
190
  Saves a log file into the output-dir. `waybackup_<sanitized_url>.log`.
@@ -199,6 +204,9 @@ Parameters will change the download behavior for snapshots.
199
204
  - **`--delay`** `<seconds>`:<br>
200
205
  Delay between download requests in seconds. Default is no delay (0).
201
206
 
207
+ - **`--wait`** `<seconds>`:<br>
208
+ Base number of seconds to wait before renewing the connection after HTTP errors or snapshot download errors. The actual wait is this value multiplied by the current attempt number. Default is 15 seconds.
209
+
202
210
  #### Job Handling:
203
211
 
204
212
  - **`--reset`**:
@@ -2,20 +2,14 @@
2
2
  requires = ["setuptools", "wheel"]
3
3
  build-backend = "setuptools.build_meta"
4
4
 
5
+
5
6
  [tool.setuptools]
6
7
  packages = ["pywaybackup"]
7
8
 
9
+
8
10
  [project]
9
- name = "pywaybackup"
10
- version = "4.1.2"
11
- description = "Query and download archive.org as simple as possible."
12
- authors = [
13
- { name = "bitdruid", email = "bitdruid@outlook.com" }
14
- ]
15
- license = { file = "LICENSE" }
16
- readme = "README.md"
17
- requires-python = ">=3.8"
18
11
  dependencies = [
12
+ "ruff",
19
13
  "SQLAlchemy==2.0.43",
20
14
  "requests==2.32.3",
21
15
  "tqdm==4.67.1",
@@ -23,9 +17,23 @@ dependencies = [
23
17
  "python-magic==0.4.27; sys_platform == 'linux'",
24
18
  "python-magic-bin==0.4.14; sys_platform == 'win32' or sys_platform == 'darwin'",
25
19
  ]
20
+ name = "pywaybackup"
21
+ version = "4.1.4"
22
+ description = "Query and download archive.org as simple as possible."
23
+ authors = [{ name = "bitdruid", email = "bitdruid@outlook.com" }]
24
+ license = { file = "LICENSE" }
25
+ readme = "README.md"
26
+ requires-python = ">=3.8"
27
+
26
28
 
27
29
  [project.scripts]
28
30
  waybackup = "pywaybackup.main:cli"
29
31
 
32
+
30
33
  [project.urls]
31
- homepage = "https://github.com/bitdruid/python-wayback-machine-downloader"
34
+ homepage = "https://github.com/bitdruid/python-wayback-machine-downloader"
35
+
36
+
37
+ [tool.ruff]
38
+ line-length = 120
39
+ exclude = ["pywaybackup/Arguments.py"]
@@ -33,13 +33,27 @@ class Arguments:
33
33
  behavior = parser.add_argument_group("manipulate behavior")
34
34
  behavior.add_argument("-o", "--output", type=str, metavar="", help="output for all files - defaults to current directory")
35
35
  behavior.add_argument("-m", "--metadata", type=str, metavar="", help="change directory for db/cdx/csv/log files")
36
- behavior.add_argument("-v", "--verbose", action="store_true", help="overwritten by progress - gives detailed output")
36
+ behavior.add_argument(
37
+ "-v", "--verbose",
38
+ type=str,
39
+ nargs="?",
40
+ const="default",
41
+ metavar="",
42
+ help="verbosity level: low, default, high (default if flag set without value)",
43
+ )
37
44
  behavior.add_argument("--log", action="store_true", help="save a log file into the output folder")
38
45
  behavior.add_argument("--progress", action="store_true", help="show a progress bar")
39
46
  behavior.add_argument("--no-redirect", action="store_true", help="do not follow redirects by archive.org")
40
47
  behavior.add_argument("--retry", type=int, default=0, metavar="", help="retry failed downloads (opt tries as int, else infinite)")
41
48
  behavior.add_argument("--workers", type=int, default=1, metavar="", help="number of workers (simultaneous downloads)")
42
49
  behavior.add_argument("--delay", type=int, default=0, metavar="", help="delay between each download in seconds")
50
+ behavior.add_argument(
51
+ "--wait",
52
+ type=int,
53
+ default=15,
54
+ metavar="",
55
+ help="seconds to wait before renewing connection after HTTP errors or snapshot download errors (default: 15)",
56
+ )
43
57
 
44
58
  special = parser.add_argument_group("special")
45
59
  special.add_argument("--reset", action="store_true", help="reset the job and ignore existing cdx/db/csv files")
@@ -55,4 +69,3 @@ class Arguments:
55
69
  def get_args(self) -> dict:
56
70
  """Returns the parsed arguments as a dictionary."""
57
71
  return vars(self.args)
58
-
@@ -4,12 +4,13 @@ import signal
4
4
  import sys
5
5
  import time
6
6
  from importlib.metadata import version
7
+ from typing import Union
7
8
 
8
9
  import pywaybackup.archive_save as archive_save
9
10
  from pywaybackup.archive_download import DownloadArchive
10
11
  from pywaybackup.db import Database as db
11
12
  from pywaybackup.Exception import Exception as ex
12
- from pywaybackup.files import CDXfile, CDXquery, CSVfile, File
13
+ from pywaybackup.files import CDXfile, CDXquery, CSVfile
13
14
  from pywaybackup.helper import sanitize_filename, url_split
14
15
  from pywaybackup.SnapshotCollection import SnapshotCollection
15
16
  from pywaybackup.Verbosity import Verbosity as vb
@@ -23,7 +24,7 @@ class _Status:
23
24
 
24
25
  Attributes:
25
26
  sc (SnapshotCollection): The current snapshot collection being processed.
26
- task (str): The current task being performed (e.g., 'initializing', 'downloading cdx', 'preparing snapshots', 'downloading snapshots', 'done').
27
+ task (str): The current task being performed (e.g., 'initializing', 'downloading cdx', ...).
27
28
  handled (int): The number of snapshots that have been processed so far.
28
29
  total (int): The total number of snapshots to be processed.
29
30
  progress (float): The progress of the backup process as a percentage.
@@ -122,13 +123,14 @@ class PyWayBackup:
122
123
  statuscode: str = None,
123
124
  output: str = None,
124
125
  metadata: str = None,
125
- verbose: bool = False,
126
+ verbose: Union[bool, str, int] = None,
126
127
  log: bool = False,
127
128
  progress: bool = False,
128
129
  no_redirect: bool = False,
129
130
  retry: int = 0,
130
131
  workers: int = 1,
131
132
  delay: int = 0,
133
+ wait: int = 15,
132
134
  reset: bool = False,
133
135
  keep: bool = False,
134
136
  silent: bool = True,
@@ -156,6 +158,8 @@ class PyWayBackup:
156
158
  self._retry = retry
157
159
  self._workers = workers
158
160
  self._delay = delay
161
+ self._wait = wait
162
+
159
163
  self._reset = reset
160
164
  self._keep = keep
161
165
 
@@ -344,6 +348,7 @@ class PyWayBackup:
344
348
  retry=self._retry,
345
349
  no_redirect=self._no_redirect,
346
350
  delay=self._delay,
351
+ wait=self._wait,
347
352
  workers=self._workers,
348
353
  )
349
354
  downloader.run(SnapshotCollection=collection)
@@ -0,0 +1,221 @@
1
+ from enum import IntEnum
2
+ from tqdm import tqdm
3
+ from typing import Union
4
+
5
+
6
# Alias table is kept outside the enum so its entries do not become enum members.
# Keys are upper-cased user inputs, values are VerbosityLevel member names.
_VERBOSITY_ALIASES = {
    "NORMAL": "DEFAULT",
    "VERBOSE": "DEFAULT",
    "INFO": "DEFAULT",
    "DEBUG": "HIGH",
    "DETAIL": "HIGH",
    "DETAILED": "HIGH",
    "MAX": "HIGH",
    "QUIET": "LOW",
    "MINIMAL": "LOW",
    "MIN": "LOW",
}


class VerbosityLevel(IntEnum):
    """
    Verbosity levels for output control.

    - LOW: Essential output only (no verbose flag)
    - DEFAULT: Standard verbose output (--verbose or --verbose default)
    - HIGH: Detailed verbose output (--verbose high)
    """

    LOW = 0
    DEFAULT = 1
    HIGH = 2

    @classmethod
    def from_value(cls, value) -> "VerbosityLevel":
        """
        Convert various input types to VerbosityLevel.

        Args:
            value: Can be:
                - None/False: LOW
                - True: DEFAULT
                - str: "low", "default", "high" or one of the aliases
                  (normal, verbose, info, debug, detail, detailed, max,
                  quiet, minimal, min); case-insensitive, surrounding
                  whitespace is ignored
                - int: 0, 1, 2
                - VerbosityLevel: returned as-is

        Returns:
            VerbosityLevel enum value. Any other input type falls back to LOW.

        Raises:
            ValueError: If an int or string value is not a valid level or alias
        """
        # bool is a subclass of int, so the identity checks must come first
        if value is None or value is False:
            return cls.LOW
        if value is True:
            return cls.DEFAULT
        if isinstance(value, cls):
            return value
        if isinstance(value, int):
            try:
                return cls(value)
            except ValueError:
                raise ValueError(
                    f"Invalid verbosity level: {value}. Valid levels: 0 (low), 1 (default), 2 (high)"
                ) from None
        if isinstance(value, str):
            key = value.strip().upper()
            # resolve aliases to the canonical member name
            key = _VERBOSITY_ALIASES.get(key, key)
            try:
                return cls[key]
            except KeyError:
                names = [member.name.lower() for member in cls]
                # sorted so the error message is deterministic
                aliases = sorted(alias.lower() for alias in _VERBOSITY_ALIASES)
                valid = ", ".join(names + aliases)
                raise ValueError(f"Invalid verbosity level: '{value}'. Valid levels: {valid}") from None
        return cls.LOW
74
+
75
+
76
class Verbosity:
    """
    A class to manage verbosity levels, logging, progress and output.

    All state is kept on the class itself; every method is a classmethod.

    Verbosity tiers:
    - LOW (0): Essential output only - no verbose flag set
    - DEFAULT (1): Standard verbose - --verbose or --verbose default
    - HIGH (2): Detailed verbose - --verbose high
    """

    level = VerbosityLevel.LOW

    # Defaults so write()/progress()/fini() are safe to call before init().
    silent = False
    logfile = None

    PROGRESS = None
    pbar = None

    # Not referenced by this class; kept so existing external accesses keep working.
    log = None

    @classmethod
    def init(cls, logfile=None, silent: bool = False, verbose: Union[bool, str, int] = False, progress=None):
        """
        Configure output handling.

        Args:
            logfile: Path of a log file to (over)write, or None for no log file.
            silent: If True, write() and progress() produce no output at all.
            verbose: Verbosity level in any form accepted by VerbosityLevel.from_value.
            progress: Truthy to show a progress bar instead of printing log lines.

        Raises:
            ValueError: If verbose is not a valid verbosity level.
        """
        # validate first so an invalid level does not leave a half-configured state
        level = VerbosityLevel.from_value(verbose)
        # close a handle left over from a previous init() that was never finalized
        if cls.logfile:
            cls.logfile.close()
            cls.logfile = None
        cls.silent = silent
        cls.level = level
        cls.logfile = open(logfile, "w", encoding="utf-8") if logfile else None
        cls.PROGRESS = progress

    @classmethod
    def fini(cls):
        """
        Close the progress bar (if progress mode is active) and the log file.

        Both references are reset so a later init()/progress(0) starts clean
        and write() never touches a closed file.
        """
        if cls.PROGRESS and cls.pbar is not None:
            cls.pbar.close()
            cls.pbar = None
        if cls.logfile:
            cls.logfile.close()
            cls.logfile = None

    @classmethod
    def write(cls, verbose: Union[bool, str, int, None] = None, content: Union[str, list] = None):
        """
        Writes log entries to stdout or logfile based on verbosity level and progress-bar status.

        Determines if the message should be printed based on verbosity level.
        Lines are always written to the log file (if any); they are printed to
        stdout only when no progress bar is active.

        Args:
            verbose: The required verbosity level for this message:
                - None: Always printed (essential output)
                - False/0/"low": Printed at LOW level and above
                - True/1/"default": Printed at DEFAULT level and above
                - 2/"high": Printed at HIGH level only
            content: The message string or list of message dicts to log.
                Each dict carries a 'content' key and an optional 'verbose' key.
        """
        if cls.silent or not content:
            return
        if isinstance(content, str):
            content = [{"verbose": verbose, "content": content}]
        logline = cls.filter_verbosity(content)
        if not logline:
            return
        if cls.logfile:
            cls.logfile.write(logline + "\n")
            cls.logfile.flush()
        if not cls.PROGRESS:
            print(logline)

    @classmethod
    def progress(cls, progress: int, maxval: int = None):
        """
        Updates the progress bar.

        - bar is initialized if calling with progress=0
        - bar is updated if calling with progress > 0

        Does nothing when silent or when progress mode is not enabled.
        """
        if cls.silent or not cls.PROGRESS:
            return
        if cls.pbar is None and progress == 0:
            cls.pbar = Progressbar(
                unit=" snapshot",
                desc="download file".ljust(15),
                total=maxval,
                ascii="░▒█",
                bar_format="{l_bar}{bar:50}{r_bar}{bar:-10b}",
            )
        if cls.pbar is not None and progress is not None and progress > 0:
            cls.pbar.update(progress)

    @classmethod
    def filter_verbosity(cls, message: list):
        """
        Removes messages from the list that do not match the verbosity level.

        Messages are kept if:
        - verbose is None or missing (always print - essential output)
        - The message's required level <= configured level

        Returns a string containing the filtered messages, joined by newlines.
        """
        filtered_message = []
        for msg in message:
            msg_verbose = msg.get("verbose", None)
            # None means essential output; otherwise compare required vs configured level
            if msg_verbose is None or VerbosityLevel.from_value(msg_verbose) <= cls.level:
                filtered_message.append(msg["content"])
        return "\n".join(filtered_message)
179
+
180
+
181
class Progressbar(Verbosity):
    """
    Thin wrapper around a tqdm bar that honors the class-level silent flag.
    """

    def __init__(
        self,
        unit: str,
        desc: str,
        unit_scale: bool = False,
        total: int = None,
        ascii: str = None,
        bar_format: str = None,
    ):
        """
        Store the bar settings and create the tqdm bar unless output is silenced.

        Args:
            unit: Unit label passed to tqdm.
            desc: Description prefix passed to tqdm.
            unit_scale: Passed to tqdm as unit_scale.
            total: Expected number of iterations, or None if unknown.
            ascii: Fill characters passed to tqdm.
            bar_format: Format string passed to tqdm.
        """
        self.unit = unit
        self.desc = desc
        self.unit_scale = unit_scale
        self.total = total
        self.ascii = ascii
        self.bar_format = bar_format
        # Always set on the instance: otherwise self.pbar would resolve to the
        # class-level Verbosity.pbar, which may be this very object.
        self.pbar = None
        if not self._is_silent():
            self.pbar = tqdm(
                unit=self.unit,
                desc=self.desc,
                unit_scale=self.unit_scale,
                total=self.total,
                ascii=self.ascii,
                bar_format=self.bar_format,
            )

    def _is_silent(self) -> bool:
        """Return the class-level silent flag, treating an unset flag as not silent."""
        return bool(getattr(type(self), "silent", False))

    def update(self, progress: int):
        """
        Updates the progress bar with the given progress value.
        """
        if self._is_silent() or self.pbar is None:
            return
        self.pbar.update(progress)
        self.pbar.refresh()

    def close(self):
        """
        Close the progress bar.
        """
        if self.pbar is not None:
            self.pbar.close()
            # drop the reference so later update()/close() calls are no-ops
            self.pbar = None
@@ -81,7 +81,7 @@ class DownloadArchive:
81
81
  sc (SnapshotCollection): The snapshot collection being processed.
82
82
  """
83
83
 
84
- def __init__(self, mode: str, output: str, retry: int, no_redirect: bool, delay: int, workers: int):
84
+ def __init__(self, mode: str, output: str, retry: int, no_redirect: bool, delay: int, wait: int, workers: int):
85
85
  """
86
86
  Initialize the download manager with configuration options.
87
87
 
@@ -98,8 +98,8 @@ class DownloadArchive:
98
98
  self.retry = retry
99
99
  self.no_redirect = no_redirect
100
100
  self.delay = delay
101
+ self.wait = wait
101
102
  self.workers = workers
102
- self.no_redirect = no_redirect
103
103
  self.sc = None
104
104
 
105
105
  def run(self, SnapshotCollection: SnapshotCollection):
@@ -156,9 +156,11 @@ class DownloadArchive:
156
156
  while worker.attempt <= retry_max_attempt: # retry as given by user
157
157
  worker.message.store(
158
158
  verbose=True,
159
- content=f"\n-----> Worker: {worker.id} \
160
- - Attempt: [{worker.attempt}/{retry_max_attempt}] \
161
- Snapshot ID: [{worker.snapshot.counter}/{self.sc._snapshot_total}]",
159
+ content=(
160
+ f"\n-----> Worker: {worker.id}"
161
+ f" - Attempt: [{worker.attempt}/{retry_max_attempt}]"
162
+ f" Snapshot ID: [{worker.snapshot.counter}/{self.sc._snapshot_total}]"
163
+ ),
162
164
  )
163
165
  download_attempt = 1
164
166
  download_max_attempt = 3
@@ -208,7 +210,7 @@ class DownloadArchive:
208
210
  f"\n-----> Worker: {worker.id}"
209
211
  f" - Attempt: [{worker.attempt}/{retry_max_attempt}]"
210
212
  f" Snapshot ID: [{worker.snapshot.counter}/{self.sc._snapshot_total}]"
211
- f" - {e.__class__.__name__} - renewing connection in 15 seconds..."
213
+ f" - {e.__class__.__name__} - renewing connection in {self.wait * download_attempt} seconds..."
212
214
  ),
213
215
  )
214
216
  vb.write(
@@ -216,10 +218,10 @@ class DownloadArchive:
216
218
  content=(
217
219
  f"Worker: {worker.id}"
218
220
  f" - Snapshot {worker.snapshot.counter}/{self.sc._snapshot_total}"
219
- f" - renewing connection in 15 seconds..."
221
+ f" - renewing connection in {self.wait * download_attempt} seconds..."
220
222
  ),
221
223
  )
222
- time.sleep(15)
224
+ time.sleep(self.wait * download_attempt)
223
225
  worker.refresh_connection()
224
226
  continue
225
227
  else:
@@ -244,9 +246,13 @@ class DownloadArchive:
244
246
 
245
247
  # depends on user - retries after timeout or proceed to next snapshot
246
248
  if self.retry > 0:
247
- worker.message.store(verbose=True, result="FAILED", content="retry timeout: 15 seconds...")
249
+ worker.message.store(
250
+ verbose=True,
251
+ result="FAILED",
252
+ content=f"retry timeout: {self.wait * worker.attempt} seconds...",
253
+ )
248
254
  worker.message.write()
249
- time.sleep(15)
255
+ time.sleep(self.wait * worker.attempt)
250
256
  else:
251
257
  worker.message.store(verbose=None, result="FAILED", content="no attempt left")
252
258
  worker.message.write()
@@ -59,7 +59,15 @@ class CDXquery:
59
59
  )
60
60
  filter_filetype = f"&filter=original:.*\\.({'|'.join(self.filter_filetype)})$" if self.filter_filetype else ""
61
61
 
62
- return f"https://web.archive.org/cdx/search/cdx?output=json&url={cdx_url}{period}&fl=timestamp,digest,mimetype,statuscode,original{limit}{filter_filetype}{filter_statuscode}"
62
+ return (
63
+ f"https://web.archive.org/cdx/search/cdx?"
64
+ f"output=json"
65
+ f"&url={cdx_url}{period}"
66
+ f"&fl=timestamp,digest,mimetype,statuscode,original"
67
+ f"{limit}"
68
+ f"{filter_filetype}"
69
+ f"{filter_statuscode}"
70
+ )
63
71
 
64
72
 
65
73
  class File:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pywaybackup
3
- Version: 4.1.2
3
+ Version: 4.1.4
4
4
  Summary: Query and download archive.org as simple as possible.
5
5
  Author-email: bitdruid <bitdruid@outlook.com>
6
6
  License: MIT License
@@ -29,6 +29,7 @@ Project-URL: homepage, https://github.com/bitdruid/python-wayback-machine-downlo
29
29
  Requires-Python: >=3.8
30
30
  Description-Content-Type: text/markdown
31
31
  License-File: LICENSE
32
+ Requires-Dist: ruff
32
33
  Requires-Dist: SQLAlchemy==2.0.43
33
34
  Requires-Dist: requests==2.32.3
34
35
  Requires-Dist: tqdm==4.67.1
@@ -216,8 +217,13 @@ Parameters will change the download behavior for snapshots.
216
217
  - **`-m`**, **`--metadata`**<br>
217
218
  Folder where metadata will be saved (`cdx`/`db`/`csv`/`log`). If you are downloading into a network share, you SHOULD set this to a local path because sqlite locking mechanism may cause issues with network shares.
218
219
 
219
- - **`--verbose`**:<br>
220
- Increase output verbosity.
220
+ - **`-v`**, **`--verbose`** `[level]`:<br>
221
+ Set verbosity level. Available levels:
222
+ - `low` (or `quiet`, `minimal`, `min`): Essential output only (same as no flag)
223
+ - `default` (or `normal`, `verbose`): Standard verbose output (default when flag is set)
224
+ - `high` (or `debug`, `detailed`, `max`): Detailed verbose output
225
+
226
+ Examples: `--verbose`, `--verbose default`, `--verbose high`, `-v high`
221
227
 
222
228
  - **`--log`** <!-- `<path>` -->:<br>
223
229
  Saves a log file into the output-dir. `waybackup_<sanitized_url>.log`.
@@ -237,6 +243,9 @@ Parameters will change the download behavior for snapshots.
237
243
  - **`--delay`** `<seconds>`:<br>
238
244
  Delay between download requests in seconds. Default is no delay (0).
239
245
 
246
+ - **`--wait`** `<seconds>`:<br>
247
+ Base number of seconds to wait before renewing the connection after HTTP errors or snapshot download errors. The actual wait is this value multiplied by the current attempt number. Default is 15 seconds.
248
+
240
249
  #### Job Handling:
241
250
 
242
251
  - **`--reset`**:
@@ -1,3 +1,4 @@
1
+ ruff
1
2
  SQLAlchemy==2.0.43
2
3
  requests==2.32.3
3
4
  tqdm==4.67.1
@@ -1,136 +0,0 @@
1
- from tqdm import tqdm
2
- from typing import Union
3
-
4
-
5
- class Verbosity:
6
- """
7
- A class to manage verbosity levels, logging, progress and output.
8
- """
9
-
10
- verbose = False
11
-
12
- PROGRESS = None
13
- pbar = None
14
-
15
- log = None
16
-
17
- @classmethod
18
- def init(cls, logfile=None, silent: bool = False, verbose: bool = False, progress=None):
19
- cls.silent = silent
20
- cls.verbose = verbose
21
- cls.logfile = open(logfile, "w", encoding="utf-8") if logfile else None
22
- cls.PROGRESS = progress
23
-
24
- @classmethod
25
- def fini(cls):
26
- if cls.PROGRESS:
27
- if cls.pbar is not None:
28
- cls.pbar.close()
29
- if cls.logfile:
30
- cls.logfile.close()
31
-
32
- @classmethod
33
- def write(cls, verbose: bool = None, content: Union[str, list] = None):
34
- """
35
- Writes log entries to stdout or logfile based on verbosity level and progress-bar status.
36
-
37
- Determines if the message should be printed based on verbosity level.
38
- - If None, the message is always printed.
39
-
40
- Content is a list and is filtered and concatenated to a single block of loglines.
41
- It should contain dictionaries with keys:
42
- - 'verbose': The verbosity level of the message (True/False).
43
- - 'content': The actual message to be logged.
44
- """
45
- if not cls.silent:
46
- if isinstance(content, str):
47
- content = [{"verbose": verbose, "content": content}]
48
- logline = cls.filter_verbosity(content)
49
- if logline:
50
- if cls.logfile:
51
- cls.logfile.write(logline + "\n")
52
- cls.logfile.flush()
53
- if not cls.PROGRESS:
54
- print(logline)
55
-
56
- @classmethod
57
- def progress(cls, progress: int, maxval: int = None):
58
- """
59
- Updates the progress bar.
60
-
61
- - bar is initialized if calling with progress=0
62
- - bar is updated if calling with progress > 0
63
-
64
- """
65
- if not cls.silent:
66
- if cls.PROGRESS:
67
- if cls.pbar is None and progress == 0:
68
- cls.pbar = Progressbar(
69
- unit=" snapshot",
70
- desc="download file".ljust(15),
71
- total=maxval,
72
- ascii="░▒█",
73
- bar_format="{l_bar}{bar:50}{r_bar}{bar:-10b}",
74
- )
75
- if cls.pbar is not None and progress is not None and progress > 0:
76
- cls.pbar.update(progress)
77
-
78
- @classmethod
79
- def filter_verbosity(cls, message: list):
80
- """
81
- Removes messages from the list that do not match the verbosity level.
82
-
83
- - True if message is verbose None (print always)
84
- - True if message has same verbosity as configured
85
-
86
- Returns a string containing the filtered messages, joined by newlines.
87
- """
88
- filtered_message = []
89
- for msg in message:
90
- verbose = msg.get("verbose", None)
91
- if verbose is None or verbose == cls.verbose:
92
- filtered_message.append(msg["content"])
93
- return "\n".join(filtered_message)
94
-
95
-
96
- class Progressbar(Verbosity):
97
- def __init__(
98
- self,
99
- unit: str,
100
- desc: str,
101
- unit_scale: bool = False,
102
- total: int = None,
103
- ascii: str = None,
104
- bar_format: str = None,
105
- ):
106
- if not super().silent:
107
- self.unit = unit
108
- self.desc = desc
109
- self.unit_scale = unit_scale
110
- self.total = total
111
- self.ascii = ascii
112
- self.bar_format = bar_format
113
- self.pbar = tqdm(
114
- unit=self.unit,
115
- desc=self.desc,
116
- unit_scale=self.unit_scale,
117
- total=self.total,
118
- ascii=self.ascii,
119
- bar_format=self.bar_format,
120
- )
121
-
122
- def update(self, progress: int):
123
- """
124
- Updates the progress bar with the given progress value.
125
- """
126
- if not super().silent:
127
- if self.pbar is not None:
128
- self.pbar.update(progress)
129
- self.pbar.refresh()
130
-
131
- def close(self):
132
- """
133
- Close the progress bar.
134
- """
135
- if self.pbar is not None:
136
- self.pbar.close()
File without changes
File without changes