pywaybackup 3.0.3__tar.gz → 3.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pywaybackup-3.0.3/pywaybackup.egg-info → pywaybackup-3.0.4}/PKG-INFO +1 -1
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/pyproject.toml +1 -1
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/pywaybackup/Arguments.py +1 -7
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/pywaybackup/SnapshotCollection.py +29 -20
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/pywaybackup/archive.py +9 -8
- {pywaybackup-3.0.3 → pywaybackup-3.0.4/pywaybackup.egg-info}/PKG-INFO +1 -1
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/LICENSE +0 -0
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/README.md +0 -0
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/pywaybackup/Converter.py +0 -0
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/pywaybackup/Exception.py +0 -0
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/pywaybackup/Verbosity.py +0 -0
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/pywaybackup/__init__.py +0 -0
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/pywaybackup/db.py +0 -0
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/pywaybackup/helper.py +0 -0
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/pywaybackup/main.py +0 -0
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/pywaybackup.egg-info/SOURCES.txt +0 -0
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/pywaybackup.egg-info/dependency_links.txt +0 -0
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/pywaybackup.egg-info/entry_points.txt +0 -0
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/pywaybackup.egg-info/requires.txt +0 -0
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/pywaybackup.egg-info/top_level.txt +0 -0
- {pywaybackup-3.0.3 → pywaybackup-3.0.4}/setup.cfg +0 -0
|
@@ -74,13 +74,7 @@ class Configuration:
|
|
|
74
74
|
|
|
75
75
|
if cls.output is None:
|
|
76
76
|
cls.output = os.path.join(os.getcwd(), "waybackup_snapshots")
|
|
77
|
-
|
|
78
|
-
# check if output permissions are given
|
|
79
|
-
if not os.access(cls.output, os.W_OK):
|
|
80
|
-
print(f"\nNo write permissions for output folder: {cls.output}\n")
|
|
81
|
-
sys.exit(1)
|
|
82
|
-
else:
|
|
83
|
-
os.makedirs(cls.output, exist_ok=True)
|
|
77
|
+
os.makedirs(cls.output, exist_ok=True)
|
|
84
78
|
|
|
85
79
|
if cls.log is True:
|
|
86
80
|
cls.log = os.path.join(cls.output, f"waybackup_{sanitize_filename(cls.url)}.log")
|
|
@@ -1,10 +1,15 @@
|
|
|
1
|
-
from pywaybackup.Verbosity import Verbosity as vb
|
|
2
|
-
from pywaybackup.helper import url_split
|
|
3
|
-
from pywaybackup.db import Database
|
|
4
|
-
from tqdm import tqdm
|
|
5
1
|
import json
|
|
6
2
|
import csv
|
|
7
3
|
import os
|
|
4
|
+
import threading
|
|
5
|
+
|
|
6
|
+
from tqdm import tqdm
|
|
7
|
+
|
|
8
|
+
from pywaybackup.Verbosity import Verbosity as vb
|
|
9
|
+
from pywaybackup.helper import url_split
|
|
10
|
+
from pywaybackup.db import Database
|
|
11
|
+
|
|
12
|
+
LOCK = threading.Lock() # thread safe lock
|
|
8
13
|
|
|
9
14
|
class SnapshotCollection:
|
|
10
15
|
"""
|
|
@@ -278,27 +283,31 @@ class SnapshotCollection:
|
|
|
278
283
|
"""
|
|
279
284
|
Modify a snapshot-row in the snapshot table.
|
|
280
285
|
"""
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
286
|
+
global LOCK
|
|
287
|
+
with LOCK:
|
|
288
|
+
connection.cursor.execute(
|
|
289
|
+
f"""
|
|
290
|
+
UPDATE snapshot_tbl
|
|
291
|
+
SET {column} = ?
|
|
292
|
+
WHERE rowid = ?
|
|
293
|
+
""",
|
|
294
|
+
(value, snapshot_id)
|
|
295
|
+
)
|
|
296
|
+
connection.conn.commit()
|
|
290
297
|
|
|
291
298
|
def get_snapshot(connection):
|
|
292
299
|
"""
|
|
293
300
|
Get a snapshot-row from the snapshot table with response NULL. (not processed)
|
|
294
301
|
"""
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
+
global LOCK
|
|
303
|
+
with LOCK:
|
|
304
|
+
connection.cursor.execute(
|
|
305
|
+
"""
|
|
306
|
+
SELECT rowid, * FROM snapshot_tbl WHERE response IS NULL LIMIT 1
|
|
307
|
+
"""
|
|
308
|
+
)
|
|
309
|
+
row = connection.cursor.fetchone()
|
|
310
|
+
return row
|
|
302
311
|
|
|
303
312
|
@classmethod
|
|
304
313
|
def create_output(cls, url: str, timestamp: str, output: str):
|
|
@@ -224,15 +224,16 @@ def download_loop(output, worker, retry, no_redirect, delay):
|
|
|
224
224
|
|
|
225
225
|
snapshot = sc.get_snapshot(db)
|
|
226
226
|
if not snapshot: break
|
|
227
|
-
|
|
228
|
-
|
|
227
|
+
# mark as locked for other workers // only visual because get_snapshot fetches by NULL
|
|
228
|
+
sc.modify_snapshot(db, snapshot["rowid"], "response", "LOCK")
|
|
229
|
+
SNAPSHOT_CURRENT = snapshot["rowid"]
|
|
229
230
|
|
|
230
231
|
retry_attempt = 1
|
|
231
232
|
retry_max_attempt = retry if retry > 0 else retry + 1
|
|
232
233
|
status_message = Message()
|
|
233
234
|
|
|
234
235
|
while retry_attempt <= retry_max_attempt: # retry as given by user
|
|
235
|
-
status_message.store(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}]")
|
|
236
|
+
status_message.store(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot ID: [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}]")
|
|
236
237
|
download_attempt = 1
|
|
237
238
|
download_max_attempt = 3
|
|
238
239
|
|
|
@@ -247,19 +248,19 @@ def download_loop(output, worker, retry, no_redirect, delay):
|
|
|
247
248
|
if isinstance(e, (timeout, ConnectionRefusedError, ConnectionResetError)):
|
|
248
249
|
if download_attempt < download_max_attempt:
|
|
249
250
|
download_attempt += 1 # try again 2x with same connection
|
|
250
|
-
vb.write(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - {e.__class__.__name__} - requesting again in 50 seconds...")
|
|
251
|
+
vb.write(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot ID: [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - {e.__class__.__name__} - requesting again in 50 seconds...")
|
|
251
252
|
time.sleep(50)
|
|
252
253
|
continue
|
|
253
254
|
elif isinstance(e, http.client.HTTPException):
|
|
254
255
|
if download_attempt < download_max_attempt:
|
|
255
256
|
download_attempt = download_max_attempt # try again 1x with new connection
|
|
256
|
-
vb.write(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - {e.__class__.__name__} - renewing connection in 15 seconds...")
|
|
257
|
+
vb.write(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot ID: [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - {e.__class__.__name__} - renewing connection in 15 seconds...")
|
|
257
258
|
time.sleep(15)
|
|
258
259
|
connection.close()
|
|
259
260
|
connection = http.client.HTTPSConnection("web.archive.org")
|
|
260
261
|
continue
|
|
261
262
|
else:
|
|
262
|
-
ex.exception(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - EXCEPTION - {e}", e=e)
|
|
263
|
+
ex.exception(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot ID: [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - EXCEPTION - {e}", e=e)
|
|
263
264
|
retry_attempt = retry_max_attempt
|
|
264
265
|
break
|
|
265
266
|
|
|
@@ -272,11 +273,11 @@ def download_loop(output, worker, retry, no_redirect, delay):
|
|
|
272
273
|
|
|
273
274
|
# depends on user - retries after timeout or proceed to next snapshot
|
|
274
275
|
if retry > 0:
|
|
275
|
-
status_message.store(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - Download failed - retry Timeout: 15 seconds...")
|
|
276
|
+
status_message.store(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot ID: [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - Download failed - retry Timeout: 15 seconds...")
|
|
276
277
|
status_message.write()
|
|
277
278
|
time.sleep(15)
|
|
278
279
|
else:
|
|
279
|
-
status_message.store(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - Download failed")
|
|
280
|
+
status_message.store(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot ID: [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - Download failed")
|
|
280
281
|
status_message.write()
|
|
281
282
|
sc.SNAPSHOT_HANDLED += 1
|
|
282
283
|
break # break all loops and do a user-defined retry
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|