pywaybackup 3.0.2__tar.gz → 3.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. {pywaybackup-3.0.2/pywaybackup.egg-info → pywaybackup-3.0.4}/PKG-INFO +2 -2
  2. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/pyproject.toml +1 -1
  3. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/pywaybackup/Arguments.py +3 -0
  4. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/pywaybackup/Exception.py +1 -1
  5. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/pywaybackup/SnapshotCollection.py +29 -20
  6. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/pywaybackup/archive.py +9 -8
  7. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/pywaybackup/main.py +2 -2
  8. {pywaybackup-3.0.2 → pywaybackup-3.0.4/pywaybackup.egg-info}/PKG-INFO +2 -2
  9. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/LICENSE +0 -0
  10. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/README.md +0 -0
  11. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/pywaybackup/Converter.py +0 -0
  12. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/pywaybackup/Verbosity.py +0 -0
  13. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/pywaybackup/__init__.py +0 -0
  14. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/pywaybackup/db.py +0 -0
  15. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/pywaybackup/helper.py +0 -0
  16. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/pywaybackup.egg-info/SOURCES.txt +0 -0
  17. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/pywaybackup.egg-info/dependency_links.txt +0 -0
  18. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/pywaybackup.egg-info/entry_points.txt +0 -0
  19. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/pywaybackup.egg-info/requires.txt +0 -0
  20. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/pywaybackup.egg-info/top_level.txt +0 -0
  21. {pywaybackup-3.0.2 → pywaybackup-3.0.4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: pywaybackup
3
- Version: 3.0.2
3
+ Version: 3.0.4
4
4
  Summary: Query and download archive.org as simple as possible.
5
5
  Author-email: bitdruid <bitdruid@outlook.com>
6
6
  License: MIT License
@@ -7,7 +7,7 @@ packages = ["pywaybackup"]
7
7
 
8
8
  [project]
9
9
  name = "pywaybackup"
10
- version = "3.0.2"
10
+ version = "3.0.4"
11
11
  description = "Query and download archive.org as simple as possible."
12
12
  authors = [
13
13
  { name = "bitdruid", email = "bitdruid@outlook.com" }
@@ -5,6 +5,7 @@ import argparse
5
5
  from importlib.metadata import version
6
6
 
7
7
  from pywaybackup.helper import url_split, sanitize_filename
8
+ from pywaybackup.Exception import Exception as ex
8
9
 
9
10
  class Arguments:
10
11
 
@@ -84,6 +85,8 @@ class Configuration:
84
85
  cls.mode = "last"
85
86
  if cls.first:
86
87
  cls.mode = "first"
88
+ if cls.save:
89
+ cls.mode = "save"
87
90
 
88
91
  if cls.filetype:
89
92
  cls.filetype = [ft.lower().strip() for ft in cls.filetype.split(",")]
@@ -105,4 +105,4 @@ class Exception:
105
105
  sys.__excepthook__(exception_type, exception, traceback)
106
106
  return
107
107
  Exception.exception("UNCAUGHT EXCEPTION", exception, traceback) # uncaught exceptions also with custom scheme
108
-
108
+
@@ -1,10 +1,15 @@
1
- from pywaybackup.Verbosity import Verbosity as vb
2
- from pywaybackup.helper import url_split
3
- from pywaybackup.db import Database
4
- from tqdm import tqdm
5
1
  import json
6
2
  import csv
7
3
  import os
4
+ import threading
5
+
6
+ from tqdm import tqdm
7
+
8
+ from pywaybackup.Verbosity import Verbosity as vb
9
+ from pywaybackup.helper import url_split
10
+ from pywaybackup.db import Database
11
+
12
+ LOCK = threading.Lock() # thread safe lock
8
13
 
9
14
  class SnapshotCollection:
10
15
  """
@@ -278,27 +283,31 @@ class SnapshotCollection:
278
283
  """
279
284
  Modify a snapshot-row in the snapshot table.
280
285
  """
281
- connection.cursor.execute(
282
- f"""
283
- UPDATE snapshot_tbl
284
- SET {column} = ?
285
- WHERE rowid = ?
286
- """,
287
- (value, snapshot_id)
288
- )
289
- connection.conn.commit()
286
+ global LOCK
287
+ with LOCK:
288
+ connection.cursor.execute(
289
+ f"""
290
+ UPDATE snapshot_tbl
291
+ SET {column} = ?
292
+ WHERE rowid = ?
293
+ """,
294
+ (value, snapshot_id)
295
+ )
296
+ connection.conn.commit()
290
297
 
291
298
  def get_snapshot(connection):
292
299
  """
293
300
  Get a snapshot-row from the snapshot table with response NULL. (not processed)
294
301
  """
295
- connection.cursor.execute(
296
- """
297
- SELECT rowid, * FROM snapshot_tbl WHERE response IS NULL LIMIT 1
298
- """
299
- )
300
- row = connection.cursor.fetchone()
301
- return row
302
+ global LOCK
303
+ with LOCK:
304
+ connection.cursor.execute(
305
+ """
306
+ SELECT rowid, * FROM snapshot_tbl WHERE response IS NULL LIMIT 1
307
+ """
308
+ )
309
+ row = connection.cursor.fetchone()
310
+ return row
302
311
 
303
312
  @classmethod
304
313
  def create_output(cls, url: str, timestamp: str, output: str):
@@ -224,15 +224,16 @@ def download_loop(output, worker, retry, no_redirect, delay):
224
224
 
225
225
  snapshot = sc.get_snapshot(db)
226
226
  if not snapshot: break
227
- sc.modify_snapshot(db, snapshot["rowid"], "response", "LOCK") # mark as locked for other workers
228
- SNAPSHOT_CURRENT = sc.SNAPSHOT_HANDLED + 1
227
+ # mark as locked for other workers // only visual because get_snapshot fetches by NULL
228
+ sc.modify_snapshot(db, snapshot["rowid"], "response", "LOCK")
229
+ SNAPSHOT_CURRENT = snapshot["rowid"]
229
230
 
230
231
  retry_attempt = 1
231
232
  retry_max_attempt = retry if retry > 0 else retry + 1
232
233
  status_message = Message()
233
234
 
234
235
  while retry_attempt <= retry_max_attempt: # retry as given by user
235
- status_message.store(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}]")
236
+ status_message.store(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot ID: [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}]")
236
237
  download_attempt = 1
237
238
  download_max_attempt = 3
238
239
 
@@ -247,19 +248,19 @@ def download_loop(output, worker, retry, no_redirect, delay):
247
248
  if isinstance(e, (timeout, ConnectionRefusedError, ConnectionResetError)):
248
249
  if download_attempt < download_max_attempt:
249
250
  download_attempt += 1 # try again 2x with same connection
250
- vb.write(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - {e.__class__.__name__} - requesting again in 50 seconds...")
251
+ vb.write(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot ID: [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - {e.__class__.__name__} - requesting again in 50 seconds...")
251
252
  time.sleep(50)
252
253
  continue
253
254
  elif isinstance(e, http.client.HTTPException):
254
255
  if download_attempt < download_max_attempt:
255
256
  download_attempt = download_max_attempt # try again 1x with new connection
256
- vb.write(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - {e.__class__.__name__} - renewing connection in 15 seconds...")
257
+ vb.write(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot ID: [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - {e.__class__.__name__} - renewing connection in 15 seconds...")
257
258
  time.sleep(15)
258
259
  connection.close()
259
260
  connection = http.client.HTTPSConnection("web.archive.org")
260
261
  continue
261
262
  else:
262
- ex.exception(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - EXCEPTION - {e}", e=e)
263
+ ex.exception(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot ID: [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - EXCEPTION - {e}", e=e)
263
264
  retry_attempt = retry_max_attempt
264
265
  break
265
266
 
@@ -272,11 +273,11 @@ def download_loop(output, worker, retry, no_redirect, delay):
272
273
 
273
274
  # depends on user - retries after timeout or proceed to next snapshot
274
275
  if retry > 0:
275
- status_message.store(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - Download failed - retry Timeout: 15 seconds...")
276
+ status_message.store(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot ID: [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - Download failed - retry Timeout: 15 seconds...")
276
277
  status_message.write()
277
278
  time.sleep(15)
278
279
  else:
279
- status_message.store(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - Download failed")
280
+ status_message.store(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot ID: [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - Download failed")
280
281
  status_message.write()
281
282
  sc.SNAPSHOT_HANDLED += 1
282
283
  break # break all loops and do a user-defined retry
@@ -19,13 +19,13 @@ def main():
19
19
  db.init(config.dbfile, config.query_identifier)
20
20
  sc.init(config.mode)
21
21
 
22
- archive.startup()
23
-
24
22
  if config.save:
25
23
  archive.save_page(config.url)
26
24
 
27
25
  else:
28
26
 
27
+ archive.startup()
28
+
29
29
  try:
30
30
  archive.query_list(config.csvfile, config.cdxfile, config.range, config.limit, config.start, config.end, config.explicit, config.filetype)
31
31
  archive.download_list(config.output, config.retry, config.no_redirect, config.delay, config.workers)
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: pywaybackup
3
- Version: 3.0.2
3
+ Version: 3.0.4
4
4
  Summary: Query and download archive.org as simple as possible.
5
5
  Author-email: bitdruid <bitdruid@outlook.com>
6
6
  License: MIT License
File without changes
File without changes
File without changes