PyPI - pywaybackup - Versions diffs - 3.0.4__tar.gz → 3.1.0__tar.gz - Mend

pywaybackup 3.0.4.tar.gz → 3.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

{pywaybackup-3.0.4/pywaybackup.egg-info → pywaybackup-3.1.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: pywaybackup
-Version: 3.0.4
+Version: 3.1.0
 Summary: Query and download archive.org as simple as possible.
 Author-email: bitdruid <bitdruid@outlook.com>
 License: MIT License
@@ -29,6 +29,7 @@ Project-URL: homepage, https://github.com/bitdruid/python-wayback-machine-downlo
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: pysqlite3-binary==0.5.4
 Requires-Dist: requests==2.31.0
 Requires-Dist: tqdm==4.66.2
 Requires-Dist: python-magic==0.4.27; sys_platform == "linux"
@@ -39,7 +40,7 @@ Requires-Dist: python-magic-bin==0.4.14; sys_platform == "win32"
 [![PyPI](https://img.shields.io/pypi/v/pywaybackup)](https://pypi.org/project/pywaybackup/)
 [![PyPI - Downloads](https://img.shields.io/pypi/dm/pywaybackup)](https://pypi.org/project/pywaybackup/)
 ![Python Version](https://img.shields.io/badge/Python-3.8-blue)
-![Python_Sqlite3 Version](https://img.shields.io/badge/Python_Sqlite3-3.25-blue)
+<!-- ![Python_Sqlite3 Version](https://img.shields.io/badge/Python_Sqlite3-3.35-blue) -->
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 Downloading archived web pages from the [Wayback Machine](https://archive.org/web/).

{pywaybackup-3.0.4 → pywaybackup-3.1.0}/README.md RENAMED Viewed

@@ -3,7 +3,7 @@
 [![PyPI](https://img.shields.io/pypi/v/pywaybackup)](https://pypi.org/project/pywaybackup/)
 [![PyPI - Downloads](https://img.shields.io/pypi/dm/pywaybackup)](https://pypi.org/project/pywaybackup/)
 ![Python Version](https://img.shields.io/badge/Python-3.8-blue)
-![Python_Sqlite3 Version](https://img.shields.io/badge/Python_Sqlite3-3.25-blue)
+<!-- ![Python_Sqlite3 Version](https://img.shields.io/badge/Python_Sqlite3-3.35-blue) -->
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 Downloading archived web pages from the [Wayback Machine](https://archive.org/web/).

{pywaybackup-3.0.4 → pywaybackup-3.1.0}/pyproject.toml RENAMED Viewed

@@ -7,7 +7,7 @@ packages = ["pywaybackup"]
 [project]
 name = "pywaybackup"
-version = "3.0.4"
+version = "3.1.0"
 description = "Query and download archive.org as simple as possible."
 authors = [
     { name = "bitdruid", email = "bitdruid@outlook.com" }
@@ -16,6 +16,7 @@ license = { file = "LICENSE" }
 readme = "README.md"
 requires-python = ">=3.8"
 dependencies = [
+    "pysqlite3-binary==0.5.4",
     "requests==2.31.0",
     "tqdm==4.66.2",
     "python-magic==0.4.27; sys_platform == 'linux'",

{pywaybackup-3.0.4 → pywaybackup-3.1.0}/pywaybackup/Arguments.py RENAMED Viewed

@@ -2,6 +2,7 @@
 import sys
 import os
 import argparse
 from importlib.metadata import version
 from pywaybackup.helper import url_split, sanitize_filename
@@ -74,7 +75,7 @@ class Configuration:
         if cls.output is None:
             cls.output = os.path.join(os.getcwd(), "waybackup_snapshots")
-        os.makedirs(cls.output, exist_ok=True)
+        os.makedirs(cls.output, exist_ok=True) if not cls.save else None
         if cls.log is True:
             cls.log = os.path.join(cls.output, f"waybackup_{sanitize_filename(cls.url)}.log")

{pywaybackup-3.0.4 → pywaybackup-3.1.0}/pywaybackup/Exception.py RENAMED Viewed

@@ -1,34 +1,33 @@
 import sys
 import os
-from datetime import datetime
+import re
 import linecache
 import traceback
-import re
+from datetime import datetime
 from importlib.metadata import version
-class Exception:
+class Exception:
     new_debug = True
     output = None
     command = None
     @classmethod
     def init(cls, output=None, command=None):
-        sys.excepthook = cls.exception_handler # set custom exception handler (uncaught exceptions)
+        sys.excepthook = (
+            cls.exception_handler
+        )  # set custom exception handler (uncaught exceptions)
         cls.output = output
         cls.command = command
     @classmethod
     def exception(cls, message: str, e: Exception, tb=None):
         custom_tb = sys.exc_info()[-1] if tb is None else tb
-        original_tb = cls.relativate_path("".join(traceback.format_exception(type(e), e, e.__traceback__)))
-        exception_message = (
-            "-------------------------\n"
-            f"!-- Exception: {message}\n"
+        original_tb = cls.relativate_path(
+            "".join(traceback.format_exception(type(e), e, e.__traceback__))
         )
+        exception_message = f"-------------------------\n!-- Exception: {message}\n"
         if custom_tb is not None:
             while custom_tb.tb_next:  # loop to last traceback frame
                 custom_tb = custom_tb.tb_next
@@ -46,10 +45,7 @@ class Exception:
             )
         else:
             exception_message += "!-- Traceback is None\n"
-        exception_message += (
-            f"!-- Description: {e}\n"
-            "-------------------------"
-        )
+        exception_message += f"!-- Description: {e}\n-------------------------"
         print(exception_message)
         debug_file = os.path.join(cls.output, "waybackup_error.log")
         print(f"Exception log: {debug_file}")
@@ -85,10 +81,10 @@ class Exception:
             if os.path.isfile(input):  # case single path
                 return os.path.relpath(input, os.getcwd())
             input_modified = ""
-            input_lines = input.split('\n')
-            if len(input_lines) == 1: # case single line
+            input_lines = input.split("\n")
+            if len(input_lines) == 1:  # case single line
                 return input
-            for line in input.split('\n'): # case multiple lines
+            for line in input.split("\n"):  # case multiple lines
                 match = path_pattern.search(line)
                 if match:
                     original_path = match.group(1)
@@ -104,5 +100,6 @@ class Exception:
         if issubclass(exception_type, KeyboardInterrupt):
             sys.__excepthook__(exception_type, exception, traceback)
             return
-        Exception.exception("UNCAUGHT EXCEPTION", exception, traceback) # uncaught exceptions also with custom scheme
+        Exception.exception(
+            "UNCAUGHT EXCEPTION", exception, traceback
+        )  # uncaught exceptions also with custom scheme

{pywaybackup-3.0.4 → pywaybackup-3.1.0}/pywaybackup/SnapshotCollection.py RENAMED Viewed

@@ -1,7 +1,6 @@
 import json
 import csv
 import os
-import threading
 from tqdm import tqdm
@@ -9,8 +8,6 @@ from pywaybackup.Verbosity import Verbosity as vb
 from pywaybackup.helper import url_split
 from pywaybackup.db import Database
-LOCK = threading.Lock() # thread safe lock
 class SnapshotCollection:
     """
     Represents the interaction with the snapshot-collection contained in the snapshot database.
@@ -283,31 +280,36 @@ class SnapshotCollection:
         """
         Modify a snapshot-row in the snapshot table.
         """
-        global LOCK
-        with LOCK:
-            connection.cursor.execute(
-                f"""
-                UPDATE snapshot_tbl
-                SET {column} = ?
-                WHERE rowid = ?
-                """,
-                (value, snapshot_id)
-            )
-            connection.conn.commit()
+        connection.cursor.execute(
+            f"""
+            UPDATE snapshot_tbl
+            SET {column} = ?
+            WHERE rowid = ?
+            """,
+            (value, snapshot_id)
+        )
+        connection.conn.commit()
     def get_snapshot(connection):
         """
         Get a snapshot-row from the snapshot table with response NULL. (not processed)
         """
-        global LOCK
-        with LOCK:
-            connection.cursor.execute(
-                """
-                SELECT rowid, * FROM snapshot_tbl WHERE response IS NULL LIMIT 1
-                """
+        # mark as locked for other workers // only visual because get_snapshot fetches by NULL
+        connection.cursor.execute(
+            """
+            UPDATE snapshot_tbl
+            SET response = 'LOCK'
+            WHERE rowid = (
+                SELECT rowid FROM snapshot_tbl
+                WHERE response IS NULL
+                LIMIT 1
             )
-            row = connection.cursor.fetchone()
-            return row
+            RETURNING rowid, *;
+            """
+        )
+        row = connection.cursor.fetchone()
+        connection.conn.commit()
+        return row
     @classmethod
     def create_output(cls, url: str, timestamp: str, output: str):

{pywaybackup-3.0.4 → pywaybackup-3.1.0}/pywaybackup/Verbosity.py RENAMED Viewed

@@ -1,4 +1,3 @@
-import sys
 from tqdm import tqdm
 class Verbosity:
@@ -63,21 +62,24 @@ class Verbosity:
                 cls.pbar.refresh()
     @classmethod
-    def generate_logline(cls, status: str = "", type: str = "", message: str = ""):
+    def generate_logline(cls, status: str, type: str, message: str):
         """
-        STATUS     -> TYPE: MESSAGE
+        STATUS     ➔ TYPE: MESSAGE
         """
         if not status and not type:
             return message
-        status_length = 11
+        status_length = 10
         type_length = 5
         status = status.ljust(status_length)
+        status = f"{status} -> "
         type = type.ljust(type_length)
+        type = f"{type}: " if type.strip() else ""
-        log_entry = f"{status} -> {type}: {message}"
+        log_entry = f"{status}{type}{message}"
         return log_entry

pywaybackup-3.0.4/pywaybackup/archive.py → pywaybackup-3.1.0/pywaybackup/archive_download.py RENAMED Viewed

@@ -1,84 +1,26 @@
-import requests
-import os
 import gzip
+import http.client
+import os
 import threading
 import time
 import urllib.parse
-import http.client
+from datetime import datetime
+from socket import timeout
 from urllib.parse import urljoin
-from datetime import datetime, timezone
-from tqdm import tqdm
-from socket import timeout
+from importlib.metadata import version
-from pywaybackup.helper import url_get_timestamp, move_index, check_nt
+import requests
+from tqdm import tqdm
-from pywaybackup.SnapshotCollection import SnapshotCollection as sc
 from pywaybackup.Arguments import Configuration as config
-from pywaybackup.db import Database
-from importlib.metadata import version
+from pywaybackup.Exception import Exception as ex
+from pywaybackup.SnapshotCollection import SnapshotCollection as sc
 from pywaybackup.Verbosity import Message
 from pywaybackup.Verbosity import Verbosity as vb
-from pywaybackup.Exception import Exception as ex
-# GET: store page to wayback machine and response with redirect to snapshot
-# POST: store page to wayback machine and response with wayback machine status-page
-# tag_jobid = '<script>spn.watchJob("spn2-%s", "/_static/",6000);</script>'
-# tag_result_timeout = '<p>The same snapshot had been made %s minutes ago. You can make new capture of this URL after 1 hour.</p>'
-# tag_result_success = ' A snapshot was captured. Visit page: <a href="%s">%s</a>'
-def save_page(url: str):
-    """
-    Saves a webpage to the Wayback Machine.
-    Args:
-        url (str): The URL of the webpage to be saved.
-    Returns:
-        None: The function does not return any value. It only prints messages to the console.
-    """
-    vb.write(message="\nSaving page to the Wayback Machine...")
-    connection = http.client.HTTPSConnection("web.archive.org")
-    headers = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
-    }
-    connection.request("GET", f"https://web.archive.org/save/{url}", headers=headers)
-    vb.write(message="\n-----> Request sent")
-    response = connection.getresponse()
-    response_status = response.status
-    if response_status == 302:
-        location = response.getheader("Location")
-        vb.write(message="\n-----> Response: 302 (redirect to snapshot)")
-        snapshot_timestamp = datetime.strptime(url_get_timestamp(location), '%Y%m%d%H%M%S').strftime('%Y-%m-%d %H:%M:%S')
-        current_timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
-        timestamp_difference = (datetime.strptime(current_timestamp, '%Y-%m-%d %H:%M:%S') - datetime.strptime(snapshot_timestamp, '%Y-%m-%d %H:%M:%S')).seconds / 60
-        timestamp_difference = int(round(timestamp_difference, 0))
-        if timestamp_difference < 1:
-            vb.write(message="\n-----> New snapshot created")
-        elif timestamp_difference > 1:
-            vb.write(message=f"\n-----> Snapshot already exists. (1 hour limit) - wait for {60 - timestamp_difference} minutes")
-            vb.write(message=f"TIMESTAMP SNAPSHOT: {snapshot_timestamp}")
-            vb.write(message=f"TIMESTAMP REQUEST : {current_timestamp}")
-            vb.write(message=f"\nLAST SNAPSHOT BACK: {timestamp_difference} minutes")
-        vb.write(message=f"\nURL: {location}")
-    elif response_status == 404:
-        vb.write(message="\n-----> Response: 404 (not found)")
-        vb.write(message=f"\nFAILED -> URL: {url}")
-    else:
-        vb.write(message="\n-----> Response: unexpected")
-        vb.write(message=f"\nFAILED -> URL: {url}")
+from pywaybackup.db import Database
+from pywaybackup.helper import check_nt, move_index, url_get_timestamp
-    connection.close()
@@ -89,7 +31,7 @@ def startup():
         vb.write(message=f"\n<<< python-wayback-machine-downloader v{version('pywaybackup')} >>>")
         if Database.QUERY_EXIST:
-            vb.write(message=f"\nExisting query snapshots processed: {Database.QUERY_PROGRESS}\nResuming download... (to reset the job use '--reset')\n")
+            vb.write(message=f"\nDOWNLOAD job exist - processed: {Database.QUERY_PROGRESS}\nResuming download... (to reset the job use '--reset')\n")
             for i in range(5, -1, -1):
                 vb.write(message=f"\r{i}...")
@@ -224,8 +166,6 @@ def download_loop(output, worker, retry, no_redirect, delay):
             snapshot = sc.get_snapshot(db)
             if not snapshot: break
-            # mark as locked for other workers // only visual because get_snapshot fetches by NULL
-            sc.modify_snapshot(db, snapshot["rowid"], "response", "LOCK")
             SNAPSHOT_CURRENT = snapshot["rowid"]
             retry_attempt = 1
@@ -273,18 +213,18 @@ def download_loop(output, worker, retry, no_redirect, delay):
                     # depends on user - retries after timeout or proceed to next snapshot
                     if retry > 0:
-                        status_message.store(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot ID: [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - Download failed - retry Timeout: 15 seconds...")
+                        status_message.store(status="FAILED", message="retry timeout: 15 seconds...")
                         status_message.write()
                         time.sleep(15)
                     else:
-                        status_message.store(message=f"\n-----> Worker: {worker} - Attempt: [{retry_attempt}/{retry_max_attempt}] Snapshot ID: [{SNAPSHOT_CURRENT}/{sc.SNAPSHOT_TOTAL}] - Download failed")
+                        status_message.store(status="FAILED", message="no attempt left")
                         status_message.write()
                     sc.SNAPSHOT_HANDLED += 1
                     break # break all loops and do a user-defined retry
                 retry_attempt += 1
                 # if retry_attempt > retry_max_attempt:
-                #     status_message.store(status="FAILED", type="HTTP", message="Max retries exceeded")
+                #     status_message.store(status="FAILED", message="Max retries exceeded")
                 #     status_message.store(status="", type="URL", message=snapshot["url_archive"])
                 #     status_message.write()
                 #     vb.progress(1)
@@ -309,7 +249,7 @@ def download(db, output, snapshot_entry, connection, status_message, no_redirect
     response, response_data, response_status, response_status_message = download_response(connection, encoded_download_url, headers)
     sc.modify_snapshot(db, snapshot_entry["rowid"], "response", response_status)
     if not no_redirect and response_status == 302:
-        status_message.store(status="REDIRECT", type="HTTP", message=f"{response.status} - {response_status_message}")
+        status_message.store(status="REDIRECT", message=f"{response.status} - {response_status_message}")
         status_message.store(status="", type="FROM", message=download_url)
         for _ in range(5):
             response, response_data, response_status, response_status_message = download_response(connection, encoded_download_url, headers)
@@ -327,7 +267,7 @@ def download(db, output, snapshot_entry, connection, status_message, no_redirect
         # if output_file is too long for windows, skip download
         if check_nt() and len(output_file) > 255:
-            status_message.store(status="PATH > 255", type="HTTP", message=f"{response.status} - {response_status_message}")
+            status_message.store(status="PATH > 255", message=f"{response.status} - {response_status_message}")
             status_message.store(status="", type="URL", message=download_url)
             sc.entry_modify(snapshot_entry, "file", "PATH TOO LONG TO SAVE FILE")
             #status_message.write()
@@ -348,9 +288,9 @@ def download(db, output, snapshot_entry, connection, status_message, no_redirect
                 file.write(response_data)
             # check if file is downloaded
             if os.path.isfile(output_file):
-                status_message.store(status="SUCCESS", type="HTTP", message=f"{response.status} - {response_status_message}")
+                status_message.store(status="SUCCESS", message=f"{response.status} - {response_status_message}")
         else:
-            status_message.store(status="EXISTING", type="HTTP", message=f"{response.status} - {response_status_message}")
+            status_message.store(status="EXISTING", message=f"{response.status} - {response_status_message}")
         status_message.store(status="", type="URL", message=download_url)
         status_message.store(status="", type="FILE", message=output_file)
         sc.modify_snapshot(db, snapshot_entry["rowid"], "file", output_file)
@@ -359,7 +299,7 @@ def download(db, output, snapshot_entry, connection, status_message, no_redirect
         #status_message.write()
         return True
     else:
-        status_message.store(status="UNEXPECTED", type="HTTP", message=f"{response.status} - {response_status_message}")
+        status_message.store(status="UNEXPECTED", message=f"{response.status} - {response_status_message}")
         status_message.store(status="", type="URL", message=download_url)
         #status_message.write()
         return False
@@ -375,7 +315,7 @@ def download_response(connection, encoded_download_url, headers):
 RESPONSE_CODE_DICT = {
     200: "OK",
     301: "Moved Permanently",
-    302: "Found (redirect)",
+    302: "Redirect",
     400: "Bad Request",
     403: "Forbidden",
     404: "Not Found",

pywaybackup-3.1.0/pywaybackup/archive_save.py ADDED Viewed

@@ -0,0 +1,81 @@
+import http.client
+from datetime import datetime, timezone
+from importlib.metadata import version
+from pywaybackup.helper import url_get_timestamp
+from pywaybackup.Verbosity import Verbosity as vb
+# def startup():
+#     try:
+#         vb.write(message=f"\n<<< python-wayback-machine-downloader v{version('pywaybackup')} >>>")
+#         if Database.QUERY_EXIST:
+#             vb.write(message=f"\nSAVE job exist - processed {Database.QUERY_PROGRESS}\nResuming save... (to reset the job use '--reset')\n")
+#             for i in range(5, -1, -1):
+#                 vb.write(message=f"\r{i}...")
+#                 print("\033[F", end="")
+#                 print("\033[K", end="")
+#                 time.sleep(1)
+#             #vb.write(message="\n")
+#     except KeyboardInterrupt:
+#         os._exit(1)
+# GET: store page to wayback machine and response with redirect to snapshot
+# POST: store page to wayback machine and response with wayback machine status-page
+# tag_jobid = '<script>spn.watchJob("spn2-%s", "/_static/",6000);</script>'
+# tag_result_timeout = '<p>The same snapshot had been made %s minutes ago. You can make new capture of this URL after 1 hour.</p>'
+# tag_result_success = ' A snapshot was captured. Visit page: <a href="%s">%s</a>'
+def save_page(url: str):
+    """
+    Saves a webpage to the Wayback Machine.
+    Args:
+        url (str): The URL of the webpage to be saved.
+    Returns:
+        None: The function does not return any value. It only prints messages to the console.
+    """
+    try:
+        connection = http.client.HTTPSConnection("web.archive.org")
+        headers = {"User-Agent": f"bitdruid-python-wayback-downloader/{version('pywaybackup')}"}
+        vb.write(message="\nSaving page to the Wayback Machine...")
+        connection.request("GET", f"https://web.archive.org/save/{url}", headers=headers)
+        vb.write(message=f"\n-----> Request sent -> URL: {url}")
+        response = connection.getresponse()
+        response_status = response.status
+        if response_status == 302:
+            location = response.getheader("Location")
+            snapshot_timestamp = datetime.strptime(url_get_timestamp(location), '%Y%m%d%H%M%S').strftime('%Y-%m-%d %H:%M:%S')
+            current_timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
+            timestamp_difference = (datetime.strptime(current_timestamp, '%Y-%m-%d %H:%M:%S') - datetime.strptime(snapshot_timestamp, '%Y-%m-%d %H:%M:%S')).seconds / 60
+            timestamp_difference = int(round(timestamp_difference, 0))
+            if timestamp_difference < 1:
+                vb.write(message="\n-----> Response: 302 (new snapshot)")
+                vb.write(status="SNAPSHOT", type="URL", message=f"{location}")
+            elif timestamp_difference >= 1:
+                vb.write(message=f"\n-----> Response: 302 (existing snapshot - wait for {60 - timestamp_difference} minutes)")
+                vb.write(status="SNAPSHOT", type="URL", message=f"{location}")
+                vb.write(status="WAYBACK", type="TIME", message=f"{snapshot_timestamp}")
+                vb.write(status="REQUEST", type="TIME", message=f"{current_timestamp}")
+        elif response_status == 429:
+            vb.write(message="\n-----> Response: 429 (too many requests)")
+            vb.write(message="- no simultaneous allowed")
+            vb.write(message="- 15 per 5 minutes\n")
+        elif response_status == 520:
+            vb.write(message="\n-----> Response: 520 (job failed)")
+        elif response_status == 404:
+            vb.write(message="\n-----> Response: 404 (not found)")
+        else:
+            vb.write(message=f"\n-----> Response: {response_status} - UNHANDLED")
+        connection.close()
+    except ConnectionRefusedError:
+        vb.write(message="\nCONNECTION REFUSED -> could not connect to wayback machine")

{pywaybackup-3.0.4 → pywaybackup-3.1.0}/pywaybackup/db.py RENAMED Viewed

@@ -1,4 +1,4 @@
-import sqlite3
+import pysqlite3 as sqlite3
 class Database:
@@ -40,7 +40,6 @@ class Database:
         db.cursor.execute(cls.snapshot_table)
         db.cursor.execute("SELECT query_identifier FROM waybackup_table WHERE query_identifier = ?", (query_identifier,))
         if db.cursor.fetchone():
-            print("found")
             cls.QUERY_EXIST = True
             cls.QUERY_PROGRESS = db.get_progress()
         else:

{pywaybackup-3.0.4 → pywaybackup-3.1.0}/pywaybackup/helper.py RENAMED Viewed

@@ -1,4 +1,3 @@
 import os
 import shutil
 import magic
@@ -15,12 +14,13 @@ def sanitize_filename(input: str) -> str:
     """
     Sanitize a string to be used as (part of) a filename.
     """
-    disallowed = ['<', '>', ':', '"', '/', '\\', '|', '?', '*']
+    disallowed = ["<", ">", ":", '"', "/", "\\", "|", "?", "*"]
     for char in disallowed:
         input = input.replace(char, ".")
-    input = '.'.join(filter(None, input.split('.')))
+    input = ".".join(filter(None, input.split(".")))
     return input
 def sanitize_url(input: str) -> str:
     """
     Sanitize a url by encoding special characters.
@@ -32,12 +32,13 @@ def sanitize_url(input: str) -> str:
 def url_get_timestamp(url):
-        """
-        Extract the timestamp from a wayback machine URL.
-        """
-        timestamp = url.split("web/")[1].split("/")[0]
-        if "id_" in url: timestamp = timestamp.split("id_")[0]
-        return timestamp
+    """
+    Extract the timestamp from a wayback machine URL.
+    """
+    timestamp = url.split("web/")[1].split("/")[0]
+    if "id_" in url:
+        timestamp = timestamp.split("id_")[0]
+    return timestamp
 def url_split(url, index=False):
@@ -52,8 +53,8 @@ def url_split(url, index=False):
     if "://" in url:
         url = url.split("://")[1]
     domain = url.split("/")[0]
-    path = url[len(domain):]
-    domain = domain.split("@")[-1].split(":")[0] # remove mailto and port
+    path = url[len(domain) :]
+    domain = domain.split("@")[-1].split(":")[0]  # remove mailto and port
     path_parts = path.split("/")
     path_end = path_parts[-1]
     if not url.endswith("/") or "." in path_end:
@@ -87,21 +88,25 @@ def move_index(existpath: str = None, existfile: str = None, filebuffer: bytes =
         shutil.move(existpath, existpath + "_exist")
         os.makedirs(existpath, exist_ok=True)
         if not check_index_mime(existpath):
-            new_file = os.path.join(existpath, os.path.basename(os.path.normpath(existpath)))
+            new_file = os.path.join(
+                existpath, os.path.basename(os.path.normpath(existpath))
+            )
         else:
             new_file = os.path.join(existpath, "index.html")
         shutil.move(existpath + "_exist", new_file)
     elif existfile:
         if filebuffer:
             if not check_index_mime(filebuffer):
-                return os.path.join(existfile, os.path.basename(os.path.normpath(existfile)))
+                return os.path.join(
+                    existfile, os.path.basename(os.path.normpath(existfile))
+                )
             else:
                 return os.path.join(existfile, "index.html")
 def check_index_mime(filebuffer: bytes) -> bool:
     mime = magic.Magic(mime=True)
     mime_type = mime.from_buffer(filebuffer)
     if mime_type != "text/html":
         return False
-    return True
+    return True

{pywaybackup-3.0.4 → pywaybackup-3.1.0}/pywaybackup/main.py RENAMED Viewed

@@ -1,11 +1,10 @@
 import os
 import signal
-import pywaybackup.archive as archive
+import pywaybackup.archive_download as archive_download
+import pywaybackup.archive_save as archive_save
 from pywaybackup.SnapshotCollection import SnapshotCollection as sc
 from pywaybackup.Arguments import Configuration as config
 from pywaybackup.db import Database as db
 from pywaybackup.Verbosity import Verbosity as vb
@@ -16,19 +15,22 @@ def main():
     config.init()
     ex.init(config.output, config.command)
     vb.init(config.progress, config.log)
+    if config.save:
+        archive_save.save_page(config.url)
+        os._exit(1)
     db.init(config.dbfile, config.query_identifier)
     sc.init(config.mode)
-    if config.save:
-        archive.save_page(config.url)
-    else:
+    if not config.save:
-        archive.startup()
+        archive_download.startup()
         try:
-            archive.query_list(config.csvfile, config.cdxfile, config.range, config.limit, config.start, config.end, config.explicit, config.filetype)
-            archive.download_list(config.output, config.retry, config.no_redirect, config.delay, config.workers)
+            archive_download.query_list(config.csvfile, config.cdxfile, config.range, config.limit, config.start, config.end, config.explicit, config.filetype)
+            archive_download.download_list(config.output, config.retry, config.no_redirect, config.delay, config.workers)
         except KeyboardInterrupt:
             print("\nInterrupted by user\n")
             config.keep = True
@@ -44,8 +46,8 @@ def main():
             vb.fini()
             if not config.keep:
-                os.remove(config.dbfile)
-                os.remove(config.cdxfile)
+                os.remove(config.dbfile) if os.path.exists(config.dbfile) else None
+                os.remove(config.cdxfile) if os.path.exists(config.cdxfile) else None
             os._exit(1)

{pywaybackup-3.0.4 → pywaybackup-3.1.0/pywaybackup.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: pywaybackup
-Version: 3.0.4
+Version: 3.1.0
 Summary: Query and download archive.org as simple as possible.
 Author-email: bitdruid <bitdruid@outlook.com>
 License: MIT License
@@ -29,6 +29,7 @@ Project-URL: homepage, https://github.com/bitdruid/python-wayback-machine-downlo
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: pysqlite3-binary==0.5.4
 Requires-Dist: requests==2.31.0
 Requires-Dist: tqdm==4.66.2
 Requires-Dist: python-magic==0.4.27; sys_platform == "linux"
@@ -39,7 +40,7 @@ Requires-Dist: python-magic-bin==0.4.14; sys_platform == "win32"
 [![PyPI](https://img.shields.io/pypi/v/pywaybackup)](https://pypi.org/project/pywaybackup/)
 [![PyPI - Downloads](https://img.shields.io/pypi/dm/pywaybackup)](https://pypi.org/project/pywaybackup/)
 ![Python Version](https://img.shields.io/badge/Python-3.8-blue)
-![Python_Sqlite3 Version](https://img.shields.io/badge/Python_Sqlite3-3.25-blue)
+<!-- ![Python_Sqlite3 Version](https://img.shields.io/badge/Python_Sqlite3-3.35-blue) -->
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 Downloading archived web pages from the [Wayback Machine](https://archive.org/web/).

{pywaybackup-3.0.4 → pywaybackup-3.1.0}/pywaybackup.egg-info/SOURCES.txt RENAMED Viewed

@@ -7,7 +7,8 @@ pywaybackup/Exception.py
 pywaybackup/SnapshotCollection.py
 pywaybackup/Verbosity.py
 pywaybackup/__init__.py
-pywaybackup/archive.py
+pywaybackup/archive_download.py
+pywaybackup/archive_save.py
 pywaybackup/db.py
 pywaybackup/helper.py
 pywaybackup/main.py