pywaybackup-3.4.1.tar.gz → pywaybackup-4.0.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pywaybackup-3.4.1/pywaybackup.egg-info → pywaybackup-4.0.0}/PKG-INFO +43 -3
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/README.md +42 -2
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/pyproject.toml +1 -1
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/pywaybackup/Arguments.py +3 -2
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/pywaybackup/Exception.py +7 -7
- pywaybackup-4.0.0/pywaybackup/PyWayBackup.py +499 -0
- pywaybackup-4.0.0/pywaybackup/Snapshot.py +129 -0
- pywaybackup-4.0.0/pywaybackup/SnapshotCollection.py +315 -0
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/pywaybackup/Verbosity.py +7 -7
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/pywaybackup/Worker.py +11 -73
- pywaybackup-4.0.0/pywaybackup/archive_download.py +248 -0
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/pywaybackup/archive_save.py +1 -18
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/pywaybackup/db.py +15 -6
- pywaybackup-4.0.0/pywaybackup/files.py +179 -0
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/pywaybackup/main.py +2 -2
- {pywaybackup-3.4.1 → pywaybackup-4.0.0/pywaybackup.egg-info}/PKG-INFO +43 -3
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/pywaybackup.egg-info/SOURCES.txt +2 -1
- pywaybackup-3.4.1/pywaybackup/Converter.py +0 -181
- pywaybackup-3.4.1/pywaybackup/PyWayBackup.py +0 -234
- pywaybackup-3.4.1/pywaybackup/SnapshotCollection.py +0 -416
- pywaybackup-3.4.1/pywaybackup/archive_download.py +0 -358
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/LICENSE +0 -0
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/pywaybackup/__init__.py +0 -0
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/pywaybackup/helper.py +0 -0
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/pywaybackup.egg-info/dependency_links.txt +0 -0
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/pywaybackup.egg-info/entry_points.txt +0 -0
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/pywaybackup.egg-info/requires.txt +0 -0
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/pywaybackup.egg-info/top_level.txt +0 -0
- {pywaybackup-3.4.1 → pywaybackup-4.0.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pywaybackup
|
|
3
|
-
Version: 3.4.1
|
|
3
|
+
Version: 4.0.0
|
|
4
4
|
Summary: Query and download archive.org as simple as possible.
|
|
5
5
|
Author-email: bitdruid <bitdruid@outlook.com>
|
|
6
6
|
License: MIT License
|
|
@@ -49,6 +49,17 @@ Internet-archive is a nice source for several OSINT-information. This tool is a
|
|
|
49
49
|
|
|
50
50
|
This tool allows you to download content from the Wayback Machine (archive.org). You can use it to download either the latest version or all versions of web page snapshots within a specified range.
|
|
51
51
|
|
|
52
|
+
# Content
|
|
53
|
+
|
|
54
|
+
➡️ [Installation](#installation) <br>
|
|
55
|
+
➡️ [notes / issues / hints](#notes--issues--hints) <br>
|
|
56
|
+
➡️ [import](#import) <br>
|
|
57
|
+
➡️ [cli](#cli) <br>
|
|
58
|
+
➡️ [Usage](#usage) <br>
|
|
59
|
+
➡️ [Examples](#examples) <br>
|
|
60
|
+
➡️ [Output](#output) <br>
|
|
61
|
+
➡️ [Contributing](#contributing) <br>
|
|
62
|
+
|
|
52
63
|
## Installation
|
|
53
64
|
|
|
54
65
|
### Pip
|
|
@@ -81,8 +92,14 @@ This tool allows you to download content from the Wayback Machine (archive.org).
|
|
|
81
92
|
You can import pywaybackup into your own scripts and run it. Args are the same as cli.
|
|
82
93
|
|
|
83
94
|
Additional args:
|
|
84
|
-
- `silent` (default
|
|
85
|
-
- `debug` (default
|
|
95
|
+
- `silent` (default False): If True, suppresses all output to the console.
|
|
96
|
+
- `debug` (default True): If False, disables writing errors to the error log file.
|
|
97
|
+
|
|
98
|
+
Use:
|
|
99
|
+
- `run()`
|
|
100
|
+
- `status()`
|
|
101
|
+
- `paths()`
|
|
102
|
+
- `stop()`
|
|
86
103
|
|
|
87
104
|
```python
|
|
88
105
|
from pywaybackup import PyWayBackup
|
|
@@ -114,6 +131,29 @@ output:
|
|
|
114
131
|
}
|
|
115
132
|
```
|
|
116
133
|
|
|
134
|
+
... or run it asynchronously and print the current status or stop it whenever needed.
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
import time
|
|
138
|
+
from pywaybackup import PyWayBackup
|
|
139
|
+
|
|
140
|
+
backup = PyWayBackup( ... )
|
|
141
|
+
backup.run(daemon=True)
|
|
142
|
+
print(backup.status())
|
|
143
|
+
time.sleep(10)
|
|
144
|
+
print(backup.status())
|
|
145
|
+
backup.stop()
|
|
146
|
+
```
|
|
147
|
+
output:
|
|
148
|
+
```bash
|
|
149
|
+
{
|
|
150
|
+
'task': 'downloading snapshots',
|
|
151
|
+
'current': 15,
|
|
152
|
+
'total': 84,
|
|
153
|
+
'progress': '18%'
|
|
154
|
+
}
|
|
155
|
+
```
|
|
156
|
+
|
|
117
157
|
## cli
|
|
118
158
|
|
|
119
159
|
- `-h`, `--help`: Show the help message and exit.
|
|
@@ -11,6 +11,17 @@ Internet-archive is a nice source for several OSINT-information. This tool is a
|
|
|
11
11
|
|
|
12
12
|
This tool allows you to download content from the Wayback Machine (archive.org). You can use it to download either the latest version or all versions of web page snapshots within a specified range.
|
|
13
13
|
|
|
14
|
+
# Content
|
|
15
|
+
|
|
16
|
+
➡️ [Installation](#installation) <br>
|
|
17
|
+
➡️ [notes / issues / hints](#notes--issues--hints) <br>
|
|
18
|
+
➡️ [import](#import) <br>
|
|
19
|
+
➡️ [cli](#cli) <br>
|
|
20
|
+
➡️ [Usage](#usage) <br>
|
|
21
|
+
➡️ [Examples](#examples) <br>
|
|
22
|
+
➡️ [Output](#output) <br>
|
|
23
|
+
➡️ [Contributing](#contributing) <br>
|
|
24
|
+
|
|
14
25
|
## Installation
|
|
15
26
|
|
|
16
27
|
### Pip
|
|
@@ -43,8 +54,14 @@ This tool allows you to download content from the Wayback Machine (archive.org).
|
|
|
43
54
|
You can import pywaybackup into your own scripts and run it. Args are the same as cli.
|
|
44
55
|
|
|
45
56
|
Additional args:
|
|
46
|
-
- `silent` (default
|
|
47
|
-
- `debug` (default
|
|
57
|
+
- `silent` (default False): If True, suppresses all output to the console.
|
|
58
|
+
- `debug` (default True): If False, disables writing errors to the error log file.
|
|
59
|
+
|
|
60
|
+
Use:
|
|
61
|
+
- `run()`
|
|
62
|
+
- `status()`
|
|
63
|
+
- `paths()`
|
|
64
|
+
- `stop()`
|
|
48
65
|
|
|
49
66
|
```python
|
|
50
67
|
from pywaybackup import PyWayBackup
|
|
@@ -76,6 +93,29 @@ output:
|
|
|
76
93
|
}
|
|
77
94
|
```
|
|
78
95
|
|
|
96
|
+
... or run it asynchronously and print the current status or stop it whenever needed.
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
import time
|
|
100
|
+
from pywaybackup import PyWayBackup
|
|
101
|
+
|
|
102
|
+
backup = PyWayBackup( ... )
|
|
103
|
+
backup.run(daemon=True)
|
|
104
|
+
print(backup.status())
|
|
105
|
+
time.sleep(10)
|
|
106
|
+
print(backup.status())
|
|
107
|
+
backup.stop()
|
|
108
|
+
```
|
|
109
|
+
output:
|
|
110
|
+
```bash
|
|
111
|
+
{
|
|
112
|
+
'task': 'downloading snapshots',
|
|
113
|
+
'current': 15,
|
|
114
|
+
'total': 84,
|
|
115
|
+
'progress': '18%'
|
|
116
|
+
}
|
|
117
|
+
```
|
|
118
|
+
|
|
79
119
|
## cli
|
|
80
120
|
|
|
81
121
|
- `-h`, `--help`: Show the help message and exit.
|
|
@@ -24,8 +24,8 @@ class Arguments:
|
|
|
24
24
|
optional = parser.add_argument_group("optional query parameters")
|
|
25
25
|
optional.add_argument("-e", "--explicit", action="store_true", help="search only for the explicit given url")
|
|
26
26
|
optional.add_argument("-r", "--range", type=int, metavar="", help="range in years to search")
|
|
27
|
-
optional.add_argument("--start", type=int, metavar="", help="start timestamp format:
|
|
28
|
-
optional.add_argument("--end", type=int, metavar="", help="end timestamp format:
|
|
27
|
+
optional.add_argument("--start", type=int, metavar="", help="start timestamp format: YYYYMMDDHHMMSS")
|
|
28
|
+
optional.add_argument("--end", type=int, metavar="", help="end timestamp format: YYYYMMDDHHMMSS")
|
|
29
29
|
optional.add_argument("--limit", type=int, nargs="?", const=True, metavar="int", help="limit the number of snapshots to download")
|
|
30
30
|
optional.add_argument("--filetype", type=str, metavar="", help="filetypes to download comma separated (js,css,...)")
|
|
31
31
|
optional.add_argument("--statuscode", type=str, metavar="", help="statuscodes to download comma separated (200,404,...)")
|
|
@@ -55,3 +55,4 @@ class Arguments:
|
|
|
55
55
|
def get_args(self) -> dict:
|
|
56
56
|
"""Returns the parsed arguments as a dictionary."""
|
|
57
57
|
return vars(self.args)
|
|
58
|
+
|
|
@@ -14,9 +14,9 @@ class Exception:
|
|
|
14
14
|
command = None
|
|
15
15
|
|
|
16
16
|
@classmethod
|
|
17
|
-
def init(cls,
|
|
17
|
+
def init(cls, debugfile=None, output=None, command=None):
|
|
18
18
|
sys.excepthook = cls.exception_handler # set custom exception handler (uncaught exceptions)
|
|
19
|
-
cls.
|
|
19
|
+
cls.debugfile = debugfile
|
|
20
20
|
cls.output = output
|
|
21
21
|
cls.command = command
|
|
22
22
|
|
|
@@ -45,18 +45,18 @@ class Exception:
|
|
|
45
45
|
exception_message += "!-- Traceback is None\n"
|
|
46
46
|
exception_message += f"!-- Description: {e}\n-------------------------"
|
|
47
47
|
print(exception_message)
|
|
48
|
-
if cls.
|
|
49
|
-
print(f"Exception log: {cls.
|
|
48
|
+
if cls.debugfile:
|
|
49
|
+
print(f"Exception log: {cls.debugfile}")
|
|
50
50
|
if cls.new_debug: # new run, overwrite file
|
|
51
51
|
cls.new_debug = False
|
|
52
|
-
f = open(cls.
|
|
52
|
+
f = open(cls.debugfile, "w", encoding="utf-8")
|
|
53
53
|
f.write("-------------------------\n")
|
|
54
54
|
f.write(f"Version: {version('pywaybackup')}\n")
|
|
55
55
|
f.write("-------------------------\n")
|
|
56
56
|
f.write(f"Command: {cls.command}\n")
|
|
57
57
|
f.write("-------------------------\n\n")
|
|
58
58
|
else: # current run, append to file
|
|
59
|
-
f = open(cls.
|
|
59
|
+
f = open(cls.debugfile, "a", encoding="utf-8")
|
|
60
60
|
f.write(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n")
|
|
61
61
|
f.write(exception_message + "\n")
|
|
62
62
|
f.write("!-- Local Variables:\n")
|
|
@@ -96,4 +96,4 @@ class Exception:
|
|
|
96
96
|
if issubclass(exception_type, KeyboardInterrupt):
|
|
97
97
|
sys.__excepthook__(exception_type, exception, traceback)
|
|
98
98
|
return
|
|
99
|
-
Exception.exception(
|
|
99
|
+
Exception.exception('UNCAUGHT EXCEPTION', exception, traceback) # uncaught exceptions also with custom scheme
|