pywaybackup 4.1.6__tar.gz → 4.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pywaybackup-4.1.6/pywaybackup.egg-info → pywaybackup-4.2.0}/PKG-INFO +17 -7
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/README.md +14 -4
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pyproject.toml +3 -3
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/PyWayBackup.py +1 -0
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/SnapshotCollection.py +21 -16
- pywaybackup-4.2.0/pywaybackup/arg_parser.py +55 -0
- pywaybackup-4.2.0/pywaybackup/arg_specs.py +257 -0
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/db.py +18 -5
- pywaybackup-4.2.0/pywaybackup/interactive.py +144 -0
- pywaybackup-4.2.0/pywaybackup/main.py +32 -0
- {pywaybackup-4.1.6 → pywaybackup-4.2.0/pywaybackup.egg-info}/PKG-INFO +17 -7
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup.egg-info/SOURCES.txt +3 -1
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup.egg-info/requires.txt +1 -1
- pywaybackup-4.1.6/pywaybackup/Arguments.py +0 -71
- pywaybackup-4.1.6/pywaybackup/main.py +0 -13
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/LICENSE +0 -0
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/Exception.py +0 -0
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/Snapshot.py +0 -0
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/Verbosity.py +0 -0
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/Worker.py +0 -0
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/__init__.py +0 -0
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/archive_download.py +0 -0
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/archive_save.py +0 -0
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/files.py +0 -0
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/helper.py +0 -0
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup.egg-info/dependency_links.txt +0 -0
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup.egg-info/entry_points.txt +0 -0
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup.egg-info/top_level.txt +0 -0
- {pywaybackup-4.1.6 → pywaybackup-4.2.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pywaybackup
|
|
3
|
-
Version: 4.1.6
|
|
3
|
+
Version: 4.2.0
|
|
4
4
|
Summary: Query and download archive.org as simple as possible.
|
|
5
5
|
Author-email: bitdruid <bitdruid@outlook.com>
|
|
6
6
|
License: MIT License
|
|
@@ -26,11 +26,11 @@ License: MIT License
|
|
|
26
26
|
SOFTWARE.
|
|
27
27
|
|
|
28
28
|
Project-URL: homepage, https://github.com/bitdruid/python-wayback-machine-downloader
|
|
29
|
-
Requires-Python:
|
|
29
|
+
Requires-Python: >=3.8
|
|
30
30
|
Description-Content-Type: text/markdown
|
|
31
31
|
License-File: LICENSE
|
|
32
32
|
Requires-Dist: ruff
|
|
33
|
-
Requires-Dist: SQLAlchemy==2.0.
|
|
33
|
+
Requires-Dist: SQLAlchemy==2.0.51
|
|
34
34
|
Requires-Dist: requests==2.32.3
|
|
35
35
|
Requires-Dist: tqdm==4.67.1
|
|
36
36
|
Requires-Dist: python-magic-standalone==0.4.28
|
|
@@ -39,7 +39,7 @@ Requires-Dist: python-magic-standalone==0.4.28
|
|
|
39
39
|
|
|
40
40
|
[](https://pypi.org/project/pywaybackup/)
|
|
41
41
|
[](https://pypi.org/project/pywaybackup/)
|
|
42
|
-

|
|
42
|
+

|
|
43
43
|
[](https://opensource.org/licenses/MIT)
|
|
44
44
|
|
|
45
45
|
Downloading archived web pages from the [Wayback Machine](https://archive.org/web/).
|
|
@@ -68,6 +68,13 @@ This tool allows you to download content from the Wayback Machine (archive.org).
|
|
|
68
68
|
2. Run the tool <br>
|
|
69
69
|
`waybackup -h`
|
|
70
70
|
|
|
71
|
+
### Standalone binary
|
|
72
|
+
|
|
73
|
+
Prebuilt executables for Windows, Linux and macOS are attached to each [release](https://github.com/bitdruid/python-wayback-machine-downloader/releases). No Python required.
|
|
74
|
+
|
|
75
|
+
- Run from a terminal with arguments like the pip version: `waybackup -h`
|
|
76
|
+
- Or start it without arguments (e.g. double-click on Windows) to enter **interactive mode** — the tool will prompt you for URL, mode and optional settings.
|
|
77
|
+
|
|
71
78
|
### Manual
|
|
72
79
|
|
|
73
80
|
1. Clone the repository <br>
|
|
@@ -155,8 +162,9 @@ output:
|
|
|
155
162
|
|
|
156
163
|
## cli
|
|
157
164
|
|
|
158
|
-
- `-h`, `--help`: Show the help message and exit.
|
|
159
|
-
|
|
165
|
+
- `-h`, `--help`: Show the help message and exit. Version info is shown in the help header.
|
|
166
|
+
|
|
167
|
+
> **Interactive mode:** running `waybackup` without any arguments in a terminal starts a guided prompt for URL, mode and optional settings. Without a terminal (scripts/cron), the help is printed instead.
|
|
160
168
|
|
|
161
169
|
#### Required
|
|
162
170
|
|
|
@@ -171,6 +179,8 @@ output:
|
|
|
171
179
|
Last Version. Gives one folder containing the last version of each file of specified `--range`.
|
|
172
180
|
- **`-f`**, **`--first`**:<br>
|
|
173
181
|
First Version. Gives one folder containing the first version of each file of specified `--range`.
|
|
182
|
+
- **`-s`**, **`--save`**:<br>
|
|
183
|
+
Save a page to the wayback machine (no download).
|
|
174
184
|
|
|
175
185
|
#### Optional query parameters
|
|
176
186
|
|
|
@@ -219,7 +229,7 @@ Parameters will change the download behavior for snapshots.
|
|
|
219
229
|
Set verbosity level. Available levels:
|
|
220
230
|
- `low` (or `quiet`, `minimal`, `min`): Essential output only (same as no flag)
|
|
221
231
|
- `default` (or `normal`, `verbose`): Standard verbose output (default when flag is set)
|
|
222
|
-
- `high` (or `
|
|
232
|
+
- `high` (or `detailed`, `max`): Detailed verbose output
|
|
223
233
|
|
|
224
234
|
Examples: `--verbose`, `--verbose default`, `--verbose high`, `-v high`
|
|
225
235
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
[](https://pypi.org/project/pywaybackup/)
|
|
4
4
|
[](https://pypi.org/project/pywaybackup/)
|
|
5
|
-

|
|
5
|
+

|
|
6
6
|
[](https://opensource.org/licenses/MIT)
|
|
7
7
|
|
|
8
8
|
Downloading archived web pages from the [Wayback Machine](https://archive.org/web/).
|
|
@@ -31,6 +31,13 @@ This tool allows you to download content from the Wayback Machine (archive.org).
|
|
|
31
31
|
2. Run the tool <br>
|
|
32
32
|
`waybackup -h`
|
|
33
33
|
|
|
34
|
+
### Standalone binary
|
|
35
|
+
|
|
36
|
+
Prebuilt executables for Windows, Linux and macOS are attached to each [release](https://github.com/bitdruid/python-wayback-machine-downloader/releases). No Python required.
|
|
37
|
+
|
|
38
|
+
- Run from a terminal with arguments like the pip version: `waybackup -h`
|
|
39
|
+
- Or start it without arguments (e.g. double-click on Windows) to enter **interactive mode** — the tool will prompt you for URL, mode and optional settings.
|
|
40
|
+
|
|
34
41
|
### Manual
|
|
35
42
|
|
|
36
43
|
1. Clone the repository <br>
|
|
@@ -118,8 +125,9 @@ output:
|
|
|
118
125
|
|
|
119
126
|
## cli
|
|
120
127
|
|
|
121
|
-
- `-h`, `--help`: Show the help message and exit.
|
|
122
|
-
|
|
128
|
+
- `-h`, `--help`: Show the help message and exit. Version info is shown in the help header.
|
|
129
|
+
|
|
130
|
+
> **Interactive mode:** running `waybackup` without any arguments in a terminal starts a guided prompt for URL, mode and optional settings. Without a terminal (scripts/cron), the help is printed instead.
|
|
123
131
|
|
|
124
132
|
#### Required
|
|
125
133
|
|
|
@@ -134,6 +142,8 @@ output:
|
|
|
134
142
|
Last Version. Gives one folder containing the last version of each file of specified `--range`.
|
|
135
143
|
- **`-f`**, **`--first`**:<br>
|
|
136
144
|
First Version. Gives one folder containing the first version of each file of specified `--range`.
|
|
145
|
+
- **`-s`**, **`--save`**:<br>
|
|
146
|
+
Save a page to the wayback machine (no download).
|
|
137
147
|
|
|
138
148
|
#### Optional query parameters
|
|
139
149
|
|
|
@@ -182,7 +192,7 @@ Parameters will change the download behavior for snapshots.
|
|
|
182
192
|
Set verbosity level. Available levels:
|
|
183
193
|
- `low` (or `quiet`, `minimal`, `min`): Essential output only (same as no flag)
|
|
184
194
|
- `default` (or `normal`, `verbose`): Standard verbose output (default when flag is set)
|
|
185
|
-
- `high` (or `
|
|
195
|
+
- `high` (or `detailed`, `max`): Detailed verbose output
|
|
186
196
|
|
|
187
197
|
Examples: `--verbose`, `--verbose default`, `--verbose high`, `-v high`
|
|
188
198
|
|
|
@@ -10,18 +10,18 @@ packages = ["pywaybackup"]
|
|
|
10
10
|
[project]
|
|
11
11
|
dependencies = [
|
|
12
12
|
"ruff",
|
|
13
|
-
"SQLAlchemy==2.0.
|
|
13
|
+
"SQLAlchemy==2.0.51",
|
|
14
14
|
"requests==2.32.3",
|
|
15
15
|
"tqdm==4.67.1",
|
|
16
16
|
"python-magic-standalone==0.4.28",
|
|
17
17
|
]
|
|
18
18
|
name = "pywaybackup"
|
|
19
|
-
version = "4.1.6"
|
|
19
|
+
version = "4.2.0"
|
|
20
20
|
description = "Query and download archive.org as simple as possible."
|
|
21
21
|
authors = [{ name = "bitdruid", email = "bitdruid@outlook.com" }]
|
|
22
22
|
license = { file = "LICENSE" }
|
|
23
23
|
readme = "README.md"
|
|
24
|
-
requires-python = ">=3.8
|
|
24
|
+
requires-python = ">=3.8"
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
[project.scripts]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
|
|
3
|
-
from pywaybackup.db import Database,
|
|
3
|
+
from pywaybackup.db import Database, and_, delete, func, or_, select, text, tuple_, update, waybackup_snapshots
|
|
4
4
|
from pywaybackup.files import CDXfile, CSVfile
|
|
5
5
|
from pywaybackup.Verbosity import Progressbar
|
|
6
6
|
from pywaybackup.Verbosity import Verbosity as vb
|
|
@@ -217,30 +217,35 @@ class SnapshotCollection:
|
|
|
217
217
|
def _index_snapshots(self):
    """
    Create the lookup indexes on the snapshot table.

    The indexes are issued as raw ``CREATE INDEX IF NOT EXISTS`` statements
    instead of sqlalchemy Index objects: Index(...) attaches to the
    module-global table metadata, which accumulates duplicates when the
    package is reused in-process (library usage) and breaks create_all().
    """
    statements = []
    if self._mode_last:
        # index for filtering last snapshots
        statements.append(
            "CREATE INDEX IF NOT EXISTS idx_waybackup_snapshots_url_origin_timestamp_desc "
            "ON waybackup_snapshots (url_origin, timestamp DESC)"
        )
    if self._mode_first:
        # index for filtering first snapshots
        statements.append(
            "CREATE INDEX IF NOT EXISTS idx_waybackup_snapshots_url_origin_timestamp_asc "
            "ON waybackup_snapshots (url_origin, timestamp ASC)"
        )
    # index for skippable snapshots (created regardless of mode)
    statements.append(
        "CREATE INDEX IF NOT EXISTS idx_waybackup_snapshots_timestamp_url_origin_response "
        "ON waybackup_snapshots (timestamp, url_origin)"
    )

    # run the statements in declaration order, then persist them in one commit
    for ddl in statements:
        self.db.session.execute(text(ddl))
    self.db.session.commit()
|
|
244
249
|
|
|
245
250
|
def _filter_snapshots(self):
|
|
246
251
|
"""
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import sys
|
|
3
|
+
from argparse import RawTextHelpFormatter
|
|
4
|
+
from importlib.metadata import version
|
|
5
|
+
|
|
6
|
+
from pywaybackup.arg_specs import ARG_GROUPS, ARG_SPECS, EXCLUSIVE_GROUPS
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Arguments:
    """Build the argparse parser from the shared argument specs and parse the command line."""

    def __init__(self):
        """Assemble groups and arguments from ARG_GROUPS / EXCLUSIVE_GROUPS / ARG_SPECS, then parse."""
        banner = (
            f"<<< python-wayback-machine-downloader v{version('pywaybackup')} >>>\n"
            "by @bitdruid -> https://github.com/bitdruid"
        )
        parser = argparse.ArgumentParser(description=banner, formatter_class=RawTextHelpFormatter)

        # one argparse group per label, created in the listed order
        groups = {}
        for label in ARG_GROUPS:
            groups[label] = parser.add_argument_group(label)

        # each mutually exclusive group is nested inside its declared parent group
        exclusive = {}
        for ex_name, ex_meta in EXCLUSIVE_GROUPS.items():
            parent = groups[ex_meta["parent_group"]]
            exclusive[ex_name] = parent.add_mutually_exclusive_group(required=ex_meta["required"])

        # a spec lands in its exclusive group when it names one, otherwise in its plain group
        for spec in ARG_SPECS:
            if spec.exclusive_group:
                container = exclusive[spec.exclusive_group]
            else:
                container = groups[spec.group]
            container.add_argument(*spec.flags, **_argparse_kwargs(spec))

        # if no arguments are given, print help
        if sys.argv[1:]:
            parsed = parser.parse_args(args=None)
        else:
            parsed = parser.parse_args(args=["--help"])

        # fixed internal flags that are not exposed on the command line
        parsed.silent = False
        parsed.debug = True

        self.args = parsed

    def get_args(self) -> dict:
        """Return the parsed namespace as a plain dictionary."""
        return vars(self.args)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _argparse_kwargs(spec) -> dict:
|
|
40
|
+
"""Translate an ArgSpec into kwargs for argparse.add_argument()."""
|
|
41
|
+
kwargs = {"help": spec.help}
|
|
42
|
+
if spec.action == "store_true":
|
|
43
|
+
kwargs["action"] = "store_true"
|
|
44
|
+
kwargs["default"] = bool(spec.default)
|
|
45
|
+
elif spec.action == "optional_value":
|
|
46
|
+
kwargs["type"] = spec.type
|
|
47
|
+
kwargs["nargs"] = "?"
|
|
48
|
+
kwargs["const"] = spec.const
|
|
49
|
+
kwargs["metavar"] = spec.metavar
|
|
50
|
+
kwargs["default"] = spec.default
|
|
51
|
+
else:
|
|
52
|
+
kwargs["type"] = spec.type
|
|
53
|
+
kwargs["metavar"] = spec.metavar
|
|
54
|
+
kwargs["default"] = spec.default
|
|
55
|
+
return kwargs
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Single source of truth for waybackup CLI arguments.
|
|
3
|
+
|
|
4
|
+
Both Arguments (argparse) and Interactive (input prompts) consume this list,
|
|
5
|
+
so a new flag only needs to be added in one place. PyWayBackup.__init__ keeps
|
|
6
|
+
its explicit signature (vscode autocomplete).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any, List, Optional
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Alias captured before the class body runs. Inside ArgSpec the field named
# ``type`` is assigned (to None) before its own annotation is evaluated, so
# writing ``Optional[type]`` there would resolve to ``Optional[None]``.
_ArgType = type


@dataclass
class ArgSpec:
    """Declarative description of one CLI argument (see the per-field comments)."""

    name: str  # internal key (matches PyWayBackup.__init__ kwarg)
    flags: List[str]  # CLI flags, e.g. ["-u", "--url"]
    group: str  # argparse group label
    help: str  # CLI help text
    prompt: Optional[str] = None  # interactive prompt label (None = skip in interactive)
    type: Optional[_ArgType] = None  # str / int / None for store_true
    default: Any = None
    action: str = "store"  # "store" | "store_true" | "optional_value"
    const: Any = None  # used when action="optional_value"
    metavar: str = ""  # argparse metavar; ignored for store_true
    exclusive_group: Optional[str] = None  # name of a mutex group below
    advanced: bool = False  # show in interactive only when advanced opts enabled
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Mutually exclusive groups, keyed by name. Specs join them via exclusive_group=...
|
|
30
|
+
EXCLUSIVE_GROUPS = {
|
|
31
|
+
"mode": {"required": True, "parent_group": "required (one exclusive)"},
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
# argparse groups in display order
|
|
35
|
+
ARG_GROUPS = [
|
|
36
|
+
"required (one exclusive)",
|
|
37
|
+
"optional query parameters",
|
|
38
|
+
"manipulate behavior",
|
|
39
|
+
"special",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
ARG_SPECS: List[ArgSpec] = [
|
|
44
|
+
# required
|
|
45
|
+
ArgSpec(
|
|
46
|
+
name="url",
|
|
47
|
+
flags=["-u", "--url"],
|
|
48
|
+
group="required (one exclusive)",
|
|
49
|
+
help="url (with subdir/subdomain) to download",
|
|
50
|
+
prompt="URL to download (with subdir/subdomain)",
|
|
51
|
+
type=str,
|
|
52
|
+
),
|
|
53
|
+
ArgSpec(
|
|
54
|
+
name="all",
|
|
55
|
+
flags=["-a", "--all"],
|
|
56
|
+
group="required (one exclusive)",
|
|
57
|
+
help="download snapshots of all timestamps",
|
|
58
|
+
action="store_true",
|
|
59
|
+
default=False,
|
|
60
|
+
exclusive_group="mode",
|
|
61
|
+
),
|
|
62
|
+
ArgSpec(
|
|
63
|
+
name="last",
|
|
64
|
+
flags=["-l", "--last"],
|
|
65
|
+
group="required (one exclusive)",
|
|
66
|
+
help="download the last version of each file snapshot",
|
|
67
|
+
action="store_true",
|
|
68
|
+
default=False,
|
|
69
|
+
exclusive_group="mode",
|
|
70
|
+
),
|
|
71
|
+
ArgSpec(
|
|
72
|
+
name="first",
|
|
73
|
+
flags=["-f", "--first"],
|
|
74
|
+
group="required (one exclusive)",
|
|
75
|
+
help="download the first version of each file snapshot",
|
|
76
|
+
action="store_true",
|
|
77
|
+
default=False,
|
|
78
|
+
exclusive_group="mode",
|
|
79
|
+
),
|
|
80
|
+
ArgSpec(
|
|
81
|
+
name="save",
|
|
82
|
+
flags=["-s", "--save"],
|
|
83
|
+
group="required (one exclusive)",
|
|
84
|
+
help="save a page to the wayback machine",
|
|
85
|
+
action="store_true",
|
|
86
|
+
default=False,
|
|
87
|
+
exclusive_group="mode",
|
|
88
|
+
),
|
|
89
|
+
# -------------------- optional query --------------------
|
|
90
|
+
ArgSpec(
|
|
91
|
+
name="explicit",
|
|
92
|
+
flags=["-e", "--explicit"],
|
|
93
|
+
group="optional query parameters",
|
|
94
|
+
help="search only for the explicit given url",
|
|
95
|
+
action="store_true",
|
|
96
|
+
default=False,
|
|
97
|
+
),
|
|
98
|
+
ArgSpec(
|
|
99
|
+
name="range",
|
|
100
|
+
flags=["-r", "--range"],
|
|
101
|
+
group="optional query parameters",
|
|
102
|
+
help="range in years to search",
|
|
103
|
+
prompt="Range in years to search",
|
|
104
|
+
type=int,
|
|
105
|
+
advanced=True,
|
|
106
|
+
),
|
|
107
|
+
ArgSpec(
|
|
108
|
+
name="start",
|
|
109
|
+
flags=["--start"],
|
|
110
|
+
group="optional query parameters",
|
|
111
|
+
help="start timestamp format: YYYYMMDDHHMMSS",
|
|
112
|
+
type=int,
|
|
113
|
+
),
|
|
114
|
+
ArgSpec(
|
|
115
|
+
name="end",
|
|
116
|
+
flags=["--end"],
|
|
117
|
+
group="optional query parameters",
|
|
118
|
+
help="end timestamp format: YYYYMMDDHHMMSS",
|
|
119
|
+
type=int,
|
|
120
|
+
),
|
|
121
|
+
ArgSpec(
|
|
122
|
+
name="limit",
|
|
123
|
+
flags=["--limit"],
|
|
124
|
+
group="optional query parameters",
|
|
125
|
+
help="limit the number of snapshots to download",
|
|
126
|
+
prompt="Limit number of snapshots",
|
|
127
|
+
type=int,
|
|
128
|
+
action="optional_value",
|
|
129
|
+
const=True,
|
|
130
|
+
metavar="int",
|
|
131
|
+
advanced=True,
|
|
132
|
+
),
|
|
133
|
+
ArgSpec(
|
|
134
|
+
name="filetype",
|
|
135
|
+
flags=["--filetype"],
|
|
136
|
+
group="optional query parameters",
|
|
137
|
+
help="filetypes to download comma separated (js,css,...)",
|
|
138
|
+
type=str,
|
|
139
|
+
),
|
|
140
|
+
ArgSpec(
|
|
141
|
+
name="statuscode",
|
|
142
|
+
flags=["--statuscode"],
|
|
143
|
+
group="optional query parameters",
|
|
144
|
+
help="statuscodes to download comma separated (200,404,...)",
|
|
145
|
+
type=str,
|
|
146
|
+
),
|
|
147
|
+
# behavior
|
|
148
|
+
ArgSpec(
|
|
149
|
+
name="output",
|
|
150
|
+
flags=["-o", "--output"],
|
|
151
|
+
group="manipulate behavior",
|
|
152
|
+
help="output for all files - defaults to current directory",
|
|
153
|
+
prompt="Output directory",
|
|
154
|
+
type=str,
|
|
155
|
+
advanced=True,
|
|
156
|
+
),
|
|
157
|
+
ArgSpec(
|
|
158
|
+
name="metadata",
|
|
159
|
+
flags=["-m", "--metadata"],
|
|
160
|
+
group="manipulate behavior",
|
|
161
|
+
help="change directory for db/cdx/csv/log files",
|
|
162
|
+
type=str,
|
|
163
|
+
),
|
|
164
|
+
ArgSpec(
|
|
165
|
+
name="verbose",
|
|
166
|
+
flags=["-v", "--verbose"],
|
|
167
|
+
group="manipulate behavior",
|
|
168
|
+
help="verbosity level: low, default, high (default if flag set without value)",
|
|
169
|
+
type=str,
|
|
170
|
+
action="optional_value",
|
|
171
|
+
const="default",
|
|
172
|
+
),
|
|
173
|
+
ArgSpec(
|
|
174
|
+
name="log",
|
|
175
|
+
flags=["--log"],
|
|
176
|
+
group="manipulate behavior",
|
|
177
|
+
help="save a log file into the output folder",
|
|
178
|
+
prompt="Save log file?",
|
|
179
|
+
action="store_true",
|
|
180
|
+
default=False,
|
|
181
|
+
advanced=True,
|
|
182
|
+
),
|
|
183
|
+
ArgSpec(
|
|
184
|
+
name="progress",
|
|
185
|
+
flags=["--progress"],
|
|
186
|
+
group="manipulate behavior",
|
|
187
|
+
help="show a progress bar",
|
|
188
|
+
prompt="Show progress bar?",
|
|
189
|
+
action="store_true",
|
|
190
|
+
default=False,
|
|
191
|
+
advanced=True,
|
|
192
|
+
),
|
|
193
|
+
ArgSpec(
|
|
194
|
+
name="no_redirect",
|
|
195
|
+
flags=["--no-redirect"],
|
|
196
|
+
group="manipulate behavior",
|
|
197
|
+
help="do not follow redirects by archive.org",
|
|
198
|
+
action="store_true",
|
|
199
|
+
default=False,
|
|
200
|
+
),
|
|
201
|
+
ArgSpec(
|
|
202
|
+
name="retry",
|
|
203
|
+
flags=["--retry"],
|
|
204
|
+
group="manipulate behavior",
|
|
205
|
+
help="retry failed downloads (opt tries as int, else infinite)",
|
|
206
|
+
type=int,
|
|
207
|
+
default=0,
|
|
208
|
+
),
|
|
209
|
+
ArgSpec(
|
|
210
|
+
name="workers",
|
|
211
|
+
flags=["--workers"],
|
|
212
|
+
group="manipulate behavior",
|
|
213
|
+
help="number of workers (simultaneous downloads)",
|
|
214
|
+
prompt="Workers (parallel downloads)",
|
|
215
|
+
type=int,
|
|
216
|
+
default=1,
|
|
217
|
+
advanced=True,
|
|
218
|
+
),
|
|
219
|
+
ArgSpec(
|
|
220
|
+
name="delay",
|
|
221
|
+
flags=["--delay"],
|
|
222
|
+
group="manipulate behavior",
|
|
223
|
+
help="delay between each download in seconds",
|
|
224
|
+
type=int,
|
|
225
|
+
default=0,
|
|
226
|
+
),
|
|
227
|
+
ArgSpec(
|
|
228
|
+
name="wait",
|
|
229
|
+
flags=["--wait"],
|
|
230
|
+
group="manipulate behavior",
|
|
231
|
+
help="seconds to wait before renewing connection after HTTP errors or snapshot download errors (default: 15)",
|
|
232
|
+
type=int,
|
|
233
|
+
default=15,
|
|
234
|
+
),
|
|
235
|
+
# special
|
|
236
|
+
ArgSpec(
|
|
237
|
+
name="reset",
|
|
238
|
+
flags=["--reset"],
|
|
239
|
+
group="special",
|
|
240
|
+
help="reset the job and ignore existing cdx/db/csv files",
|
|
241
|
+
action="store_true",
|
|
242
|
+
default=False,
|
|
243
|
+
),
|
|
244
|
+
ArgSpec(
|
|
245
|
+
name="keep",
|
|
246
|
+
flags=["--keep"],
|
|
247
|
+
group="special",
|
|
248
|
+
help="keep all files after the job finished",
|
|
249
|
+
action="store_true",
|
|
250
|
+
default=False,
|
|
251
|
+
),
|
|
252
|
+
]
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def default_args() -> dict:
    """Build the baseline argument payload: every spec name in ARG_SPECS mapped to its default."""
    payload = {}
    for spec in ARG_SPECS:
        payload[spec.name] = spec.default
    return payload
|
|
@@ -15,8 +15,7 @@ from sqlalchemy import ( # noqa: F401
|
|
|
15
15
|
tuple_,
|
|
16
16
|
update,
|
|
17
17
|
)
|
|
18
|
-
from sqlalchemy.
|
|
19
|
-
from sqlalchemy.orm import sessionmaker
|
|
18
|
+
from sqlalchemy.orm import declarative_base, sessionmaker
|
|
20
19
|
from typing import Optional # python 3.8
|
|
21
20
|
from pywaybackup.Verbosity import Verbosity as vb
|
|
22
21
|
|
|
@@ -95,6 +94,7 @@ class Database:
|
|
|
95
94
|
dbfile = None
|
|
96
95
|
query_identifier = None
|
|
97
96
|
query_exist = False
|
|
97
|
+
engine = None
|
|
98
98
|
sessman = sessionmaker()
|
|
99
99
|
query_progress = "0 / 0"
|
|
100
100
|
|
|
@@ -109,9 +109,9 @@ class Database:
|
|
|
109
109
|
"""
|
|
110
110
|
cls.dbfile = dbfile
|
|
111
111
|
cls.query_identifier = query_identifier
|
|
112
|
-
engine = create_engine(f"sqlite:///{dbfile}")
|
|
113
|
-
cls.sessman = sessionmaker(bind=engine)
|
|
114
|
-
Base.metadata.create_all(engine)
|
|
112
|
+
cls.engine = create_engine(f"sqlite:///{dbfile}")
|
|
113
|
+
cls.sessman = sessionmaker(bind=cls.engine)
|
|
114
|
+
Base.metadata.create_all(cls.engine)
|
|
115
115
|
|
|
116
116
|
db = Database()
|
|
117
117
|
if db.session.execute(
|
|
@@ -123,6 +123,19 @@ class Database:
|
|
|
123
123
|
db.session.execute(insert(waybackup_job).values(query_identifier=query_identifier))
|
|
124
124
|
db.close()
|
|
125
125
|
|
|
126
|
+
@classmethod
def close_engine(cls):
    """
    Dispose of the class-level SQLAlchemy engine, if one is set.

    Meant to release the SQLite file handles; per the original author this is
    required on Windows before the .db file can be deleted. Calling it again
    after the engine was cleared does nothing.
    """
    engine = cls.engine
    if engine is None:
        # nothing to release (never initialised, or already closed)
        return
    engine.dispose()
    cls.engine = None
|
|
138
|
+
|
|
126
139
|
def __init__(self):
|
|
127
140
|
"""
|
|
128
141
|
Create a new session.
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Interactive mode: prompt the user for arguments instead of parsing sys.argv.
|
|
3
|
+
|
|
4
|
+
Used when waybackup is launched without CLI arguments (e.g. double-clicking
|
|
5
|
+
the Windows .exe). Produces the same dict shape as Arguments.get_args() so
|
|
6
|
+
PyWayBackup(**args) works either way. Argument metadata is read from
|
|
7
|
+
arg_specs.ARG_SPECS so flags only have to be declared in one place.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from importlib.metadata import version
|
|
11
|
+
|
|
12
|
+
from pywaybackup.arg_specs import ARG_SPECS, EXCLUSIVE_GROUPS, default_args
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Interactive:
    """
    Collect waybackup arguments through console prompts.

    The resulting dict starts from default_args() and receives the same
    internal ``silent`` / ``debug`` entries that the argparse path sets.
    """

    def __init__(self):
        """Run the prompt sequence: URL, one member per exclusive group, optional advanced settings."""
        print(f"<<< python-wayback-machine-downloader v{version('pywaybackup')} >>>")
        print("Interactive mode - press Ctrl+C to abort.\n")

        args = default_args()

        # 1. Required URL
        url_spec = _spec_by_name("url")
        args["url"] = self._prompt_required(url_spec.prompt or url_spec.help)

        # 2. Exclusive group(s): exactly one member becomes True, the rest False
        for ex_name in EXCLUSIVE_GROUPS:
            members = [s for s in ARG_SPECS if s.exclusive_group == ex_name]
            choice = self._prompt_choice(
                ex_name.capitalize(),
                [(s.name, s.help) for s in members],
            )
            for s in members:
                args[s.name] = s.name == choice

        # 3. Advanced options: only specs flagged advanced are prompted for
        if self._prompt_yes_no("Configure advanced options?", default=False):
            for spec in ARG_SPECS:
                if not spec.advanced:
                    continue
                args[spec.name] = self._prompt_for(spec, args[spec.name])

        # internal flags (parity with the argparse path)
        args["silent"] = False
        args["debug"] = True

        self.args = args
        print()

    def get_args(self) -> dict:
        """Return the collected arguments as a dictionary."""
        return self.args

    def _prompt_for(self, spec, current):
        """
        Ask for one spec's value using the prompt that fits its action and type.

        ``store_true`` specs get a yes/no question; int specs get an integer
        prompt; everything else a string prompt. When ``current`` is None the
        "blank to skip" variant is used, otherwise ``current`` is the default.
        """
        label = spec.prompt or spec.help
        if spec.action == "store_true":
            return self._prompt_yes_no(f"{label}", default=bool(current))
        if spec.type is int:
            if current is None:
                return self._prompt_optional_int(label)
            return self._prompt_int(label, default=current)
        if current is None:
            return self._prompt_optional_str(label)
        return self._prompt_str(label, default=current)

    @staticmethod
    def _prompt_required(label):
        """Keep asking until a non-blank value is entered; return it stripped."""
        while True:
            value = input(f"{label}: ").strip()
            if value:
                return value
            print(" (required, please enter a value)")

    @staticmethod
    def _prompt_optional_str(label):
        """Return the stripped input, or None when it is blank."""
        value = input(f"{label} (blank to skip): ").strip()
        return value or None

    @staticmethod
    def _prompt_str(label, default):
        """Return the stripped input, or ``default`` when it is blank."""
        value = input(f"{label} [{default}]: ").strip()
        return value if value else default

    @staticmethod
    def _prompt_optional_int(label):
        """Return the input as int, None when blank; re-ask on non-integer input."""
        while True:
            value = input(f"{label} (blank to skip): ").strip()
            if not value:
                return None
            try:
                return int(value)
            except ValueError:
                print(" (please enter an integer or leave blank)")

    @staticmethod
    def _prompt_int(label, default):
        """Return the input as int, ``default`` when blank; re-ask on non-integer input."""
        while True:
            value = input(f"{label} [{default}]: ").strip()
            if not value:
                return default
            try:
                return int(value)
            except ValueError:
                print(" (please enter an integer)")

    @staticmethod
    def _prompt_yes_no(label, default):
        """Return True for y/yes, False for n/no (case-insensitive), ``default`` when blank."""
        suffix = "[Y/n]" if default else "[y/N]"
        while True:
            value = input(f"{label} {suffix}: ").strip().lower()
            if not value:
                return default
            if value in ("y", "yes"):
                return True
            if value in ("n", "no"):
                return False
            print(" (please answer y or n)")

    @staticmethod
    def _prompt_choice(label, options):
        """
        Let the user pick one of ``options`` and return the chosen name.

        ``options`` is a sequence of ``(name, description)`` pairs. Each option
        is selected by the lowercase first letter of its name, or by its
        1-based position when that letter is already taken.
        """
        keys = []
        used = set()
        for name, _ in options:
            # answers are lowercased before lookup, so the key must be lowercase too;
            # otherwise a name starting with an uppercase letter could never be selected
            k = name[0].lower()
            if k in used:
                k = str(len(keys) + 1)
            keys.append(k)
            used.add(k)
        print(f"\n{label}:")
        for k, (name, desc) in zip(keys, options):
            print(f" [{k}] {name}: {desc}")
        valid = dict(zip(keys, [name for name, _ in options]))
        while True:
            value = input("Choice: ").strip().lower()
            if value in valid:
                return valid[value]
            print(f" (please enter one of: {', '.join(sorted(valid))})")
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _spec_by_name(name):
    """Return the first entry of ARG_SPECS whose ``name`` equals *name*; raise KeyError(name) if none does."""
    found = next((spec for spec in ARG_SPECS if spec.name == name), None)
    if found is None:
        raise KeyError(name)
    return found
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import signal
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
from pywaybackup import PyWayBackup
|
|
5
|
+
from pywaybackup.arg_parser import Arguments
|
|
6
|
+
from pywaybackup.interactive import Interactive
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def cli():
    """
    Entry point: gather arguments (prompts or argparse), then run the backup.

    Prompts are used only when no CLI arguments were given and stdin is a
    terminal; otherwise the argparse path handles the command line. After an
    interactive run the process waits for Enter before returning.
    """
    # interactive only when launched with no args; scripts/cron without a tty get --help instead
    interactive = False
    if len(sys.argv) <= 1 and sys.stdin is not None:
        interactive = sys.stdin.isatty()

    source = Interactive if interactive else Arguments
    try:
        cli_input = source()
    except (KeyboardInterrupt, EOFError):
        # ignore further SIGINT while aborting (original note: "pyinstaller bl",
        # presumably the PyInstaller bootloader - TODO confirm)
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        print("\nAborted.")
        sys.exit(130)

    job = PyWayBackup(**cli_input.get_args())
    try:
        job.run(daemon=False)
    finally:
        # pause after an interactive run, whether or not run() raised
        if interactive:
            try:
                input("\nPress Enter to exit...")
            except (KeyboardInterrupt, EOFError):
                pass
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
if __name__ == "__main__":
|
|
32
|
+
cli()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pywaybackup
|
|
3
|
-
Version: 4.1.6
|
|
3
|
+
Version: 4.2.0
|
|
4
4
|
Summary: Query and download archive.org as simple as possible.
|
|
5
5
|
Author-email: bitdruid <bitdruid@outlook.com>
|
|
6
6
|
License: MIT License
|
|
@@ -26,11 +26,11 @@ License: MIT License
|
|
|
26
26
|
SOFTWARE.
|
|
27
27
|
|
|
28
28
|
Project-URL: homepage, https://github.com/bitdruid/python-wayback-machine-downloader
|
|
29
|
-
Requires-Python:
|
|
29
|
+
Requires-Python: >=3.8
|
|
30
30
|
Description-Content-Type: text/markdown
|
|
31
31
|
License-File: LICENSE
|
|
32
32
|
Requires-Dist: ruff
|
|
33
|
-
Requires-Dist: SQLAlchemy==2.0.
|
|
33
|
+
Requires-Dist: SQLAlchemy==2.0.51
|
|
34
34
|
Requires-Dist: requests==2.32.3
|
|
35
35
|
Requires-Dist: tqdm==4.67.1
|
|
36
36
|
Requires-Dist: python-magic-standalone==0.4.28
|
|
@@ -39,7 +39,7 @@ Requires-Dist: python-magic-standalone==0.4.28
|
|
|
39
39
|
|
|
40
40
|
[](https://pypi.org/project/pywaybackup/)
|
|
41
41
|
[](https://pypi.org/project/pywaybackup/)
|
|
42
|
-

|
|
42
|
+

|
|
43
43
|
[](https://opensource.org/licenses/MIT)
|
|
44
44
|
|
|
45
45
|
Downloading archived web pages from the [Wayback Machine](https://archive.org/web/).
|
|
@@ -68,6 +68,13 @@ This tool allows you to download content from the Wayback Machine (archive.org).
|
|
|
68
68
|
2. Run the tool <br>
|
|
69
69
|
`waybackup -h`
|
|
70
70
|
|
|
71
|
+
### Standalone binary
|
|
72
|
+
|
|
73
|
+
Prebuilt executables for Windows, Linux and macOS are attached to each [release](https://github.com/bitdruid/python-wayback-machine-downloader/releases). No Python required.
|
|
74
|
+
|
|
75
|
+
- Run from a terminal with arguments like the pip version: `waybackup -h`
|
|
76
|
+
- Or start it without arguments (e.g. double-click on Windows) to enter **interactive mode** — the tool will prompt you for URL, mode and optional settings.
|
|
77
|
+
|
|
71
78
|
### Manual
|
|
72
79
|
|
|
73
80
|
1. Clone the repository <br>
|
|
@@ -155,8 +162,9 @@ output:
|
|
|
155
162
|
|
|
156
163
|
## cli
|
|
157
164
|
|
|
158
|
-
- `-h`, `--help`: Show the help message and exit.
|
|
159
|
-
|
|
165
|
+
- `-h`, `--help`: Show the help message and exit. Version info is shown in the help header.
|
|
166
|
+
|
|
167
|
+
> **Interactive mode:** running `waybackup` without any arguments in a terminal starts a guided prompt for URL, mode and optional settings. Without a terminal (scripts/cron), the help is printed instead.
|
|
160
168
|
|
|
161
169
|
#### Required
|
|
162
170
|
|
|
@@ -171,6 +179,8 @@ output:
|
|
|
171
179
|
Last Version. Gives one folder containing the last version of each file of the specified `--range`.
|
|
172
180
|
- **`-f`**, **`--first`**:<br>
|
|
173
181
|
First Version. Gives one folder containing the first version of each file of the specified `--range`.
|
|
182
|
+
- **`-s`**, **`--save`**:<br>
|
|
183
|
+
Save a page to the wayback machine (no download).
|
|
174
184
|
|
|
175
185
|
#### Optional query parameters
|
|
176
186
|
|
|
@@ -219,7 +229,7 @@ Parameters will change the download behavior for snapshots.
|
|
|
219
229
|
Set verbosity level. Available levels:
|
|
220
230
|
- `low` (or `quiet`, `minimal`, `min`): Essential output only (same as no flag)
|
|
221
231
|
- `default` (or `normal`, `verbose`): Standard verbose output (default when flag is set)
|
|
222
|
-
- `high` (or `
|
|
232
|
+
- `high` (or `detailed`, `max`): Detailed verbose output
|
|
223
233
|
|
|
224
234
|
Examples: `--verbose`, `--verbose default`, `--verbose high`, `-v high`
|
|
225
235
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
LICENSE
|
|
2
2
|
README.md
|
|
3
3
|
pyproject.toml
|
|
4
|
-
pywaybackup/Arguments.py
|
|
5
4
|
pywaybackup/Exception.py
|
|
6
5
|
pywaybackup/PyWayBackup.py
|
|
7
6
|
pywaybackup/Snapshot.py
|
|
@@ -11,9 +10,12 @@ pywaybackup/Worker.py
|
|
|
11
10
|
pywaybackup/__init__.py
|
|
12
11
|
pywaybackup/archive_download.py
|
|
13
12
|
pywaybackup/archive_save.py
|
|
13
|
+
pywaybackup/arg_parser.py
|
|
14
|
+
pywaybackup/arg_specs.py
|
|
14
15
|
pywaybackup/db.py
|
|
15
16
|
pywaybackup/files.py
|
|
16
17
|
pywaybackup/helper.py
|
|
18
|
+
pywaybackup/interactive.py
|
|
17
19
|
pywaybackup/main.py
|
|
18
20
|
pywaybackup.egg-info/PKG-INFO
|
|
19
21
|
pywaybackup.egg-info/SOURCES.txt
|
|
@@ -1,71 +0,0 @@
|
|
|
1
|
-
import sys
|
|
2
|
-
import argparse
|
|
3
|
-
|
|
4
|
-
from argparse import RawTextHelpFormatter
|
|
5
|
-
|
|
6
|
-
from importlib.metadata import version
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class Arguments:
    """Command-line parser for python-wayback-machine-downloader.

    Creating an instance builds the argparse parser, parses ``sys.argv``
    straight away and keeps the resulting namespace on ``self.args``.
    Use ``get_args()`` to obtain the values as a plain dict.
    """

    # Each table holds ``(option strings, add_argument keyword options)``
    # pairs. The order is the registration order and therefore the order in
    # which the options appear in the help output.
    _REQUIRED = (
        (("-u", "--url"), {"type": str, "metavar": "", "help": "url (with subdir/subdomain) to download"}),
    )

    # Exactly one of these mode switches must be given.
    _MODES = (
        (("-a", "--all"), {"action": "store_true", "help": "download snapshots of all timestamps"}),
        (("-l", "--last"), {"action": "store_true", "help": "download the last version of each file snapshot"}),
        (("-f", "--first"), {"action": "store_true", "help": "download the first version of each file snapshot"}),
        (("-s", "--save"), {"action": "store_true", "help": "save a page to the wayback machine"}),
    )

    _QUERY = (
        (("-e", "--explicit"), {"action": "store_true", "help": "search only for the explicit given url"}),
        (("-r", "--range"), {"type": int, "metavar": "", "help": "range in years to search"}),
        (("--start",), {"type": int, "metavar": "", "help": "start timestamp format: YYYYMMDDHHMMSS"}),
        (("--end",), {"type": int, "metavar": "", "help": "end timestamp format: YYYYMMDDHHMMSS"}),
        # A bare ``--limit`` (no value) stores the constant True.
        (
            ("--limit",),
            {
                "type": int,
                "nargs": "?",
                "const": True,
                "metavar": "int",
                "help": "limit the number of snapshots to download",
            },
        ),
        (("--filetype",), {"type": str, "metavar": "", "help": "filetypes to download comma separated (js,css,...)"}),
        (
            ("--statuscode",),
            {"type": str, "metavar": "", "help": "statuscodes to download comma separated (200,404,...)"},
        ),
    )

    _BEHAVIOR = (
        (
            ("-o", "--output"),
            {"type": str, "metavar": "", "help": "output for all files - defaults to current directory"},
        ),
        (("-m", "--metadata"), {"type": str, "metavar": "", "help": "change directory for db/cdx/csv/log files"}),
        # A bare ``-v`` / ``--verbose`` (no value) stores "default".
        (
            ("-v", "--verbose"),
            {
                "type": str,
                "nargs": "?",
                "const": "default",
                "metavar": "",
                "help": "verbosity level: low, default, high (default if flag set without value)",
            },
        ),
        (("--log",), {"action": "store_true", "help": "save a log file into the output folder"}),
        (("--progress",), {"action": "store_true", "help": "show a progress bar"}),
        (("--no-redirect",), {"action": "store_true", "help": "do not follow redirects by archive.org"}),
        (
            ("--retry",),
            {
                "type": int,
                "default": 0,
                "metavar": "",
                "help": "retry failed downloads (opt tries as int, else infinite)",
            },
        ),
        (
            ("--workers",),
            {"type": int, "default": 1, "metavar": "", "help": "number of workers (simultaneous downloads)"},
        ),
        (
            ("--delay",),
            {"type": int, "default": 0, "metavar": "", "help": "delay between each download in seconds"},
        ),
        (
            ("--wait",),
            {
                "type": int,
                "default": 15,
                "metavar": "",
                "help": "seconds to wait before renewing connection after HTTP errors or snapshot download errors (default: 15)",
            },
        ),
    )

    _SPECIAL = (
        (("--reset",), {"action": "store_true", "help": "reset the job and ignore existing cdx/db/csv files"}),
        (("--keep",), {"action": "store_true", "help": "keep all files after the job finished"}),
    )

    @staticmethod
    def _register(target, specs):
        """Add every ``(flags, options)`` pair in *specs* to *target*, in order."""
        for flags, options in specs:
            target.add_argument(*flags, **options)

    def __init__(self):
        parser = argparse.ArgumentParser(
            formatter_class=RawTextHelpFormatter,
            description=f"<<< python-wayback-machine-downloader v{version('pywaybackup')} >>>\nby @bitdruid -> https://github.com/bitdruid",
        )

        required_group = parser.add_argument_group("required (one exclusive)")
        self._register(required_group, self._REQUIRED)
        mode_group = required_group.add_mutually_exclusive_group(required=True)
        self._register(mode_group, self._MODES)

        self._register(parser.add_argument_group("optional query parameters"), self._QUERY)
        self._register(parser.add_argument_group("manipulate behavior"), self._BEHAVIOR)
        self._register(parser.add_argument_group("special"), self._SPECIAL)

        if sys.argv[1:]:
            parsed = parser.parse_args()
        else:
            # No arguments at all: parse "--help" so the usage text is shown.
            parsed = parser.parse_args(["--help"])

        # These two values are not exposed as flags; they are always set to
        # the same fixed values on the parsed namespace.
        parsed.silent = False
        parsed.debug = True

        self.args = parsed

    def get_args(self) -> dict:
        """Return the parsed arguments as a dictionary."""
        namespace = self.args
        return vars(namespace)
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
from pywaybackup import PyWayBackup
|
|
2
|
-
from pywaybackup.Arguments import Arguments as args
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
def cli():
    """Parse the command line and run the resulting PyWayBackup job.

    The parsed arguments are passed to ``PyWayBackup`` as keyword arguments
    and the job is started with ``daemon=False``.
    """
    options = args().get_args()
    PyWayBackup(**options).run(daemon=False)
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
# Start the command-line entry point only when this module is executed as a
# script; importing it has no side effects.
if __name__ == "__main__":
    cli()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|