pywaybackup 4.1.6__tar.gz → 4.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. {pywaybackup-4.1.6/pywaybackup.egg-info → pywaybackup-4.2.0}/PKG-INFO +17 -7
  2. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/README.md +14 -4
  3. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pyproject.toml +3 -3
  4. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/PyWayBackup.py +1 -0
  5. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/SnapshotCollection.py +21 -16
  6. pywaybackup-4.2.0/pywaybackup/arg_parser.py +55 -0
  7. pywaybackup-4.2.0/pywaybackup/arg_specs.py +257 -0
  8. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/db.py +18 -5
  9. pywaybackup-4.2.0/pywaybackup/interactive.py +144 -0
  10. pywaybackup-4.2.0/pywaybackup/main.py +32 -0
  11. {pywaybackup-4.1.6 → pywaybackup-4.2.0/pywaybackup.egg-info}/PKG-INFO +17 -7
  12. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup.egg-info/SOURCES.txt +3 -1
  13. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup.egg-info/requires.txt +1 -1
  14. pywaybackup-4.1.6/pywaybackup/Arguments.py +0 -71
  15. pywaybackup-4.1.6/pywaybackup/main.py +0 -13
  16. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/LICENSE +0 -0
  17. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/Exception.py +0 -0
  18. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/Snapshot.py +0 -0
  19. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/Verbosity.py +0 -0
  20. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/Worker.py +0 -0
  21. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/__init__.py +0 -0
  22. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/archive_download.py +0 -0
  23. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/archive_save.py +0 -0
  24. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/files.py +0 -0
  25. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup/helper.py +0 -0
  26. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup.egg-info/dependency_links.txt +0 -0
  27. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup.egg-info/entry_points.txt +0 -0
  28. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/pywaybackup.egg-info/top_level.txt +0 -0
  29. {pywaybackup-4.1.6 → pywaybackup-4.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pywaybackup
3
- Version: 4.1.6
3
+ Version: 4.2.0
4
4
  Summary: Query and download archive.org as simple as possible.
5
5
  Author-email: bitdruid <bitdruid@outlook.com>
6
6
  License: MIT License
@@ -26,11 +26,11 @@ License: MIT License
26
26
  SOFTWARE.
27
27
 
28
28
  Project-URL: homepage, https://github.com/bitdruid/python-wayback-machine-downloader
29
- Requires-Python: <3.14,>=3.8
29
+ Requires-Python: >=3.8
30
30
  Description-Content-Type: text/markdown
31
31
  License-File: LICENSE
32
32
  Requires-Dist: ruff
33
- Requires-Dist: SQLAlchemy==2.0.43
33
+ Requires-Dist: SQLAlchemy==2.0.51
34
34
  Requires-Dist: requests==2.32.3
35
35
  Requires-Dist: tqdm==4.67.1
36
36
  Requires-Dist: python-magic-standalone==0.4.28
@@ -39,7 +39,7 @@ Requires-Dist: python-magic-standalone==0.4.28
39
39
 
40
40
  [![PyPI](https://img.shields.io/pypi/v/pywaybackup)](https://pypi.org/project/pywaybackup/)
41
41
  [![PyPI - Downloads](https://img.shields.io/pypi/dm/pywaybackup)](https://pypi.org/project/pywaybackup/)
42
- ![Python Version](https://img.shields.io/badge/Python-3.8-blue)
42
+ ![Python Version](https://img.shields.io/badge/Python-3.8%2B-blue)
43
43
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
44
44
 
45
45
  Downloading archived web pages from the [Wayback Machine](https://archive.org/web/).
@@ -68,6 +68,13 @@ This tool allows you to download content from the Wayback Machine (archive.org).
68
68
  2. Run the tool <br>
69
69
  `waybackup -h`
70
70
 
71
+ ### Standalone binary
72
+
73
+ Prebuilt executables for Windows, Linux and macOS are attached to each [release](https://github.com/bitdruid/python-wayback-machine-downloader/releases). No Python required.
74
+
75
+ - Run from a terminal with arguments like the pip version: `waybackup -h`
76
+ - Or start it without arguments (e.g. double-click on Windows) to enter **interactive mode** — the tool will prompt you for URL, mode and optional settings.
77
+
71
78
  ### Manual
72
79
 
73
80
  1. Clone the repository <br>
@@ -155,8 +162,9 @@ output:
155
162
 
156
163
  ## cli
157
164
 
158
- - `-h`, `--help`: Show the help message and exit.
159
- - `-v`, `--version`: Show information about the tool and exit.
165
+ - `-h`, `--help`: Show the help message and exit. Version info is shown in the help header.
166
+
167
+ > **Interactive mode:** running `waybackup` without any arguments in a terminal starts a guided prompt for URL, mode and optional settings. Without a terminal (scripts/cron), the help is printed instead.
160
168
 
161
169
  #### Required
162
170
 
@@ -171,6 +179,8 @@ output:
171
179
  Last Version. Gives one folder containing the last version of each file of specified `--range`.
172
180
  - **`-f`**, **`--first`**:<br>
173
181
  First Version. Gives one folder containing the first version of each file of specified `--range`.
182
+ - **`-s`**, **`--save`**:<br>
183
+ Save a page to the wayback machine (no download).
174
184
 
175
185
  #### Optional query parameters
176
186
 
@@ -219,7 +229,7 @@ Parameters will change the download behavior for snapshots.
219
229
  Set verbosity level. Available levels:
220
230
  - `low` (or `quiet`, `minimal`, `min`): Essential output only (same as no flag)
221
231
  - `default` (or `normal`, `verbose`): Standard verbose output (default when flag is set)
222
- - `high` (or `debug`, `detailed`, `max`): Detailed verbose output
232
+ - `high` (or `detailed`, `max`): Detailed verbose output
223
233
 
224
234
  Examples: `--verbose`, `--verbose default`, `--verbose high`, `-v high`
225
235
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  [![PyPI](https://img.shields.io/pypi/v/pywaybackup)](https://pypi.org/project/pywaybackup/)
4
4
  [![PyPI - Downloads](https://img.shields.io/pypi/dm/pywaybackup)](https://pypi.org/project/pywaybackup/)
5
- ![Python Version](https://img.shields.io/badge/Python-3.8-blue)
5
+ ![Python Version](https://img.shields.io/badge/Python-3.8%2B-blue)
6
6
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
7
7
 
8
8
  Downloading archived web pages from the [Wayback Machine](https://archive.org/web/).
@@ -31,6 +31,13 @@ This tool allows you to download content from the Wayback Machine (archive.org).
31
31
  2. Run the tool <br>
32
32
  `waybackup -h`
33
33
 
34
+ ### Standalone binary
35
+
36
+ Prebuilt executables for Windows, Linux and macOS are attached to each [release](https://github.com/bitdruid/python-wayback-machine-downloader/releases). No Python required.
37
+
38
+ - Run from a terminal with arguments like the pip version: `waybackup -h`
39
+ - Or start it without arguments (e.g. double-click on Windows) to enter **interactive mode** — the tool will prompt you for URL, mode and optional settings.
40
+
34
41
  ### Manual
35
42
 
36
43
  1. Clone the repository <br>
@@ -118,8 +125,9 @@ output:
118
125
 
119
126
  ## cli
120
127
 
121
- - `-h`, `--help`: Show the help message and exit.
122
- - `-v`, `--version`: Show information about the tool and exit.
128
+ - `-h`, `--help`: Show the help message and exit. Version info is shown in the help header.
129
+
130
+ > **Interactive mode:** running `waybackup` without any arguments in a terminal starts a guided prompt for URL, mode and optional settings. Without a terminal (scripts/cron), the help is printed instead.
123
131
 
124
132
  #### Required
125
133
 
@@ -134,6 +142,8 @@ output:
134
142
  Last Version. Gives one folder containing the last version of each file of specified `--range`.
135
143
  - **`-f`**, **`--first`**:<br>
136
144
  First Version. Gives one folder containing the first version of each file of specified `--range`.
145
+ - **`-s`**, **`--save`**:<br>
146
+ Save a page to the wayback machine (no download).
137
147
 
138
148
  #### Optional query parameters
139
149
 
@@ -182,7 +192,7 @@ Parameters will change the download behavior for snapshots.
182
192
  Set verbosity level. Available levels:
183
193
  - `low` (or `quiet`, `minimal`, `min`): Essential output only (same as no flag)
184
194
  - `default` (or `normal`, `verbose`): Standard verbose output (default when flag is set)
185
- - `high` (or `debug`, `detailed`, `max`): Detailed verbose output
195
+ - `high` (or `detailed`, `max`): Detailed verbose output
186
196
 
187
197
  Examples: `--verbose`, `--verbose default`, `--verbose high`, `-v high`
188
198
 
@@ -10,18 +10,18 @@ packages = ["pywaybackup"]
10
10
  [project]
11
11
  dependencies = [
12
12
  "ruff",
13
- "SQLAlchemy==2.0.43",
13
+ "SQLAlchemy==2.0.51",
14
14
  "requests==2.32.3",
15
15
  "tqdm==4.67.1",
16
16
  "python-magic-standalone==0.4.28",
17
17
  ]
18
18
  name = "pywaybackup"
19
- version = "4.1.6"
19
+ version = "4.2.0"
20
20
  description = "Query and download archive.org as simple as possible."
21
21
  authors = [{ name = "bitdruid", email = "bitdruid@outlook.com" }]
22
22
  license = { file = "LICENSE" }
23
23
  readme = "README.md"
24
- requires-python = ">=3.8,<3.14"
24
+ requires-python = ">=3.8"
25
25
 
26
26
 
27
27
  [project.scripts]
@@ -510,6 +510,7 @@ class PyWayBackup:
510
510
  collection = SnapshotCollection()
511
511
  collection.close()
512
512
  self._csvfile.store_result()
513
+ db.close_engine()
513
514
  self._f_keep()
514
515
  vb.fini()
515
516
  signal.signal(signal.SIGINT, signal.SIG_IGN)
@@ -1,6 +1,6 @@
1
1
  import json
2
2
 
3
- from pywaybackup.db import Database, Index, and_, delete, func, or_, select, tuple_, update, waybackup_snapshots
3
+ from pywaybackup.db import Database, and_, delete, func, or_, select, text, tuple_, update, waybackup_snapshots
4
4
  from pywaybackup.files import CDXfile, CSVfile
5
5
  from pywaybackup.Verbosity import Progressbar
6
6
  from pywaybackup.Verbosity import Verbosity as vb
@@ -217,30 +217,35 @@ class SnapshotCollection:
217
217
  def _index_snapshots(self):
218
218
  """
219
219
  Create indexes for the snapshot table.
220
+
221
+ Raw DDL instead of sqlalchemy Index objects: Index(...) attaches to the
222
+ module-global table metadata, which accumulates duplicates when the
223
+ package is reused in-process (library usage) and breaks create_all().
220
224
  """
221
225
  # index for filtering last snapshots
222
226
  if self._mode_last:
223
- idx1 = Index(
224
- "idx_waybackup_snapshots_url_origin_timestamp_desc",
225
- waybackup_snapshots.url_origin,
226
- waybackup_snapshots.timestamp.desc(),
227
+ self.db.session.execute(
228
+ text(
229
+ "CREATE INDEX IF NOT EXISTS idx_waybackup_snapshots_url_origin_timestamp_desc "
230
+ "ON waybackup_snapshots (url_origin, timestamp DESC)"
231
+ )
227
232
  )
228
- idx1.create(self.db.session.bind, checkfirst=True)
229
233
  # index for filtering first snapshots
230
234
  if self._mode_first:
231
- idx2 = Index(
232
- "idx_waybackup_snapshots_url_origin_timestamp_asc",
233
- waybackup_snapshots.url_origin,
234
- waybackup_snapshots.timestamp.asc(),
235
+ self.db.session.execute(
236
+ text(
237
+ "CREATE INDEX IF NOT EXISTS idx_waybackup_snapshots_url_origin_timestamp_asc "
238
+ "ON waybackup_snapshots (url_origin, timestamp ASC)"
239
+ )
235
240
  )
236
- idx2.create(self.db.session.bind, checkfirst=True)
237
241
  # index for skippable snapshots
238
- idx3 = Index(
239
- "idx_waybackup_snapshots_timestamp_url_origin_response",
240
- waybackup_snapshots.timestamp,
241
- waybackup_snapshots.url_origin,
242
+ self.db.session.execute(
243
+ text(
244
+ "CREATE INDEX IF NOT EXISTS idx_waybackup_snapshots_timestamp_url_origin_response "
245
+ "ON waybackup_snapshots (timestamp, url_origin)"
246
+ )
242
247
  )
243
- idx3.create(self.db.session.bind, checkfirst=True)
248
+ self.db.session.commit()
244
249
 
245
250
  def _filter_snapshots(self):
246
251
  """
@@ -0,0 +1,55 @@
1
+ import argparse
2
+ import sys
3
+ from argparse import RawTextHelpFormatter
4
+ from importlib.metadata import version
5
+
6
+ from pywaybackup.arg_specs import ARG_GROUPS, ARG_SPECS, EXCLUSIVE_GROUPS
7
+
8
+
9
+ class Arguments:
10
+ def __init__(self):
11
+ parser = argparse.ArgumentParser(
12
+ description=f"<<< python-wayback-machine-downloader v{version('pywaybackup')} >>>\nby @bitdruid -> https://github.com/bitdruid",
13
+ formatter_class=RawTextHelpFormatter,
14
+ )
15
+
16
+ groups = {name: parser.add_argument_group(name) for name in ARG_GROUPS}
17
+
18
+ exclusive = {
19
+ ex_name: groups[ex_meta["parent_group"]].add_mutually_exclusive_group(required=ex_meta["required"])
20
+ for ex_name, ex_meta in EXCLUSIVE_GROUPS.items()
21
+ }
22
+
23
+ for spec in ARG_SPECS:
24
+ target = exclusive[spec.exclusive_group] if spec.exclusive_group else groups[spec.group]
25
+ target.add_argument(*spec.flags, **_argparse_kwargs(spec))
26
+
27
+ args = parser.parse_args(args=None if sys.argv[1:] else ["--help"]) # if no arguments are given, print help
28
+
29
+ args.silent = False
30
+ args.debug = True
31
+
32
+ self.args = args
33
+
34
+ def get_args(self) -> dict:
35
+ """Returns the parsed arguments as a dictionary."""
36
+ return vars(self.args)
37
+
38
+
39
+ def _argparse_kwargs(spec) -> dict:
40
+ """Translate an ArgSpec into kwargs for argparse.add_argument()."""
41
+ kwargs = {"help": spec.help}
42
+ if spec.action == "store_true":
43
+ kwargs["action"] = "store_true"
44
+ kwargs["default"] = bool(spec.default)
45
+ elif spec.action == "optional_value":
46
+ kwargs["type"] = spec.type
47
+ kwargs["nargs"] = "?"
48
+ kwargs["const"] = spec.const
49
+ kwargs["metavar"] = spec.metavar
50
+ kwargs["default"] = spec.default
51
+ else:
52
+ kwargs["type"] = spec.type
53
+ kwargs["metavar"] = spec.metavar
54
+ kwargs["default"] = spec.default
55
+ return kwargs
@@ -0,0 +1,257 @@
1
+ """
2
+ Single source of truth for waybackup CLI arguments.
3
+
4
+ Both Arguments (argparse) and Interactive (input prompts) consume this list,
5
+ so a new flag only needs to be added in one place. PyWayBackup.__init__ keeps
6
+ its explicit signature (vscode autocomplete).
7
+ """
8
+
9
+ from dataclasses import dataclass
10
+ from typing import Any, List, Optional
11
+
12
+
13
+ @dataclass
14
+ class ArgSpec:
15
+ name: str # internal key (matches PyWayBackup.__init__ kwarg)
16
+ flags: List[str] # CLI flags, e.g. ["-u", "--url"]
17
+ group: str # argparse group label
18
+ help: str # CLI help text
19
+ prompt: Optional[str] = None # interactive prompt label (None = skip in interactive)
20
+ type: Optional[type] = None # str / int / None for store_true
21
+ default: Any = None
22
+ action: str = "store" # "store" | "store_true" | "optional_value"
23
+ const: Any = None # used when action="optional_value"
24
+ metavar: str = "" # argparse metavar; ignored for store_true
25
+ exclusive_group: Optional[str] = None # name of a mutex group below
26
+ advanced: bool = False # show in interactive only when advanced opts enabled
27
+
28
+
29
+ # Mutually exclusive groups, keyed by name. Specs join them via exclusive_group=...
30
+ EXCLUSIVE_GROUPS = {
31
+ "mode": {"required": True, "parent_group": "required (one exclusive)"},
32
+ }
33
+
34
+ # argparse groups in display order
35
+ ARG_GROUPS = [
36
+ "required (one exclusive)",
37
+ "optional query parameters",
38
+ "manipulate behavior",
39
+ "special",
40
+ ]
41
+
42
+
43
+ ARG_SPECS: List[ArgSpec] = [
44
+ # required
45
+ ArgSpec(
46
+ name="url",
47
+ flags=["-u", "--url"],
48
+ group="required (one exclusive)",
49
+ help="url (with subdir/subdomain) to download",
50
+ prompt="URL to download (with subdir/subdomain)",
51
+ type=str,
52
+ ),
53
+ ArgSpec(
54
+ name="all",
55
+ flags=["-a", "--all"],
56
+ group="required (one exclusive)",
57
+ help="download snapshots of all timestamps",
58
+ action="store_true",
59
+ default=False,
60
+ exclusive_group="mode",
61
+ ),
62
+ ArgSpec(
63
+ name="last",
64
+ flags=["-l", "--last"],
65
+ group="required (one exclusive)",
66
+ help="download the last version of each file snapshot",
67
+ action="store_true",
68
+ default=False,
69
+ exclusive_group="mode",
70
+ ),
71
+ ArgSpec(
72
+ name="first",
73
+ flags=["-f", "--first"],
74
+ group="required (one exclusive)",
75
+ help="download the first version of each file snapshot",
76
+ action="store_true",
77
+ default=False,
78
+ exclusive_group="mode",
79
+ ),
80
+ ArgSpec(
81
+ name="save",
82
+ flags=["-s", "--save"],
83
+ group="required (one exclusive)",
84
+ help="save a page to the wayback machine",
85
+ action="store_true",
86
+ default=False,
87
+ exclusive_group="mode",
88
+ ),
89
+ # -------------------- optional query --------------------
90
+ ArgSpec(
91
+ name="explicit",
92
+ flags=["-e", "--explicit"],
93
+ group="optional query parameters",
94
+ help="search only for the explicit given url",
95
+ action="store_true",
96
+ default=False,
97
+ ),
98
+ ArgSpec(
99
+ name="range",
100
+ flags=["-r", "--range"],
101
+ group="optional query parameters",
102
+ help="range in years to search",
103
+ prompt="Range in years to search",
104
+ type=int,
105
+ advanced=True,
106
+ ),
107
+ ArgSpec(
108
+ name="start",
109
+ flags=["--start"],
110
+ group="optional query parameters",
111
+ help="start timestamp format: YYYYMMDDHHMMSS",
112
+ type=int,
113
+ ),
114
+ ArgSpec(
115
+ name="end",
116
+ flags=["--end"],
117
+ group="optional query parameters",
118
+ help="end timestamp format: YYYYMMDDHHMMSS",
119
+ type=int,
120
+ ),
121
+ ArgSpec(
122
+ name="limit",
123
+ flags=["--limit"],
124
+ group="optional query parameters",
125
+ help="limit the number of snapshots to download",
126
+ prompt="Limit number of snapshots",
127
+ type=int,
128
+ action="optional_value",
129
+ const=True,
130
+ metavar="int",
131
+ advanced=True,
132
+ ),
133
+ ArgSpec(
134
+ name="filetype",
135
+ flags=["--filetype"],
136
+ group="optional query parameters",
137
+ help="filetypes to download comma separated (js,css,...)",
138
+ type=str,
139
+ ),
140
+ ArgSpec(
141
+ name="statuscode",
142
+ flags=["--statuscode"],
143
+ group="optional query parameters",
144
+ help="statuscodes to download comma separated (200,404,...)",
145
+ type=str,
146
+ ),
147
+ # behavior
148
+ ArgSpec(
149
+ name="output",
150
+ flags=["-o", "--output"],
151
+ group="manipulate behavior",
152
+ help="output for all files - defaults to current directory",
153
+ prompt="Output directory",
154
+ type=str,
155
+ advanced=True,
156
+ ),
157
+ ArgSpec(
158
+ name="metadata",
159
+ flags=["-m", "--metadata"],
160
+ group="manipulate behavior",
161
+ help="change directory for db/cdx/csv/log files",
162
+ type=str,
163
+ ),
164
+ ArgSpec(
165
+ name="verbose",
166
+ flags=["-v", "--verbose"],
167
+ group="manipulate behavior",
168
+ help="verbosity level: low, default, high (default if flag set without value)",
169
+ type=str,
170
+ action="optional_value",
171
+ const="default",
172
+ ),
173
+ ArgSpec(
174
+ name="log",
175
+ flags=["--log"],
176
+ group="manipulate behavior",
177
+ help="save a log file into the output folder",
178
+ prompt="Save log file?",
179
+ action="store_true",
180
+ default=False,
181
+ advanced=True,
182
+ ),
183
+ ArgSpec(
184
+ name="progress",
185
+ flags=["--progress"],
186
+ group="manipulate behavior",
187
+ help="show a progress bar",
188
+ prompt="Show progress bar?",
189
+ action="store_true",
190
+ default=False,
191
+ advanced=True,
192
+ ),
193
+ ArgSpec(
194
+ name="no_redirect",
195
+ flags=["--no-redirect"],
196
+ group="manipulate behavior",
197
+ help="do not follow redirects by archive.org",
198
+ action="store_true",
199
+ default=False,
200
+ ),
201
+ ArgSpec(
202
+ name="retry",
203
+ flags=["--retry"],
204
+ group="manipulate behavior",
205
+ help="retry failed downloads (opt tries as int, else infinite)",
206
+ type=int,
207
+ default=0,
208
+ ),
209
+ ArgSpec(
210
+ name="workers",
211
+ flags=["--workers"],
212
+ group="manipulate behavior",
213
+ help="number of workers (simultaneous downloads)",
214
+ prompt="Workers (parallel downloads)",
215
+ type=int,
216
+ default=1,
217
+ advanced=True,
218
+ ),
219
+ ArgSpec(
220
+ name="delay",
221
+ flags=["--delay"],
222
+ group="manipulate behavior",
223
+ help="delay between each download in seconds",
224
+ type=int,
225
+ default=0,
226
+ ),
227
+ ArgSpec(
228
+ name="wait",
229
+ flags=["--wait"],
230
+ group="manipulate behavior",
231
+ help="seconds to wait before renewing connection after HTTP errors or snapshot download errors (default: 15)",
232
+ type=int,
233
+ default=15,
234
+ ),
235
+ # special
236
+ ArgSpec(
237
+ name="reset",
238
+ flags=["--reset"],
239
+ group="special",
240
+ help="reset the job and ignore existing cdx/db/csv files",
241
+ action="store_true",
242
+ default=False,
243
+ ),
244
+ ArgSpec(
245
+ name="keep",
246
+ flags=["--keep"],
247
+ group="special",
248
+ help="keep all files after the job finished",
249
+ action="store_true",
250
+ default=False,
251
+ ),
252
+ ]
253
+
254
+
255
+ def default_args() -> dict:
256
+ """Return a dict of {name: default} for every spec — the canonical empty arg payload."""
257
+ return {spec.name: spec.default for spec in ARG_SPECS}
@@ -15,8 +15,7 @@ from sqlalchemy import ( # noqa: F401
15
15
  tuple_,
16
16
  update,
17
17
  )
18
- from sqlalchemy.ext.declarative import declarative_base
19
- from sqlalchemy.orm import sessionmaker
18
+ from sqlalchemy.orm import declarative_base, sessionmaker
20
19
  from typing import Optional # python 3.8
21
20
  from pywaybackup.Verbosity import Verbosity as vb
22
21
 
@@ -95,6 +94,7 @@ class Database:
95
94
  dbfile = None
96
95
  query_identifier = None
97
96
  query_exist = False
97
+ engine = None
98
98
  sessman = sessionmaker()
99
99
  query_progress = "0 / 0"
100
100
 
@@ -109,9 +109,9 @@ class Database:
109
109
  """
110
110
  cls.dbfile = dbfile
111
111
  cls.query_identifier = query_identifier
112
- engine = create_engine(f"sqlite:///{dbfile}")
113
- cls.sessman = sessionmaker(bind=engine)
114
- Base.metadata.create_all(engine)
112
+ cls.engine = create_engine(f"sqlite:///{dbfile}")
113
+ cls.sessman = sessionmaker(bind=cls.engine)
114
+ Base.metadata.create_all(cls.engine)
115
115
 
116
116
  db = Database()
117
117
  if db.session.execute(
@@ -123,6 +123,19 @@ class Database:
123
123
  db.session.execute(insert(waybackup_job).values(query_identifier=query_identifier))
124
124
  db.close()
125
125
 
126
+ @classmethod
127
+ def close_engine(cls):
128
+ """
129
+ Dispose of the SQLAlchemy engine and release SQLite file handles.
130
+
131
+ Required on Windows before the .db file can be deleted, since the OS
132
+ holds an exclusive lock on open files. No-op on platforms where this
133
+ isn't required, and idempotent if called more than once.
134
+ """
135
+ if cls.engine is not None:
136
+ cls.engine.dispose()
137
+ cls.engine = None
138
+
126
139
  def __init__(self):
127
140
  """
128
141
  Create a new session.
@@ -0,0 +1,144 @@
1
+ """
2
+ Interactive mode: prompt the user for arguments instead of parsing sys.argv.
3
+
4
+ Used when waybackup is launched without CLI arguments (e.g. double-clicking
5
+ the Windows .exe). Produces the same dict shape as Arguments.get_args() so
6
+ PyWayBackup(**args) works either way. Argument metadata is read from
7
+ arg_specs.ARG_SPECS so flags only have to be declared in one place.
8
+ """
9
+
10
+ from importlib.metadata import version
11
+
12
+ from pywaybackup.arg_specs import ARG_SPECS, EXCLUSIVE_GROUPS, default_args
13
+
14
+
15
+ class Interactive:
16
+ def __init__(self):
17
+ print(f"<<< python-wayback-machine-downloader v{version('pywaybackup')} >>>")
18
+ print("Interactive mode - press Ctrl+C to abort.\n")
19
+
20
+ args = default_args()
21
+
22
+ # 1. Required URL
23
+ url_spec = _spec_by_name("url")
24
+ args["url"] = self._prompt_required(url_spec.prompt or url_spec.help)
25
+
26
+ # 2. Required exclusive group(s) — pick exactly one member
27
+ for ex_name in EXCLUSIVE_GROUPS:
28
+ members = [s for s in ARG_SPECS if s.exclusive_group == ex_name]
29
+ choice = self._prompt_choice(
30
+ ex_name.capitalize(),
31
+ [(s.name, s.help) for s in members],
32
+ )
33
+ for s in members:
34
+ args[s.name] = s.name == choice
35
+
36
+ # 3. Advanced options
37
+ if self._prompt_yes_no("Configure advanced options?", default=False):
38
+ for spec in ARG_SPECS:
39
+ if not spec.advanced:
40
+ continue
41
+ args[spec.name] = self._prompt_for(spec, args[spec.name])
42
+
43
+ # internal flags (parity with Arguments.py)
44
+ args["silent"] = False
45
+ args["debug"] = True
46
+
47
+ self.args = args
48
+ print()
49
+
50
+ def get_args(self) -> dict:
51
+ return self.args
52
+
53
+ def _prompt_for(self, spec, current):
54
+ label = spec.prompt or spec.help
55
+ if spec.action == "store_true":
56
+ return self._prompt_yes_no(f"{label}", default=bool(current))
57
+ if spec.type is int:
58
+ if current is None:
59
+ return self._prompt_optional_int(label)
60
+ return self._prompt_int(label, default=current)
61
+ if current is None:
62
+ return self._prompt_optional_str(label)
63
+ return self._prompt_str(label, default=current)
64
+
65
+ @staticmethod
66
+ def _prompt_required(label):
67
+ while True:
68
+ value = input(f"{label}: ").strip()
69
+ if value:
70
+ return value
71
+ print(" (required, please enter a value)")
72
+
73
+ @staticmethod
74
+ def _prompt_optional_str(label):
75
+ value = input(f"{label} (blank to skip): ").strip()
76
+ return value or None
77
+
78
+ @staticmethod
79
+ def _prompt_str(label, default):
80
+ value = input(f"{label} [{default}]: ").strip()
81
+ return value if value else default
82
+
83
+ @staticmethod
84
+ def _prompt_optional_int(label):
85
+ while True:
86
+ value = input(f"{label} (blank to skip): ").strip()
87
+ if not value:
88
+ return None
89
+ try:
90
+ return int(value)
91
+ except ValueError:
92
+ print(" (please enter an integer or leave blank)")
93
+
94
+ @staticmethod
95
+ def _prompt_int(label, default):
96
+ while True:
97
+ value = input(f"{label} [{default}]: ").strip()
98
+ if not value:
99
+ return default
100
+ try:
101
+ return int(value)
102
+ except ValueError:
103
+ print(" (please enter an integer)")
104
+
105
+ @staticmethod
106
+ def _prompt_yes_no(label, default):
107
+ suffix = "[Y/n]" if default else "[y/N]"
108
+ while True:
109
+ value = input(f"{label} {suffix}: ").strip().lower()
110
+ if not value:
111
+ return default
112
+ if value in ("y", "yes"):
113
+ return True
114
+ if value in ("n", "no"):
115
+ return False
116
+ print(" (please answer y or n)")
117
+
118
+ @staticmethod
119
+ def _prompt_choice(label, options):
120
+ # use first letter of each name as key, fall back to position number on collision
121
+ keys = []
122
+ used = set()
123
+ for name, _ in options:
124
+ k = name[0]
125
+ if k in used:
126
+ k = str(len(keys) + 1)
127
+ keys.append(k)
128
+ used.add(k)
129
+ print(f"\n{label}:")
130
+ for k, (name, desc) in zip(keys, options):
131
+ print(f" [{k}] {name}: {desc}")
132
+ valid = dict(zip(keys, [name for name, _ in options]))
133
+ while True:
134
+ value = input("Choice: ").strip().lower()
135
+ if value in valid:
136
+ return valid[value]
137
+ print(f" (please enter one of: {', '.join(sorted(valid))})")
138
+
139
+
140
+ def _spec_by_name(name):
141
+ for s in ARG_SPECS:
142
+ if s.name == name:
143
+ return s
144
+ raise KeyError(name)
@@ -0,0 +1,32 @@
1
+ import signal
2
+ import sys
3
+
4
+ from pywaybackup import PyWayBackup
5
+ from pywaybackup.arg_parser import Arguments
6
+ from pywaybackup.interactive import Interactive
7
+
8
+
9
+ def cli():
10
+ # interactive only when launched with no args; scripts/cron without a tty get --help instead
11
+ interactive = len(sys.argv) <= 1 and sys.stdin is not None and sys.stdin.isatty()
12
+ try:
13
+ cli_input = Interactive() if interactive else Arguments()
14
+ except (KeyboardInterrupt, EOFError):
15
+ # ignore pyinstaller bl SIGINT while aborting
16
+ signal.signal(signal.SIGINT, signal.SIG_IGN)
17
+ print("\nAborted.")
18
+ sys.exit(130)
19
+ cli_args = cli_input.get_args()
20
+ config = PyWayBackup(**cli_args)
21
+ try:
22
+ config.run(daemon=False)
23
+ finally:
24
+ if interactive:
25
+ try:
26
+ input("\nPress Enter to exit...")
27
+ except (KeyboardInterrupt, EOFError):
28
+ pass
29
+
30
+
31
+ if __name__ == "__main__":
32
+ cli()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pywaybackup
3
- Version: 4.1.6
3
+ Version: 4.2.0
4
4
  Summary: Query and download archive.org as simple as possible.
5
5
  Author-email: bitdruid <bitdruid@outlook.com>
6
6
  License: MIT License
@@ -26,11 +26,11 @@ License: MIT License
26
26
  SOFTWARE.
27
27
 
28
28
  Project-URL: homepage, https://github.com/bitdruid/python-wayback-machine-downloader
29
- Requires-Python: <3.14,>=3.8
29
+ Requires-Python: >=3.8
30
30
  Description-Content-Type: text/markdown
31
31
  License-File: LICENSE
32
32
  Requires-Dist: ruff
33
- Requires-Dist: SQLAlchemy==2.0.43
33
+ Requires-Dist: SQLAlchemy==2.0.51
34
34
  Requires-Dist: requests==2.32.3
35
35
  Requires-Dist: tqdm==4.67.1
36
36
  Requires-Dist: python-magic-standalone==0.4.28
@@ -39,7 +39,7 @@ Requires-Dist: python-magic-standalone==0.4.28
39
39
 
40
40
  [![PyPI](https://img.shields.io/pypi/v/pywaybackup)](https://pypi.org/project/pywaybackup/)
41
41
  [![PyPI - Downloads](https://img.shields.io/pypi/dm/pywaybackup)](https://pypi.org/project/pywaybackup/)
42
- ![Python Version](https://img.shields.io/badge/Python-3.8-blue)
42
+ ![Python Version](https://img.shields.io/badge/Python-3.8%2B-blue)
43
43
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
44
44
 
45
45
  Downloading archived web pages from the [Wayback Machine](https://archive.org/web/).
@@ -68,6 +68,13 @@ This tool allows you to download content from the Wayback Machine (archive.org).
68
68
  2. Run the tool <br>
69
69
  `waybackup -h`
70
70
 
71
+ ### Standalone binary
72
+
73
+ Prebuilt executables for Windows, Linux and macOS are attached to each [release](https://github.com/bitdruid/python-wayback-machine-downloader/releases). No Python required.
74
+
75
+ - Run from a terminal with arguments like the pip version: `waybackup -h`
76
+ - Or start it without arguments (e.g. double-click on Windows) to enter **interactive mode** — the tool will prompt you for URL, mode and optional settings.
77
+
71
78
  ### Manual
72
79
 
73
80
  1. Clone the repository <br>
@@ -155,8 +162,9 @@ output:
155
162
 
156
163
  ## cli
157
164
 
158
- - `-h`, `--help`: Show the help message and exit.
159
- - `-v`, `--version`: Show information about the tool and exit.
165
+ - `-h`, `--help`: Show the help message and exit. Version info is shown in the help header.
166
+
167
+ > **Interactive mode:** running `waybackup` without any arguments in a terminal starts a guided prompt for URL, mode and optional settings. Without a terminal (scripts/cron), the help is printed instead.
160
168
 
161
169
  #### Required
162
170
 
@@ -171,6 +179,8 @@ output:
171
179
  Last Version. Gives one folder containing the last version of each file of specified `--range`.
172
180
  - **`-f`**, **`--first`**:<br>
173
181
  First Version. Gives one folder containing the first version of each file of specified `--range`.
182
+ - **`-s`**, **`--save`**:<br>
183
+ Save a page to the wayback machine (no download).
174
184
 
175
185
  #### Optional query parameters
176
186
 
@@ -219,7 +229,7 @@ Parameters will change the download behavior for snapshots.
219
229
  Set verbosity level. Available levels:
220
230
  - `low` (or `quiet`, `minimal`, `min`): Essential output only (same as no flag)
221
231
  - `default` (or `normal`, `verbose`): Standard verbose output (default when flag is set)
222
- - `high` (or `debug`, `detailed`, `max`): Detailed verbose output
232
+ - `high` (or `detailed`, `max`): Detailed verbose output
223
233
 
224
234
  Examples: `--verbose`, `--verbose default`, `--verbose high`, `-v high`
225
235
 
@@ -1,7 +1,6 @@
1
1
  LICENSE
2
2
  README.md
3
3
  pyproject.toml
4
- pywaybackup/Arguments.py
5
4
  pywaybackup/Exception.py
6
5
  pywaybackup/PyWayBackup.py
7
6
  pywaybackup/Snapshot.py
@@ -11,9 +10,12 @@ pywaybackup/Worker.py
11
10
  pywaybackup/__init__.py
12
11
  pywaybackup/archive_download.py
13
12
  pywaybackup/archive_save.py
13
+ pywaybackup/arg_parser.py
14
+ pywaybackup/arg_specs.py
14
15
  pywaybackup/db.py
15
16
  pywaybackup/files.py
16
17
  pywaybackup/helper.py
18
+ pywaybackup/interactive.py
17
19
  pywaybackup/main.py
18
20
  pywaybackup.egg-info/PKG-INFO
19
21
  pywaybackup.egg-info/SOURCES.txt
@@ -1,5 +1,5 @@
1
1
  ruff
2
- SQLAlchemy==2.0.43
2
+ SQLAlchemy==2.0.51
3
3
  requests==2.32.3
4
4
  tqdm==4.67.1
5
5
  python-magic-standalone==0.4.28
@@ -1,71 +0,0 @@
1
- import sys
2
- import argparse
3
-
4
- from argparse import RawTextHelpFormatter
5
-
6
- from importlib.metadata import version
7
-
8
-
9
- class Arguments:
10
- def __init__(self):
11
- parser = argparse.ArgumentParser(
12
- description=f"<<< python-wayback-machine-downloader v{version('pywaybackup')} >>>\nby @bitdruid -> https://github.com/bitdruid",
13
- formatter_class=RawTextHelpFormatter,
14
- )
15
-
16
- required = parser.add_argument_group("required (one exclusive)")
17
- required.add_argument("-u", "--url", type=str, metavar="", help="url (with subdir/subdomain) to download")
18
- exclusive_required = required.add_mutually_exclusive_group(required=True)
19
- exclusive_required.add_argument("-a", "--all", action="store_true", help="download snapshots of all timestamps")
20
- exclusive_required.add_argument("-l", "--last", action="store_true", help="download the last version of each file snapshot")
21
- exclusive_required.add_argument("-f", "--first", action="store_true", help="download the first version of each file snapshot")
22
- exclusive_required.add_argument("-s", "--save", action="store_true", help="save a page to the wayback machine")
23
-
24
- optional = parser.add_argument_group("optional query parameters")
25
- optional.add_argument("-e", "--explicit", action="store_true", help="search only for the explicit given url")
26
- optional.add_argument("-r", "--range", type=int, metavar="", help="range in years to search")
27
- optional.add_argument("--start", type=int, metavar="", help="start timestamp format: YYYYMMDDHHMMSS")
28
- optional.add_argument("--end", type=int, metavar="", help="end timestamp format: YYYYMMDDHHMMSS")
29
- optional.add_argument("--limit", type=int, nargs="?", const=True, metavar="int", help="limit the number of snapshots to download")
30
- optional.add_argument("--filetype", type=str, metavar="", help="filetypes to download comma separated (js,css,...)")
31
- optional.add_argument("--statuscode", type=str, metavar="", help="statuscodes to download comma separated (200,404,...)")
32
-
33
- behavior = parser.add_argument_group("manipulate behavior")
34
- behavior.add_argument("-o", "--output", type=str, metavar="", help="output for all files - defaults to current directory")
35
- behavior.add_argument("-m", "--metadata", type=str, metavar="", help="change directory for db/cdx/csv/log files")
36
- behavior.add_argument(
37
- "-v", "--verbose",
38
- type=str,
39
- nargs="?",
40
- const="default",
41
- metavar="",
42
- help="verbosity level: low, default, high (default if flag set without value)",
43
- )
44
- behavior.add_argument("--log", action="store_true", help="save a log file into the output folder")
45
- behavior.add_argument("--progress", action="store_true", help="show a progress bar")
46
- behavior.add_argument("--no-redirect", action="store_true", help="do not follow redirects by archive.org")
47
- behavior.add_argument("--retry", type=int, default=0, metavar="", help="retry failed downloads (opt tries as int, else infinite)")
48
- behavior.add_argument("--workers", type=int, default=1, metavar="", help="number of workers (simultaneous downloads)")
49
- behavior.add_argument("--delay", type=int, default=0, metavar="", help="delay between each download in seconds")
50
- behavior.add_argument(
51
- "--wait",
52
- type=int,
53
- default=15,
54
- metavar="",
55
- help="seconds to wait before renewing connection after HTTP errors or snapshot download errors (default: 15)",
56
- )
57
-
58
- special = parser.add_argument_group("special")
59
- special.add_argument("--reset", action="store_true", help="reset the job and ignore existing cdx/db/csv files")
60
- special.add_argument("--keep", action="store_true", help="keep all files after the job finished")
61
-
62
- args = parser.parse_args(args=None if sys.argv[1:] else ["--help"]) # if no arguments are given, print help
63
-
64
- args.silent = False
65
- args.debug = True
66
-
67
- self.args = args
68
-
69
- def get_args(self) -> dict:
70
- """Returns the parsed arguments as a dictionary."""
71
- return vars(self.args)
@@ -1,13 +0,0 @@
1
- from pywaybackup import PyWayBackup
2
- from pywaybackup.Arguments import Arguments as args
3
-
4
-
5
- def cli():
6
- cli_input = args()
7
- cli_args = cli_input.get_args()
8
- config = PyWayBackup(**cli_args)
9
- config.run(daemon=False)
10
-
11
-
12
- if __name__ == "__main__":
13
- cli()
File without changes
File without changes