pywaybackup 4.1.0__tar.gz → 4.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {pywaybackup-4.1.0/pywaybackup.egg-info → pywaybackup-4.1.2}/PKG-INFO +3 -2
  2. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pyproject.toml +3 -2
  3. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup/Exception.py +2 -5
  4. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup/PyWayBackup.py +5 -1
  5. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup/Snapshot.py +3 -1
  6. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup/SnapshotCollection.py +36 -9
  7. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup/Verbosity.py +21 -5
  8. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup/archive_download.py +53 -23
  9. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup/archive_save.py +9 -3
  10. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup/db.py +26 -9
  11. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup/files.py +3 -1
  12. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup/helper.py +3 -6
  13. {pywaybackup-4.1.0 → pywaybackup-4.1.2/pywaybackup.egg-info}/PKG-INFO +3 -2
  14. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup.egg-info/requires.txt +2 -1
  15. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/LICENSE +0 -0
  16. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/README.md +0 -0
  17. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup/Arguments.py +0 -0
  18. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup/Worker.py +0 -0
  19. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup/__init__.py +0 -0
  20. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup/main.py +0 -0
  21. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup.egg-info/SOURCES.txt +0 -0
  22. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup.egg-info/dependency_links.txt +0 -0
  23. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup.egg-info/entry_points.txt +0 -0
  24. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/pywaybackup.egg-info/top_level.txt +0 -0
  25. {pywaybackup-4.1.0 → pywaybackup-4.1.2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pywaybackup
3
- Version: 4.1.0
3
+ Version: 4.1.2
4
4
  Summary: Query and download archive.org as simple as possible.
5
5
  Author-email: bitdruid <bitdruid@outlook.com>
6
6
  License: MIT License
@@ -32,8 +32,9 @@ License-File: LICENSE
32
32
  Requires-Dist: SQLAlchemy==2.0.43
33
33
  Requires-Dist: requests==2.32.3
34
34
  Requires-Dist: tqdm==4.67.1
35
+ Requires-Dist: pylibmagic==0.5.0
35
36
  Requires-Dist: python-magic==0.4.27; sys_platform == "linux"
36
- Requires-Dist: python-magic-bin==0.4.14; sys_platform == "win32"
37
+ Requires-Dist: python-magic-bin==0.4.14; sys_platform == "win32" or sys_platform == "darwin"
37
38
 
38
39
  # python wayback machine downloader
39
40
 
@@ -7,7 +7,7 @@ packages = ["pywaybackup"]
7
7
 
8
8
  [project]
9
9
  name = "pywaybackup"
10
- version = "4.1.0"
10
+ version = "4.1.2"
11
11
  description = "Query and download archive.org as simple as possible."
12
12
  authors = [
13
13
  { name = "bitdruid", email = "bitdruid@outlook.com" }
@@ -19,8 +19,9 @@ dependencies = [
19
19
  "SQLAlchemy==2.0.43",
20
20
  "requests==2.32.3",
21
21
  "tqdm==4.67.1",
22
+ "pylibmagic==0.5.0",
22
23
  "python-magic==0.4.27; sys_platform == 'linux'",
23
- "python-magic-bin==0.4.14; sys_platform == 'win32'",
24
+ "python-magic-bin==0.4.14; sys_platform == 'win32' or sys_platform == 'darwin'",
24
25
  ]
25
26
 
26
27
  [project.scripts]
@@ -36,10 +36,7 @@ class Exception:
36
36
  codeline = linecache.getline(filename, tb_line).strip()
37
37
  local_vars = tb_frame.f_locals
38
38
  exception_message += (
39
- f"!-- File: {filename}\n"
40
- f"!-- Function: {func_name}\n"
41
- f"!-- Line: {tb_line}\n"
42
- f"!-- Segment: {codeline}\n"
39
+ f"!-- File: {filename}\n!-- Function: {func_name}\n!-- Line: {tb_line}\n!-- Segment: {codeline}\n"
43
40
  )
44
41
  else:
45
42
  exception_message += "!-- Traceback is None\n"
@@ -96,4 +93,4 @@ class Exception:
96
93
  if issubclass(exception_type, KeyboardInterrupt):
97
94
  sys.__excepthook__(exception_type, exception, traceback)
98
95
  return
99
- Exception.exception('UNCAUGHT EXCEPTION', exception, traceback) # uncaught exceptions also with custom scheme
96
+ Exception.exception("UNCAUGHT EXCEPTION", exception, traceback) # uncaught exceptions also with custom scheme
@@ -407,7 +407,11 @@ class PyWayBackup:
407
407
  "log": self._log,
408
408
  "debug": self._debug,
409
409
  }
410
- return {key: (os.path.relpath(path) if rel else path) for key, path in files.items() if path and os.path.exists(path)}
410
+ return {
411
+ key: (os.path.relpath(path) if rel else path)
412
+ for key, path in files.items()
413
+ if path and os.path.exists(path)
414
+ }
411
415
 
412
416
  def status(self) -> dict:
413
417
  """
@@ -101,7 +101,9 @@ class Snapshot:
101
101
  value: New value to set for the column.
102
102
  """
103
103
  column = getattr(waybackup_snapshots, column)
104
- self._db.session.execute(update(waybackup_snapshots).where(waybackup_snapshots.scid == self.scid).values({column: value}))
104
+ self._db.session.execute(
105
+ update(waybackup_snapshots).where(waybackup_snapshots.scid == self.scid).values({column: value})
106
+ )
105
107
  self._db.session.commit()
106
108
 
107
109
  def create_output(self):
@@ -50,7 +50,9 @@ class SnapshotCollection:
50
50
 
51
51
  def _reset_locked_snapshots(self):
52
52
  """Reset locked snapshots to unprocessed in the database."""
53
- self.db.session.execute(update(waybackup_snapshots).where(waybackup_snapshots.response == "LOCK").values(response=None))
53
+ self.db.session.execute(
54
+ update(waybackup_snapshots).where(waybackup_snapshots.response == "LOCK").values(response=None)
55
+ )
54
56
  self.db.session.commit()
55
57
 
56
58
  def _finalize_db(self):
@@ -140,11 +142,19 @@ class SnapshotCollection:
140
142
  waybackup_snapshots.url_origin,
141
143
  waybackup_snapshots.url_archive,
142
144
  )
143
- .filter(tuple_(waybackup_snapshots.timestamp, waybackup_snapshots.url_origin, waybackup_snapshots.url_archive).in_(keys))
145
+ .filter(
146
+ tuple_(
147
+ waybackup_snapshots.timestamp, waybackup_snapshots.url_origin, waybackup_snapshots.url_archive
148
+ ).in_(keys)
149
+ )
144
150
  .all()
145
151
  )
146
152
  existing_rows = set(existing)
147
- new_rows = [row for row in unique_batch if (row["timestamp"], row["url_origin"], row["url_archive"]) not in existing_rows]
153
+ new_rows = [
154
+ row
155
+ for row in unique_batch
156
+ if (row["timestamp"], row["url_origin"], row["url_archive"]) not in existing_rows
157
+ ]
148
158
  if new_rows:
149
159
  self.db.session.bulk_insert_mappings(waybackup_snapshots, new_rows)
150
160
  self.db.session.commit()
@@ -200,17 +210,25 @@ class SnapshotCollection:
200
210
  # index for filtering last snapshots
201
211
  if self._mode_last:
202
212
  idx1 = Index(
203
- "idx_waybackup_snapshots_url_origin_timestamp_desc", waybackup_snapshots.url_origin, waybackup_snapshots.timestamp.desc()
213
+ "idx_waybackup_snapshots_url_origin_timestamp_desc",
214
+ waybackup_snapshots.url_origin,
215
+ waybackup_snapshots.timestamp.desc(),
204
216
  )
205
217
  idx1.create(self.db.session.bind, checkfirst=True)
206
218
  # index for filtering first snapshots
207
219
  if self._mode_first:
208
220
  idx2 = Index(
209
- "idx_waybackup_snapshots_url_origin_timestamp_asc", waybackup_snapshots.url_origin, waybackup_snapshots.timestamp.asc()
221
+ "idx_waybackup_snapshots_url_origin_timestamp_asc",
222
+ waybackup_snapshots.url_origin,
223
+ waybackup_snapshots.timestamp.asc(),
210
224
  )
211
225
  idx2.create(self.db.session.bind, checkfirst=True)
212
226
  # index for skippable snapshots
213
- idx3 = Index("idx_waybackup_snapshots_timestamp_url_origin_response", waybackup_snapshots.timestamp, waybackup_snapshots.url_origin)
227
+ idx3 = Index(
228
+ "idx_waybackup_snapshots_timestamp_url_origin_response",
229
+ waybackup_snapshots.timestamp,
230
+ waybackup_snapshots.url_origin,
231
+ )
214
232
  idx3.create(self.db.session.bind, checkfirst=True)
215
233
 
216
234
  def _filter_snapshots(self):
@@ -224,7 +242,9 @@ class SnapshotCollection:
224
242
  def _filter_mode():
225
243
  self._filter_mode = 0
226
244
  if self._mode_last or self._mode_first:
227
- ordering = waybackup_snapshots.timestamp.desc() if self._mode_last else waybackup_snapshots.timestamp.asc()
245
+ ordering = (
246
+ waybackup_snapshots.timestamp.desc() if self._mode_last else waybackup_snapshots.timestamp.asc()
247
+ )
228
248
  # assign row numbers per url_origin
229
249
  rownum = (
230
250
  func.row_number()
@@ -266,7 +286,9 @@ class SnapshotCollection:
266
286
 
267
287
  _filter_mode()
268
288
  _enumerate_counter()
269
- self._filter_response = self.db.session.query(waybackup_snapshots).where(waybackup_snapshots.response.in_(["404", "301"])).count()
289
+ self._filter_response = (
290
+ self.db.session.query(waybackup_snapshots).where(waybackup_snapshots.response.in_(["404", "301"])).count()
291
+ )
270
292
  self.db.session.commit()
271
293
 
272
294
  def _skip_set(self):
@@ -280,7 +302,12 @@ class SnapshotCollection:
280
302
  for row in f:
281
303
  self.db.session.execute(
282
304
  update(waybackup_snapshots)
283
- .where(and_(waybackup_snapshots.timestamp == row["timestamp"], waybackup_snapshots.url_origin == row["url_origin"]))
305
+ .where(
306
+ and_(
307
+ waybackup_snapshots.timestamp == row["timestamp"],
308
+ waybackup_snapshots.url_origin == row["url_origin"],
309
+ )
310
+ )
284
311
  .values(
285
312
  url_archive=row["url_archive"],
286
313
  redirect_url=row["redirect_url"],
@@ -68,9 +68,10 @@ class Verbosity:
68
68
  cls.pbar = Progressbar(
69
69
  unit=" snapshot",
70
70
  desc="download file".ljust(15),
71
- total=maxval, ascii="░▒█",
72
- bar_format="{l_bar}{bar:50}{r_bar}{bar:-10b}"
73
- )
71
+ total=maxval,
72
+ ascii="░▒█",
73
+ bar_format="{l_bar}{bar:50}{r_bar}{bar:-10b}",
74
+ )
74
75
  if cls.pbar is not None and progress is not None and progress > 0:
75
76
  cls.pbar.update(progress)
76
77
 
@@ -93,7 +94,15 @@ class Verbosity:
93
94
 
94
95
 
95
96
  class Progressbar(Verbosity):
96
- def __init__(self, unit: str, desc: str, unit_scale: bool = False, total: int = None, ascii: str = None, bar_format: str = None):
97
+ def __init__(
98
+ self,
99
+ unit: str,
100
+ desc: str,
101
+ unit_scale: bool = False,
102
+ total: int = None,
103
+ ascii: str = None,
104
+ bar_format: str = None,
105
+ ):
97
106
  if not super().silent:
98
107
  self.unit = unit
99
108
  self.desc = desc
@@ -101,7 +110,14 @@ class Progressbar(Verbosity):
101
110
  self.total = total
102
111
  self.ascii = ascii
103
112
  self.bar_format = bar_format
104
- self.pbar = tqdm(unit=self.unit, desc=self.desc, unit_scale=self.unit_scale, total=self.total, ascii=self.ascii, bar_format=self.bar_format)
113
+ self.pbar = tqdm(
114
+ unit=self.unit,
115
+ desc=self.desc,
116
+ unit_scale=self.unit_scale,
117
+ total=self.total,
118
+ ascii=self.ascii,
119
+ bar_format=self.bar_format,
120
+ )
105
121
 
106
122
  def update(self, progress: int):
107
123
  """
@@ -4,6 +4,7 @@ import os
4
4
  import threading
5
5
  import time
6
6
  import urllib.parse
7
+ from gzip import BadGzipFile
7
8
  from http import HTTPStatus
8
9
  from importlib.metadata import version
9
10
  from socket import timeout
@@ -168,22 +169,32 @@ class DownloadArchive:
168
169
  try:
169
170
  download_status = self._download(worker=worker)
170
171
 
171
- except (timeout, ConnectionRefusedError, ConnectionResetError, http.client.HTTPException, Exception) as e:
172
+ except (
173
+ timeout,
174
+ ConnectionRefusedError,
175
+ ConnectionResetError,
176
+ http.client.HTTPException,
177
+ Exception,
178
+ ) as e:
172
179
  if isinstance(e, (timeout, ConnectionRefusedError, ConnectionResetError)):
173
180
  if download_attempt < download_max_attempt:
174
181
  download_attempt += 1 # try again 2x with same connection
175
182
  vb.write(
176
183
  verbose=True,
177
- content=f"\n-----> Worker: {worker.id} \
178
- - Attempt: [{worker.attempt}/{retry_max_attempt}] \
179
- Snapshot ID: [{worker.snapshot.counter}/{self.sc._snapshot_total}] \
180
- - {e.__class__.__name__} - requesting again in 50 seconds...",
184
+ content=(
185
+ f"\n-----> Worker: {worker.id}"
186
+ f" - Attempt: [{worker.attempt}/{retry_max_attempt}]"
187
+ f" Snapshot ID: [{worker.snapshot.counter}/{self.sc._snapshot_total}]"
188
+ f" - {e.__class__.__name__} - requesting again in 50 seconds..."
189
+ ),
181
190
  )
182
191
  vb.write(
183
192
  verbose=False,
184
- content=f"Worker: {worker.id} \
185
- - Snapshot {worker.snapshot.counter}/{self.sc._snapshot_total} \
186
- - requesting again in 50 seconds...",
193
+ content=(
194
+ f"Worker: {worker.id}"
195
+ f" - Snapshot {worker.snapshot.counter}/{self.sc._snapshot_total}"
196
+ f" - requesting again in 50 seconds..."
197
+ ),
187
198
  )
188
199
  time.sleep(50)
189
200
  continue
@@ -193,26 +204,32 @@ class DownloadArchive:
193
204
  download_attempt = download_max_attempt # try again 1x with new connection
194
205
  vb.write(
195
206
  verbose=True,
196
- content=f"\n-----> Worker: {worker.id} \
197
- - Attempt: [{worker.attempt}/{retry_max_attempt}] \
198
- Snapshot ID: [{worker.snapshot.counter}/{self.sc._snapshot_total}] \
199
- - {e.__class__.__name__} - renewing connection in 15 seconds...",
207
+ content=(
208
+ f"\n-----> Worker: {worker.id}"
209
+ f" - Attempt: [{worker.attempt}/{retry_max_attempt}]"
210
+ f" Snapshot ID: [{worker.snapshot.counter}/{self.sc._snapshot_total}]"
211
+ f" - {e.__class__.__name__} - renewing connection in 15 seconds..."
212
+ ),
200
213
  )
201
214
  vb.write(
202
215
  verbose=False,
203
- content=f"Worker: {worker.id} \
204
- - Snapshot {worker.snapshot.counter}/{self.sc._snapshot_total} \
205
- - renewing connection in 15 seconds...",
216
+ content=(
217
+ f"Worker: {worker.id}"
218
+ f" - Snapshot {worker.snapshot.counter}/{self.sc._snapshot_total}"
219
+ f" - renewing connection in 15 seconds..."
220
+ ),
206
221
  )
207
222
  time.sleep(15)
208
223
  worker.refresh_connection()
209
224
  continue
210
225
  else:
211
226
  ex.exception(
212
- message=f"\n-----> Worker: {worker.id} \
213
- - Attempt: [{worker.attempt}/{retry_max_attempt}] \
214
- Snapshot ID: [{worker.snapshot.counter}/{self.sc._snapshot_total}] \
215
- - EXCEPTION - {e}",
227
+ message=(
228
+ f"\n-----> Worker: {worker.id}"
229
+ f" - Attempt: [{worker.attempt}/{retry_max_attempt}]"
230
+ f" Snapshot ID: [{worker.snapshot.counter}/{self.sc._snapshot_total}]"
231
+ f" - EXCEPTION - {e}"
232
+ ),
216
233
  e=e,
217
234
  )
218
235
  worker.attempt = retry_max_attempt
@@ -279,7 +296,14 @@ class DownloadArchive:
279
296
  if not os.path.isfile(context.output_file):
280
297
  with open(context.output_file, "wb") as file:
281
298
  if context.response.getheader("Content-Encoding") == "gzip":
282
- context.response_data = gzip.decompress(context.response_data)
299
+ try:
300
+ context.response_data = gzip.decompress(context.response_data)
301
+ except BadGzipFile:
302
+ vb.write(
303
+ verbose=None,
304
+ content=f"Worker: {worker.id} - GZIP DECOMPRESS SKIPPED - {context.snapshot_url}",
305
+ )
306
+ pass
283
307
  file.write(context.response_data)
284
308
 
285
309
  # check if file is downloaded
@@ -298,7 +322,9 @@ class DownloadArchive:
298
322
  context (DownloadContext): The download context.
299
323
  worker (Worker): The worker instance.
300
324
  """
301
- worker.message.store(verbose=True, result="REDIRECT", content=f"{context.response_status} {context.response_status_message}")
325
+ worker.message.store(
326
+ verbose=True, result="REDIRECT", content=f"{context.response_status} {context.response_status_message}"
327
+ )
302
328
  worker.message.store(verbose=True, result="", info="FROM", content=context.snapshot_url)
303
329
  for _ in range(5):
304
330
  self.__download_response(context=context, worker=worker)
@@ -354,7 +380,9 @@ class DownloadArchive:
354
380
  Returns:
355
381
  bool: Always True (indicates result was processed).
356
382
  """
357
- worker.message.store(verbose=True, result=result, content=f"{context.response_status} {context.response_status_message}")
383
+ worker.message.store(
384
+ verbose=True, result=result, content=f"{context.response_status} {context.response_status_message}"
385
+ )
358
386
  worker.message.store(verbose=False, result=result)
359
387
  worker.message.store(verbose=True, result="", info="URL", content=context.snapshot_url)
360
388
  worker.message.store(verbose=True, result="", info="FILE", content=context.output_file)
@@ -371,7 +399,9 @@ class DownloadArchive:
371
399
  Returns:
372
400
  bool: Always False (indicates failure was processed).
373
401
  """
374
- worker.message.store(verbose=None, result="UNKNOWN", content=f"{context.response_status} {context.response_status_message}")
402
+ worker.message.store(
403
+ verbose=None, result="UNKNOWN", content=f"{context.response_status} {context.response_status_message}"
404
+ )
375
405
  worker.message.store(verbose=True, result="", info="URL", content=context.snapshot_url)
376
406
  return False
377
407
 
@@ -34,10 +34,13 @@ def save_page(url: str):
34
34
 
35
35
  if response_status == 302:
36
36
  location = response.getheader("Location")
37
- snapshot_timestamp = datetime.strptime(url_get_timestamp(location), "%Y%m%d%H%M%S").strftime("%Y-%m-%d %H:%M:%S")
37
+ snapshot_timestamp = datetime.strptime(url_get_timestamp(location), "%Y%m%d%H%M%S").strftime(
38
+ "%Y-%m-%d %H:%M:%S"
39
+ )
38
40
  current_timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
39
41
  timestamp_difference = (
40
- datetime.strptime(current_timestamp, "%Y-%m-%d %H:%M:%S") - datetime.strptime(snapshot_timestamp, "%Y-%m-%d %H:%M:%S")
42
+ datetime.strptime(current_timestamp, "%Y-%m-%d %H:%M:%S")
43
+ - datetime.strptime(snapshot_timestamp, "%Y-%m-%d %H:%M:%S")
41
44
  ).seconds / 60
42
45
  timestamp_difference = int(round(timestamp_difference, 0))
43
46
 
@@ -45,7 +48,10 @@ def save_page(url: str):
45
48
  vb.write(verbose=None, content="\n-----> Response: 302 (new snapshot)")
46
49
  vb.write(verbose=None, content=f"SNAPSHOT URL: {location}")
47
50
  elif timestamp_difference >= 1:
48
- vb.write(verbose=None, content=f"\n-----> Response: 302 (existing snapshot - wait for {60 - timestamp_difference} minutes)")
51
+ vb.write(
52
+ verbose=None,
53
+ content=f"\n-----> Response: 302 (existing snapshot - wait for {60 - timestamp_difference} minutes)",
54
+ )
49
55
  vb.write(verbose=None, content=f"SNAPSHOT URL: {location}")
50
56
  vb.write(verbose=None, content=f"WAYBACK TIME: {snapshot_timestamp}")
51
57
  vb.write(verbose=None, content=f"REQUEST TIME: {current_timestamp}")
@@ -17,6 +17,7 @@ from sqlalchemy import (
17
17
  )
18
18
  from sqlalchemy.ext.declarative import declarative_base
19
19
  from sqlalchemy.orm import sessionmaker
20
+ from typing import Optional # python 3.8
20
21
 
21
22
  Base = declarative_base()
22
23
 
@@ -112,7 +113,9 @@ class Database:
112
113
  Base.metadata.create_all(engine)
113
114
 
114
115
  db = Database()
115
- if db.session.execute(select(waybackup_job.query_identifier).where(query_identifier == query_identifier)).fetchone():
116
+ if db.session.execute(
117
+ select(waybackup_job.query_identifier).where(query_identifier == query_identifier)
118
+ ).fetchone():
116
119
  cls.query_exist = True
117
120
  cls.query_progress = db.get_progress()
118
121
  else:
@@ -139,11 +142,13 @@ class Database:
139
142
  """
140
143
  progress = f"{(done):,} / {(total):,}"
141
144
  self.session.execute(
142
- update(waybackup_job).where(waybackup_job.query_identifier == self.query_identifier).values(query_progress=progress)
145
+ update(waybackup_job)
146
+ .where(waybackup_job.query_identifier == self.query_identifier)
147
+ .values(query_progress=progress)
143
148
  )
144
149
  self.session.commit()
145
150
 
146
- def get_progress(self) -> str | None:
151
+ def get_progress(self) -> Optional[str]:
147
152
  """
148
153
  str or None: Progress string (e.g., '5 / 10') or None if not found.
149
154
  """
@@ -151,7 +156,7 @@ class Database:
151
156
  select(waybackup_job.query_progress).where(waybackup_job.query_identifier == self.query_identifier)
152
157
  ).scalar_one_or_none()
153
158
 
154
- def get_insert_complete(self) -> int | None:
159
+ def get_insert_complete(self) -> Optional[int]:
155
160
  """
156
161
  int or None: 1 if complete, 0 if not, or None if not found.
157
162
  """
@@ -159,7 +164,7 @@ class Database:
159
164
  select(waybackup_job.insert_complete).where(waybackup_job.query_identifier == self.query_identifier)
160
165
  ).scalar_one_or_none()
161
166
 
162
- def get_index_complete(self) -> int | None:
167
+ def get_index_complete(self) -> Optional[int]:
163
168
  """
164
169
  int or None: 1 if complete, 0 if not, or None if not found.
165
170
  """
@@ -167,7 +172,7 @@ class Database:
167
172
  select(waybackup_job.index_complete).where(waybackup_job.query_identifier == self.query_identifier)
168
173
  ).scalar_one_or_none()
169
174
 
170
- def get_filter_complete(self) -> int | None:
175
+ def get_filter_complete(self) -> Optional[int]:
171
176
  """
172
177
  int or None: 1 if complete, 0 if not, or None if not found.
173
178
  """
@@ -179,19 +184,31 @@ class Database:
179
184
  """
180
185
  Mark the job's insertion phase as complete in the database.
181
186
  """
182
- self.session.execute(update(waybackup_job).where(waybackup_job.query_identifier == self.query_identifier).values(insert_complete=1))
187
+ self.session.execute(
188
+ update(waybackup_job)
189
+ .where(waybackup_job.query_identifier == self.query_identifier)
190
+ .values(insert_complete=1)
191
+ )
183
192
  self.session.commit()
184
193
 
185
194
  def set_index_complete(self):
186
195
  """
187
196
  Mark the job's indexing phase as complete in the database.
188
197
  """
189
- self.session.execute(update(waybackup_job).where(waybackup_job.query_identifier == self.query_identifier).values(index_complete=1))
198
+ self.session.execute(
199
+ update(waybackup_job)
200
+ .where(waybackup_job.query_identifier == self.query_identifier)
201
+ .values(index_complete=1)
202
+ )
190
203
  self.session.commit()
191
204
 
192
205
  def set_filter_complete(self):
193
206
  """
194
207
  Mark the job's filtering phase as complete in the database.
195
208
  """
196
- self.session.execute(update(waybackup_job).where(waybackup_job.query_identifier == self.query_identifier).values(filter_complete=1))
209
+ self.session.execute(
210
+ update(waybackup_job)
211
+ .where(waybackup_job.query_identifier == self.query_identifier)
212
+ .values(filter_complete=1)
213
+ )
197
214
  self.session.commit()
@@ -54,7 +54,9 @@ class CDXquery:
54
54
 
55
55
  limit = f"&limit={self.limit}" if self.limit else ""
56
56
 
57
- filter_statuscode = f"&filter=statuscode:({'|'.join(self.filter_statuscode)})$" if self.filter_statuscode else ""
57
+ filter_statuscode = (
58
+ f"&filter=statuscode:({'|'.join(self.filter_statuscode)})$" if self.filter_statuscode else ""
59
+ )
58
60
  filter_filetype = f"&filter=original:.*\\.({'|'.join(self.filter_filetype)})$" if self.filter_filetype else ""
59
61
 
60
62
  return f"https://web.archive.org/cdx/search/cdx?output=json&url={cdx_url}{period}&fl=timestamp,digest,mimetype,statuscode,original{limit}{filter_filetype}{filter_statuscode}"
@@ -1,5 +1,6 @@
1
1
  import os
2
2
  import shutil
3
+ import pylibmagic
3
4
  import magic
4
5
 
5
6
 
@@ -88,18 +89,14 @@ def move_index(existpath: str = None, existfile: str = None, filebuffer: bytes =
88
89
  shutil.move(existpath, existpath + "_exist")
89
90
  os.makedirs(existpath, exist_ok=True)
90
91
  if not check_index_mime(existpath):
91
- new_file = os.path.join(
92
- existpath, os.path.basename(os.path.normpath(existpath))
93
- )
92
+ new_file = os.path.join(existpath, os.path.basename(os.path.normpath(existpath)))
94
93
  else:
95
94
  new_file = os.path.join(existpath, "index.html")
96
95
  shutil.move(existpath + "_exist", new_file)
97
96
  elif existfile:
98
97
  if filebuffer:
99
98
  if not check_index_mime(filebuffer):
100
- return os.path.join(
101
- existfile, os.path.basename(os.path.normpath(existfile))
102
- )
99
+ return os.path.join(existfile, os.path.basename(os.path.normpath(existfile)))
103
100
  else:
104
101
  return os.path.join(existfile, "index.html")
105
102
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pywaybackup
3
- Version: 4.1.0
3
+ Version: 4.1.2
4
4
  Summary: Query and download archive.org as simple as possible.
5
5
  Author-email: bitdruid <bitdruid@outlook.com>
6
6
  License: MIT License
@@ -32,8 +32,9 @@ License-File: LICENSE
32
32
  Requires-Dist: SQLAlchemy==2.0.43
33
33
  Requires-Dist: requests==2.32.3
34
34
  Requires-Dist: tqdm==4.67.1
35
+ Requires-Dist: pylibmagic==0.5.0
35
36
  Requires-Dist: python-magic==0.4.27; sys_platform == "linux"
36
- Requires-Dist: python-magic-bin==0.4.14; sys_platform == "win32"
37
+ Requires-Dist: python-magic-bin==0.4.14; sys_platform == "win32" or sys_platform == "darwin"
37
38
 
38
39
  # python wayback machine downloader
39
40
 
@@ -1,9 +1,10 @@
1
1
  SQLAlchemy==2.0.43
2
2
  requests==2.32.3
3
3
  tqdm==4.67.1
4
+ pylibmagic==0.5.0
4
5
 
5
6
  [:sys_platform == "linux"]
6
7
  python-magic==0.4.27
7
8
 
8
- [:sys_platform == "win32"]
9
+ [:sys_platform == "win32" or sys_platform == "darwin"]
9
10
  python-magic-bin==0.4.14
File without changes
File without changes
File without changes