wayback_machine_downloader 0.1.17 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/wayback_machine_downloader.rb +8 -7
- metadata +31 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1a37e6ffa46aae434f606fd8d887c2017a0a06a1
|
4
|
+
data.tar.gz: c05f60c12fec76ec3709371a31fff7f5de17782f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 24a3096abfdc4506a873bb784380d169d1a0b45c39f5c1f91538f7c1f39d67d86b0a7d8f74103406b35d4718ab732a2ca442ee8bcbc27caf7bb3db2406530130
|
7
|
+
data.tar.gz: c19c30ccf76b8993e054aff2dabadb000e2b4fe90eff5ae61e93a170ef2aae79fc543b699e78b1df3df23c7b0ef1bc625b55da556943de9554aee3e732aa58a1
|
@@ -4,7 +4,7 @@ require_relative 'wayback_machine_downloader/tidy_bytes'
|
|
4
4
|
|
5
5
|
class WaybackMachineDownloader
|
6
6
|
|
7
|
-
VERSION = "0.1.17"
|
7
|
+
VERSION = "0.1.18"
|
8
8
|
|
9
9
|
attr_accessor :base_url, :timestamp
|
10
10
|
|
@@ -49,7 +49,7 @@ class WaybackMachineDownloader
|
|
49
49
|
file_list_curated
|
50
50
|
end
|
51
51
|
|
52
|
-
def
|
52
|
+
def get_file_list_by_timestamp
|
53
53
|
file_list_curated = get_file_list_curated
|
54
54
|
file_list_curated = file_list_curated.sort_by { |k,v| v[:timestamp] }.reverse
|
55
55
|
file_list_curated.map do |file_remote_info|
|
@@ -61,12 +61,13 @@ class WaybackMachineDownloader
|
|
61
61
|
def download_files
|
62
62
|
puts "Downloading #{@base_url} to #{backup_path} from Wayback Machine..."
|
63
63
|
puts
|
64
|
-
|
64
|
+
file_list_by_timestamp = get_file_list_by_timestamp
|
65
65
|
count = 0
|
66
66
|
file_list_by_timestamp.each do |file_remote_info|
|
67
67
|
count += 1
|
68
68
|
file_url = file_remote_info[:file_url]
|
69
69
|
file_id = file_remote_info[:file_id]
|
70
|
+
file_timestamp = file_remote_info[:timestamp]
|
70
71
|
file_path_elements = file_id.split('/')
|
71
72
|
if file_id == ""
|
72
73
|
dir_path = backup_path
|
@@ -83,7 +84,7 @@ class WaybackMachineDownloader
|
|
83
84
|
structure_dir_path dir_path
|
84
85
|
open(file_path, "wb") do |file|
|
85
86
|
begin
|
86
|
-
open("http://web.archive.org/web/#{
|
87
|
+
open("http://web.archive.org/web/#{file_timestamp}id_/#{file_url}") do |uri|
|
87
88
|
file.write(uri.read)
|
88
89
|
end
|
89
90
|
rescue OpenURI::HTTPError => e
|
@@ -96,13 +97,13 @@ class WaybackMachineDownloader
|
|
96
97
|
rescue StandardError => e
|
97
98
|
puts "#{file_url} # #{e}"
|
98
99
|
end
|
99
|
-
puts "#{file_url} -> #{file_path} (#{count}/#{
|
100
|
+
puts "#{file_url} -> #{file_path} (#{count}/#{file_list_by_timestamp.size})"
|
100
101
|
else
|
101
|
-
puts "#{file_url} # #{file_path} already exists. (#{count}/#{
|
102
|
+
puts "#{file_url} # #{file_path} already exists. (#{count}/#{file_list_by_timestamp.size})"
|
102
103
|
end
|
103
104
|
end
|
104
105
|
puts
|
105
|
-
puts "Download complete, saved in #{backup_path} (#{
|
106
|
+
puts "Download complete, saved in #{backup_path} (#{file_list_by_timestamp.size} files)"
|
106
107
|
end
|
107
108
|
|
108
109
|
def structure_dir_path dir_path
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wayback_machine_downloader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.17
|
4
|
+
version: 0.1.18
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hartator
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pry-rescue
|
@@ -38,6 +38,34 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0.4'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.2'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10.2'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '5.2'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '5.2'
|
41
69
|
description: Download any website from the Wayback Machine. Wayback Machine by Internet
|
42
70
|
Archive (archive.org) is an awesome tool to view any website at any point of time
|
43
71
|
but lacks an export feature. Wayback Machine Downloader brings exactly this.
|
@@ -70,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
70
98
|
version: '0'
|
71
99
|
requirements: []
|
72
100
|
rubyforge_project:
|
73
|
-
rubygems_version: 2.4.5
|
101
|
+
rubygems_version: 2.4.5.1
|
74
102
|
signing_key:
|
75
103
|
specification_version: 4
|
76
104
|
summary: Download any website from the Wayback Machine.
|