wayback_machine_downloader 0.1.17 → 0.1.18
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/wayback_machine_downloader.rb +8 -7
- metadata +31 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1a37e6ffa46aae434f606fd8d887c2017a0a06a1
|
4
|
+
data.tar.gz: c05f60c12fec76ec3709371a31fff7f5de17782f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 24a3096abfdc4506a873bb784380d169d1a0b45c39f5c1f91538f7c1f39d67d86b0a7d8f74103406b35d4718ab732a2ca442ee8bcbc27caf7bb3db2406530130
|
7
|
+
data.tar.gz: c19c30ccf76b8993e054aff2dabadb000e2b4fe90eff5ae61e93a170ef2aae79fc543b699e78b1df3df23c7b0ef1bc625b55da556943de9554aee3e732aa58a1
|
@@ -4,7 +4,7 @@ require_relative 'wayback_machine_downloader/tidy_bytes'
|
|
4
4
|
|
5
5
|
class WaybackMachineDownloader
|
6
6
|
|
7
|
-
VERSION = "0.1.17"
|
7
|
+
VERSION = "0.1.18"
|
8
8
|
|
9
9
|
attr_accessor :base_url, :timestamp
|
10
10
|
|
@@ -49,7 +49,7 @@ class WaybackMachineDownloader
|
|
49
49
|
file_list_curated
|
50
50
|
end
|
51
51
|
|
52
|
-
def
|
52
|
+
def get_file_list_by_timestamp
|
53
53
|
file_list_curated = get_file_list_curated
|
54
54
|
file_list_curated = file_list_curated.sort_by { |k,v| v[:timestamp] }.reverse
|
55
55
|
file_list_curated.map do |file_remote_info|
|
@@ -61,12 +61,13 @@ class WaybackMachineDownloader
|
|
61
61
|
def download_files
|
62
62
|
puts "Downloading #{@base_url} to #{backup_path} from Wayback Machine..."
|
63
63
|
puts
|
64
|
-
|
64
|
+
file_list_by_timestamp = get_file_list_by_timestamp
|
65
65
|
count = 0
|
66
66
|
file_list_by_timestamp.each do |file_remote_info|
|
67
67
|
count += 1
|
68
68
|
file_url = file_remote_info[:file_url]
|
69
69
|
file_id = file_remote_info[:file_id]
|
70
|
+
file_timestamp = file_remote_info[:timestamp]
|
70
71
|
file_path_elements = file_id.split('/')
|
71
72
|
if file_id == ""
|
72
73
|
dir_path = backup_path
|
@@ -83,7 +84,7 @@ class WaybackMachineDownloader
|
|
83
84
|
structure_dir_path dir_path
|
84
85
|
open(file_path, "wb") do |file|
|
85
86
|
begin
|
86
|
-
open("http://web.archive.org/web/#{
|
87
|
+
open("http://web.archive.org/web/#{file_timestamp}id_/#{file_url}") do |uri|
|
87
88
|
file.write(uri.read)
|
88
89
|
end
|
89
90
|
rescue OpenURI::HTTPError => e
|
@@ -96,13 +97,13 @@ class WaybackMachineDownloader
|
|
96
97
|
rescue StandardError => e
|
97
98
|
puts "#{file_url} # #{e}"
|
98
99
|
end
|
99
|
-
puts "#{file_url} -> #{file_path} (#{count}/#{
|
100
|
+
puts "#{file_url} -> #{file_path} (#{count}/#{file_list_by_timestamp.size})"
|
100
101
|
else
|
101
|
-
puts "#{file_url} # #{file_path} already exists. (#{count}/#{
|
102
|
+
puts "#{file_url} # #{file_path} already exists. (#{count}/#{file_list_by_timestamp.size})"
|
102
103
|
end
|
103
104
|
end
|
104
105
|
puts
|
105
|
-
puts "Download complete, saved in #{backup_path} (#{
|
106
|
+
puts "Download complete, saved in #{backup_path} (#{file_list_by_timestamp.size} files)"
|
106
107
|
end
|
107
108
|
|
108
109
|
def structure_dir_path dir_path
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wayback_machine_downloader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.17
|
4
|
+
version: 0.1.18
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hartator
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pry-rescue
|
@@ -38,6 +38,34 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0.4'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.2'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10.2'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '5.2'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '5.2'
|
41
69
|
description: Download any website from the Wayback Machine. Wayback Machine by Internet
|
42
70
|
Archive (archive.org) is an awesome tool to view any website at any point of time
|
43
71
|
but lacks an export feature. Wayback Machine Downloader brings exactly this.
|
@@ -70,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
70
98
|
version: '0'
|
71
99
|
requirements: []
|
72
100
|
rubyforge_project:
|
73
|
-
rubygems_version: 2.4.5
|
101
|
+
rubygems_version: 2.4.5.1
|
74
102
|
signing_key:
|
75
103
|
specification_version: 4
|
76
104
|
summary: Download any website from the Wayback Machine.
|