wayback_machine_downloader 0.1.9 → 0.1.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/wayback_machine_downloader.rb +8 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 496908f840d930edf5e8b5f3b5c59e3d58e73be9
|
4
|
+
data.tar.gz: 063fd94ab7d33a93144d2681cf95441cd569aa4d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b2297fb81c3f58ceab2ce9ce4deabfc4a9a2b1d3005f935ec1f19ab296120860a04704944b34e7fd85d75269f402b05fe113911aaae60c7b49f9093ed4b9feaf
|
7
|
+
data.tar.gz: 35ee0a0087c9bff653e874baa4c4a5dfeed9e62d08dfcef0d8b4aea39c933589aa7976382625f40e32c7fc0f45f8b11e3628db55338bce1a163be3847b345b27
|
@@ -1,9 +1,10 @@
|
|
1
1
|
require 'open-uri'
|
2
2
|
require 'fileutils'
|
3
|
+
require_relative 'tidy_bytes'
|
3
4
|
|
4
5
|
class WaybackMachineDownloader
|
5
6
|
|
6
|
-
VERSION = "0.1.9"
|
7
|
+
VERSION = "0.1.10"
|
7
8
|
|
8
9
|
attr_accessor :base_url, :timestamp
|
9
10
|
|
@@ -31,7 +32,10 @@ class WaybackMachineDownloader
|
|
31
32
|
file_url = line[2]
|
32
33
|
file_id = file_url.split('/')[3..-1].join('/')
|
33
34
|
file_id = URI.unescape file_id
|
34
|
-
|
35
|
+
file_id = file_id.tidy_bytes unless file_id == ""
|
36
|
+
if file_id.nil?
|
37
|
+
puts "Malformed file url, ignoring: #{file_url}"
|
38
|
+
elsif @timestamp == 0 or file_timestamp <= @timestamp
|
35
39
|
if file_list_curated[file_id]
|
36
40
|
unless file_list_curated[file_id][:timestamp] > file_timestamp
|
37
41
|
file_list_curated[file_id] = {file_url: file_url, timestamp: file_timestamp}
|
@@ -55,7 +59,7 @@ class WaybackMachineDownloader
|
|
55
59
|
end
|
56
60
|
|
57
61
|
def download_files
|
58
|
-
puts "Downlading #{@base_url} from Wayback Machine..."
|
62
|
+
puts "Downlading #{@base_url} to #{backup_path} from Wayback Machine..."
|
59
63
|
puts
|
60
64
|
file_list_curated = get_file_list_curated
|
61
65
|
count = 0
|
@@ -84,7 +88,7 @@ class WaybackMachineDownloader
|
|
84
88
|
rescue OpenURI::HTTPError => e
|
85
89
|
puts "#{file_url} # #{e}"
|
86
90
|
file.write(e.io.read)
|
87
|
-
rescue
|
91
|
+
rescue StandardError => e
|
88
92
|
puts "#{file_url} # #{e}"
|
89
93
|
end
|
90
94
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wayback_machine_downloader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.9
|
4
|
+
version: 0.1.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hartator
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-08-
|
11
|
+
date: 2015-08-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pry-rescue
|