wayback_archiver 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +6 -14
- data/lib/wayback_archiver/archive.rb +21 -10
- data/lib/wayback_archiver/request.rb +1 -1
- data/lib/wayback_archiver/version.rb +1 -1
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
metadata.gz: !binary |-
|
9
|
-
NzdhNDM5NTI2Njk5YzNmZjNmZTAzMThmZThiOTJlZGZkMWEyMmExMzE5NDli
|
10
|
-
ZDgzOWE1ZGVjY2FkOTI1M2MxOGQ3Mjc0YzY5NWQyNTRhZWJmOTJhOTBhYjM3
|
11
|
-
YTc0YTAwZDUzYjcwMjdiNzdjYTQzY2I2NmQ4Y2U3Mzk0NTY3NDk=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
YTBlY2ViNGE4OWFjMWIwNmQzNTM4ODMzMGI1MjkyNjMzNzMwMmQ4NDJhYjBk
|
14
|
-
YTk1MjYxYzQzOTZiZDQzODZhZGM4NGRkYjZmMDBiZWJkZTkwN2RlZTZjOWUz
|
15
|
-
NDVkMjc1N2E2ODdiZGI0OTMwZjAzMmYwZjQyNDg5MGZhNmNlN2Y=
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 66f995653260c6d884271708691177c6c4e1c5b2
|
4
|
+
data.tar.gz: b46518ada2f179c8034bb48c41f17195e5f4551d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5837775a0b99935895992589a75490859e4b3c2b089ee20e3d339e52913cfc56c1e5e055bd4650bb5678b93abf76f2d7204a27d67ea1e9403c69f20cdab1d422
|
7
|
+
data.tar.gz: 5d256b29a98bfba85dad1559075885fa0b85e9c632971a735207fff7c636aede116b45581cdf72f8a3054cb351f84ae1556ce1849a0f33738989a108bba7f8f1
|
data/lib/wayback_archiver/archive.rb
CHANGED
@@ -1,18 +1,29 @@
|
|
1
1
|
module WaybackArchiver
|
2
2
|
class Archive
|
3
|
+
MAX_THREAD_COUNT = 8
|
3
4
|
|
4
|
-
def self.post(
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
5
|
+
def self.post(all_urls)
|
6
|
+
puts "Request will be sent with max #{MAX_THREAD_COUNT} parallel threads"
|
7
|
+
|
8
|
+
puts "Total urls to be sent: #{all_urls.length}"
|
9
|
+
threads = Array.new
|
10
|
+
group_size = (all_urls.length / MAX_THREAD_COUNT) + 1
|
11
|
+
|
12
|
+
all_urls.each_slice(group_size).to_a.each do |urls|
|
13
|
+
threads << Thread.new do
|
14
|
+
urls.each_with_index do |url, index|
|
15
|
+
request_url = "#{BASE_URL}#{url}"
|
16
|
+
begin
|
17
|
+
res = Request.get_response(request_url)
|
18
|
+
print "#{url} #{res.code} => #{res.message} \n"
|
19
|
+
rescue Exception => e
|
20
|
+
puts "Error message: #{e.message}"
|
21
|
+
puts "Failed to archive: #{url}"
|
22
|
+
end
|
23
|
+
end
|
14
24
|
end
|
15
25
|
end
|
26
|
+
threads.each(&:join)
|
16
27
|
end
|
17
28
|
|
18
29
|
end
|
data/lib/wayback_archiver/request.rb
CHANGED
@@ -7,7 +7,7 @@ module WaybackArchiver
|
|
7
7
|
http = Net::HTTP.new(uri.host, uri.port)
|
8
8
|
http.use_ssl = true if url.include?('https://')
|
9
9
|
|
10
|
-
request
|
10
|
+
request = Net::HTTP::Get.new(uri.request_uri)
|
11
11
|
response = http.request(request)
|
12
12
|
response
|
13
13
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wayback_archiver
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jacob Burenstam
|
@@ -28,17 +28,17 @@ dependencies:
|
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - '>='
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - '>='
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
-
description:
|
41
|
+
description: 'Send URLs to Wayback Machine. From: sitemap, file or single URL.'
|
42
42
|
email:
|
43
43
|
- burenstam@gmail.com
|
44
44
|
executables:
|
@@ -48,9 +48,9 @@ extra_rdoc_files: []
|
|
48
48
|
files:
|
49
49
|
- bin/wayback_archiver
|
50
50
|
- lib/wayback_archiver/archive.rb
|
51
|
-
- lib/wayback_archiver/version.rb
|
52
51
|
- lib/wayback_archiver/collector.rb
|
53
52
|
- lib/wayback_archiver/request.rb
|
53
|
+
- lib/wayback_archiver/version.rb
|
54
54
|
- lib/wayback_archiver.rb
|
55
55
|
homepage: https://github.com/buren/wayback_archiver
|
56
56
|
licenses:
|
@@ -62,17 +62,17 @@ require_paths:
|
|
62
62
|
- lib
|
63
63
|
required_ruby_version: !ruby/object:Gem::Requirement
|
64
64
|
requirements:
|
65
|
-
- -
|
65
|
+
- - '>='
|
66
66
|
- !ruby/object:Gem::Version
|
67
67
|
version: '0'
|
68
68
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
69
69
|
requirements:
|
70
|
-
- -
|
70
|
+
- - '>='
|
71
71
|
- !ruby/object:Gem::Version
|
72
72
|
version: '0'
|
73
73
|
requirements: []
|
74
74
|
rubyforge_project:
|
75
|
-
rubygems_version: 2.0.
|
75
|
+
rubygems_version: 2.0.0
|
76
76
|
signing_key:
|
77
77
|
specification_version: 4
|
78
78
|
summary: Send URLs to Wayback Machine
|