wayback_archiver 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- NjcyNWE4NTU0MDY1NDYwMTZiY2RkNTkyMTM2ZjE3NTg5YTVhZjdjMA==
5
- data.tar.gz: !binary |-
6
- YmQzYmEwMGVjMGQwNWY4NmNmOWZhY2MwNDIyNmM0MjBjNzg3YmM4ZA==
7
- !binary "U0hBNTEy":
8
- metadata.gz: !binary |-
9
- NzdhNDM5NTI2Njk5YzNmZjNmZTAzMThmZThiOTJlZGZkMWEyMmExMzE5NDli
10
- ZDgzOWE1ZGVjY2FkOTI1M2MxOGQ3Mjc0YzY5NWQyNTRhZWJmOTJhOTBhYjM3
11
- YTc0YTAwZDUzYjcwMjdiNzdjYTQzY2I2NmQ4Y2U3Mzk0NTY3NDk=
12
- data.tar.gz: !binary |-
13
- YTBlY2ViNGE4OWFjMWIwNmQzNTM4ODMzMGI1MjkyNjMzNzMwMmQ4NDJhYjBk
14
- YTk1MjYxYzQzOTZiZDQzODZhZGM4NGRkYjZmMDBiZWJkZTkwN2RlZTZjOWUz
15
- NDVkMjc1N2E2ODdiZGI0OTMwZjAzMmYwZjQyNDg5MGZhNmNlN2Y=
2
+ SHA1:
3
+ metadata.gz: 66f995653260c6d884271708691177c6c4e1c5b2
4
+ data.tar.gz: b46518ada2f179c8034bb48c41f17195e5f4551d
5
+ SHA512:
6
+ metadata.gz: 5837775a0b99935895992589a75490859e4b3c2b089ee20e3d339e52913cfc56c1e5e055bd4650bb5678b93abf76f2d7204a27d67ea1e9403c69f20cdab1d422
7
+ data.tar.gz: 5d256b29a98bfba85dad1559075885fa0b85e9c632971a735207fff7c636aede116b45581cdf72f8a3054cb351f84ae1556ce1849a0f33738989a108bba7f8f1
@@ -1,18 +1,29 @@
1
1
  module WaybackArchiver
2
2
  class Archive
3
+ MAX_THREAD_COUNT = 8
3
4
 
4
- def self.post(urls)
5
- urls.each_with_index do |url, index|
6
- request_url = "#{BASE_URL}#{url}"
7
- puts "Archiving (#{index + 1}/#{urls.length}): #{url}"
8
- begin
9
- res = Request.get_response(request_url)
10
- puts "#{res.code} => #{res.message}"
11
- rescue Exception => e
12
- puts "Error message: #{e.message}"
13
- puts "Failed to archive: #{url}"
5
+ def self.post(all_urls)
6
+ puts "Request will be sent with max #{MAX_THREAD_COUNT} parallel threads"
7
+
8
+ puts "Total urls to be sent: #{all_urls.length}"
9
+ threads = Array.new
10
+ group_size = (all_urls.length / MAX_THREAD_COUNT) + 1
11
+
12
+ all_urls.each_slice(group_size).to_a.each do |urls|
13
+ threads << Thread.new do
14
+ urls.each_with_index do |url, index|
15
+ request_url = "#{BASE_URL}#{url}"
16
+ begin
17
+ res = Request.get_response(request_url)
18
+ print "#{url} #{res.code} => #{res.message} \n"
19
+ rescue Exception => e
20
+ puts "Error message: #{e.message}"
21
+ puts "Failed to archive: #{url}"
22
+ end
23
+ end
14
24
  end
15
25
  end
26
+ threads.each(&:join)
16
27
  end
17
28
 
18
29
  end
@@ -7,7 +7,7 @@ module WaybackArchiver
7
7
  http = Net::HTTP.new(uri.host, uri.port)
8
8
  http.use_ssl = true if url.include?('https://')
9
9
 
10
- request = Net::HTTP::Get.new(uri.request_uri)
10
+ request = Net::HTTP::Get.new(uri.request_uri)
11
11
  response = http.request(request)
12
12
  response
13
13
  end
@@ -1,3 +1,3 @@
1
1
  module WaybackArchiver
2
- VERSION = '0.0.3'
2
+ VERSION = '0.0.4'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wayback_archiver
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jacob Burenstam
@@ -28,17 +28,17 @@ dependencies:
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ! '>='
31
+ - - '>='
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ! '>='
38
+ - - '>='
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
- description: ! 'Send URLs to Wayback Machine. From: sitemap, file or single URL.'
41
+ description: 'Send URLs to Wayback Machine. From: sitemap, file or single URL.'
42
42
  email:
43
43
  - burenstam@gmail.com
44
44
  executables:
@@ -48,9 +48,9 @@ extra_rdoc_files: []
48
48
  files:
49
49
  - bin/wayback_archiver
50
50
  - lib/wayback_archiver/archive.rb
51
- - lib/wayback_archiver/version.rb
52
51
  - lib/wayback_archiver/collector.rb
53
52
  - lib/wayback_archiver/request.rb
53
+ - lib/wayback_archiver/version.rb
54
54
  - lib/wayback_archiver.rb
55
55
  homepage: https://github.com/buren/wayback_archiver
56
56
  licenses:
@@ -62,17 +62,17 @@ require_paths:
62
62
  - lib
63
63
  required_ruby_version: !ruby/object:Gem::Requirement
64
64
  requirements:
65
- - - ! '>='
65
+ - - '>='
66
66
  - !ruby/object:Gem::Version
67
67
  version: '0'
68
68
  required_rubygems_version: !ruby/object:Gem::Requirement
69
69
  requirements:
70
- - - ! '>='
70
+ - - '>='
71
71
  - !ruby/object:Gem::Version
72
72
  version: '0'
73
73
  requirements: []
74
74
  rubyforge_project:
75
- rubygems_version: 2.0.6
75
+ rubygems_version: 2.0.0
76
76
  signing_key:
77
77
  specification_version: 4
78
78
  summary: Send URLs to Wayback Machine