title_grabber 0.3.6 → 0.3.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/exe/title-grabber +4 -0
- data/lib/http_helper.rb +3 -2
- data/lib/title_grabber/version.rb +1 -1
- data/lib/title_grabber.rb +3 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6642eaa211c58d0debf01ff8b0129832ef2c83d7e280f048bab0656ae7f0aec1
|
4
|
+
data.tar.gz: 3d9bc77e04fd081dd9c3792c9cc378c6eac60a818c8cdd9efdfed134f52ec57d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e5455e96ff7c4ad6654c0c3e57fff4dc60627c55fe586d7bc72f9492c591765451101bfaee1a3537a881713f469600ffc7343081ff9fcc73d56aa733af42e4e1
|
7
|
+
data.tar.gz: 5474dcf7991beb7ea7c48cfe95fa98ab02cd976d068da9a1246ad518eb4963b5c701f46f54c41735b2d77f5309391dcf8f82692b803b401c1b70ef1e75d3147c
|
data/Gemfile.lock
CHANGED
data/exe/title-grabber
CHANGED
@@ -32,6 +32,10 @@ OptionParser.new do |args|
|
|
32
32
|
arguments[:write_to] = timeout
|
33
33
|
end
|
34
34
|
|
35
|
+
args.on("--max-redirects REDIRECTS", Integer, "Max. # of HTTP redirects to follow. Defaults to the value of the MAX_REDIRECTS env var or #{TitleGrabber::MAX_REDIRECTS}") do |redirects|
|
36
|
+
arguments[:max_redirects] = redirects
|
37
|
+
end
|
38
|
+
|
35
39
|
args.on("-r", "--max-retries RETRIES", Integer, "Max. # of times to retry failed HTTP reqs. Defaults to the value of the MAX_RETRIES env var or #{TitleGrabber::MAX_RETRIES}") do |retries|
|
36
40
|
arguments[:max_retries] = retries
|
37
41
|
end
|
data/lib/http_helper.rb
CHANGED
@@ -3,7 +3,6 @@ require "http"
|
|
3
3
|
require_relative "text_helper"
|
4
4
|
|
5
5
|
module HTTPHelper
|
6
|
-
MAX_HOPS = 5
|
7
6
|
INVALID_BYTE_SEQ = "invalid byte sequence".freeze
|
8
7
|
CONNECTION_ERRORS = ["SSL_connect", "Connection reset"].freeze
|
9
8
|
REST_INTERVAL = 0.5..1
|
@@ -17,9 +16,11 @@ module HTTPHelper
|
|
17
16
|
begin
|
18
17
|
res = Timeout.timeout(read_to) {
|
19
18
|
HTTP.timeout(write: write_to, connect: connect_to, read: read_to).
|
20
|
-
follow(max_hops: MAX_HOPS).
|
19
|
+
follow(max_hops: max_redirects).
|
21
20
|
get(url, ssl_context: ssl_ctx)
|
22
21
|
}
|
22
|
+
rescue HTTP::Redirector::TooManyRedirectsError
|
23
|
+
logger.warn "[#{Thread.current.name}] GET #{url} resulted in more than #{max_redirects} redirect#{'s' unless max_redirects == 1}"
|
23
24
|
rescue HTTP::Error, Timeout::Error => err
|
24
25
|
msg = err.message
|
25
26
|
|
data/lib/title_grabber.rb
CHANGED
@@ -16,6 +16,7 @@ module TitleGrabber
|
|
16
16
|
CONNECT_TO = 15
|
17
17
|
READ_TO = 15
|
18
18
|
WRITE_TO = 15
|
19
|
+
MAX_REDIRECTS = 5
|
19
20
|
MAX_RETRIES = 5
|
20
21
|
MAX_THREADS = Etc.nprocessors
|
21
22
|
URL_RE = %r(https?://\S+)i
|
@@ -35,7 +36,7 @@ module TitleGrabber
|
|
35
36
|
include TextHelper
|
36
37
|
|
37
38
|
attr_reader :lines, :out_path, :tmp_path, :connect_to, :read_to, :write_to,
|
38
|
-
:max_retries, :max_threads, :logger
|
39
|
+
:max_redirects, :max_retries, :max_threads, :logger
|
39
40
|
|
40
41
|
def initialize(lines, options)
|
41
42
|
@lines = lines
|
@@ -46,6 +47,7 @@ module TitleGrabber
|
|
46
47
|
@connect_to = options.fetch(:connect_to, CONNECT_TO)
|
47
48
|
@read_to = options.fetch(:read_to, READ_TO)
|
48
49
|
@write_to = options.fetch(:write_to, WRITE_TO)
|
50
|
+
@max_redirects = options.fetch(:max_redirects, MAX_REDIRECTS)
|
49
51
|
@max_retries = options.fetch(:max_retries, MAX_RETRIES)
|
50
52
|
@max_threads = options.fetch(:max_th, Etc.nprocessors)
|
51
53
|
|