title_grabber 0.3.6 → 0.3.7
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/exe/title-grabber +4 -0
- data/lib/http_helper.rb +3 -2
- data/lib/title_grabber/version.rb +1 -1
- data/lib/title_grabber.rb +3 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6642eaa211c58d0debf01ff8b0129832ef2c83d7e280f048bab0656ae7f0aec1
|
4
|
+
data.tar.gz: 3d9bc77e04fd081dd9c3792c9cc378c6eac60a818c8cdd9efdfed134f52ec57d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e5455e96ff7c4ad6654c0c3e57fff4dc60627c55fe586d7bc72f9492c591765451101bfaee1a3537a881713f469600ffc7343081ff9fcc73d56aa733af42e4e1
|
7
|
+
data.tar.gz: 5474dcf7991beb7ea7c48cfe95fa98ab02cd976d068da9a1246ad518eb4963b5c701f46f54c41735b2d77f5309391dcf8f82692b803b401c1b70ef1e75d3147c
|
data/Gemfile.lock
CHANGED
data/exe/title-grabber
CHANGED
@@ -32,6 +32,10 @@ OptionParser.new do |args|
|
|
32
32
|
arguments[:write_to] = timeout
|
33
33
|
end
|
34
34
|
|
35
|
+
args.on("--max-redirects REDIRECTS", Integer, "Max. # of HTTP redirects to follow. Defaults to the value of the MAX_REDIRECTS env var or #{TitleGrabber::MAX_REDIRECTS}") do |redirects|
|
36
|
+
arguments[:max_redirects] = redirects
|
37
|
+
end
|
38
|
+
|
35
39
|
args.on("-r", "--max-retries RETRIES", Integer, "Max. # of times to retry failed HTTP reqs. Defaults to the value of the MAX_RETRIES env var or #{TitleGrabber::MAX_RETRIES}") do |retries|
|
36
40
|
arguments[:max_retries] = retries
|
37
41
|
end
|
data/lib/http_helper.rb
CHANGED
@@ -3,7 +3,6 @@ require "http"
|
|
3
3
|
require_relative "text_helper"
|
4
4
|
|
5
5
|
module HTTPHelper
|
6
|
-
MAX_HOPS = 5
|
7
6
|
INVALID_BYTE_SEQ = "invalid byte sequence".freeze
|
8
7
|
CONNECTION_ERRORS = ["SSL_connect", "Connection reset"].freeze
|
9
8
|
REST_INTERVAL = 0.5..1
|
@@ -17,9 +16,11 @@ module HTTPHelper
|
|
17
16
|
begin
|
18
17
|
res = Timeout.timeout(read_to) {
|
19
18
|
HTTP.timeout(write: write_to, connect: connect_to, read: read_to).
|
20
|
-
follow(max_hops: MAX_HOPS).
|
19
|
+
follow(max_hops: max_redirects).
|
21
20
|
get(url, ssl_context: ssl_ctx)
|
22
21
|
}
|
22
|
+
rescue HTTP::Redirector::TooManyRedirectsError
|
23
|
+
logger.warn "[#{Thread.current.name}] GET #{url} resulted in more than #{max_redirects} redirect#{'s' unless max_redirects == 1}"
|
23
24
|
rescue HTTP::Error, Timeout::Error => err
|
24
25
|
msg = err.message
|
25
26
|
|
data/lib/title_grabber.rb
CHANGED
@@ -16,6 +16,7 @@ module TitleGrabber
|
|
16
16
|
CONNECT_TO = 15
|
17
17
|
READ_TO = 15
|
18
18
|
WRITE_TO = 15
|
19
|
+
MAX_REDIRECTS = 5
|
19
20
|
MAX_RETRIES = 5
|
20
21
|
MAX_THREADS = Etc.nprocessors
|
21
22
|
URL_RE = %r(https?://\S+)i
|
@@ -35,7 +36,7 @@ module TitleGrabber
|
|
35
36
|
include TextHelper
|
36
37
|
|
37
38
|
attr_reader :lines, :out_path, :tmp_path, :connect_to, :read_to, :write_to,
|
38
|
-
:max_retries, :max_threads, :logger
|
39
|
+
:max_redirects, :max_retries, :max_threads, :logger
|
39
40
|
|
40
41
|
def initialize(lines, options)
|
41
42
|
@lines = lines
|
@@ -46,6 +47,7 @@ module TitleGrabber
|
|
46
47
|
@connect_to = options.fetch(:connect_to, CONNECT_TO)
|
47
48
|
@read_to = options.fetch(:read_to, READ_TO)
|
48
49
|
@write_to = options.fetch(:write_to, WRITE_TO)
|
50
|
+
@max_redirects = options.fetch(:max_redirects, MAX_REDIRECTS)
|
49
51
|
@max_retries = options.fetch(:max_retries, MAX_RETRIES)
|
50
52
|
@max_threads = options.fetch(:max_th, Etc.nprocessors)
|
51
53
|
|