title_grabber 0.3.3 → 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 92a1bcb0847898f17c59d593ab9cba0399698ca4fb895a04e0f640ba13cabb23
4
- data.tar.gz: 4a207b40f92fafbe8b6aff81fe982c7e572894bbb7b36950d80c515d741c920a
3
+ metadata.gz: 8a8b57f801507ef8c856ebd741b247c30e08472b5df3be45d30e6fd7fa3c0092
4
+ data.tar.gz: 75ff595060fe8dd71a669ebadcf38f040d5c47492a5c6ea770e893832493ddff
5
5
  SHA512:
6
- metadata.gz: f908ad1996a3228474d75c73ae519a14cfaa3455973f4cac7a33547d5ab36ab6c39ff80837c205cb746bb74b9baca63e2ffb4fb72394cd7801a1c56b2ac162fc
7
- data.tar.gz: 54cf0f2d8d67341964d7d8e5487eb89ef1672f7d96d4c92cabbad6fd2c1d5ffad4d6116d0793ee26722600268704dca840707f8c9300a9ebbb22b8be02ba0473
6
+ metadata.gz: 661dd87e4e60dcfd4a66168799f3b07babf5c21a890b2e61afb99b527b1e15cb7ebd875a60f39bd12cc2f06ef286b603efb89c4908f461606fa3c60a09ad5db6
7
+ data.tar.gz: b6b1152856c43702082a788a1a056ccc2cef93101faada8890eda27b949d424cf3b9a83991fd91e07733f2c860003b26644510ae9635ea5c6336a1990f997445
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- title_grabber (0.3.3)
4
+ title_grabber (0.3.4)
5
5
  http (~> 4.1)
6
6
  oga (~> 2.15)
7
7
 
data/lib/http_helper.rb CHANGED
@@ -11,7 +11,7 @@ module HTTPHelper
11
11
  include TextHelper
12
12
 
13
13
  def open_w_timeout(url, write_to:, connect_to:, read_to:, max_retries:)
14
- logger.info "[Thread: ##{Thread.current[:id]}] GET #{url}"
14
+ logger.info "[#{Thread.current.name}] GET #{url}"
15
15
  retries = 0
16
16
 
17
17
  begin
@@ -30,16 +30,16 @@ module HTTPHelper
30
30
 
31
31
  if retries <= max_retries
32
32
  rest_time = rand(REST_INTERVAL)
33
- logger.warn "[Thread: ##{Thread.current[:id]}] URL: #{url} [#{msg}] - Going to sleep for #{rest_time.round(1)} secs - Retry ##{retries}"
33
+ logger.warn "[#{Thread.current.name}] URL: #{url} [#{msg}] - Going to sleep for #{rest_time.round(1)} secs - Retry ##{retries}"
34
34
  sleep(rest_time)
35
35
  retry
36
36
  else
37
- logger.error "[Thread: ##{Thread.current[:id]}] URL: #{url} [#{msg}]"
37
+ logger.error "[#{Thread.current.name}] URL: #{url} [#{msg}]"
38
38
  nil
39
39
  end
40
40
  end
41
41
  rescue => err
42
- logger.error "[Thread: ##{Thread.current[:id]}] URL: #{url} [#{err.message}]"
42
+ logger.error "[#{Thread.current.name}] URL: #{url} [#{err.message}]"
43
43
  nil
44
44
  else
45
45
  utf8_encode(body)
data/lib/title_grabber.rb CHANGED
@@ -58,8 +58,6 @@ module TitleGrabber
58
58
  end
59
59
 
60
60
  def call
61
- install_at_exit_handler
62
-
63
61
  queue = Queue.new
64
62
  CSV.open(tmp_path, "w", force_quotes: true) do |csv|
65
63
  csv << HEADERS
@@ -81,7 +79,7 @@ module TitleGrabber
81
79
  thr_cnt = [max_threads, queue.size].min
82
80
  threads = 1.upto(thr_cnt).map.with_index { |_, i|
83
81
  Thread.new(i) do |j|
84
- Thread.current[:id] = i + 1
82
+ Thread.current.name = "Thread ##{i + 1}"
85
83
 
86
84
  url = begin
87
85
  queue.pop(true)
@@ -92,7 +90,7 @@ module TitleGrabber
92
90
  doc = begin
93
91
  Oga.parse_html(html)
94
92
  rescue ArgumentError, LL::ParserError => err
95
- logger.error "[Thread: ##{Thread.current[:id]}] Unable to parse HTML from URL '#{url}' - #{err.message}"
93
+ logger.error "[#{Thread.current.name}] Unable to parse HTML from URL '#{url}' - #{err.message}"
96
94
  nil
97
95
  end
98
96
 
@@ -114,20 +112,16 @@ module TitleGrabber
114
112
  end
115
113
  }.each(&:join)
116
114
  end
115
+ ensure
116
+ if tmp_path.size?
117
+ FileUtils.mv(tmp_path, out_path)
118
+ else
119
+ tmp_path.unlink if tmp_path.exist?
120
+ end
117
121
  end
118
122
 
119
123
  private
120
124
 
121
- def install_at_exit_handler
122
- at_exit do
123
- if tmp_path.size?
124
- FileUtils.mv(tmp_path, out_path)
125
- else
126
- tmp_path.unlink if tmp_path.exist?
127
- end
128
- end
129
- end
130
-
131
125
  def processed_urls
132
126
  @processed_urls ||= begin
133
127
  urls = {}
@@ -1,3 +1,3 @@
1
1
  module TitleGrabber
2
- VERSION = "0.3.3"
2
+ VERSION = "0.3.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: title_grabber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cristian Rasch
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-04-06 00:00:00.000000000 Z
11
+ date: 2019-04-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: http