title_grabber 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 92a1bcb0847898f17c59d593ab9cba0399698ca4fb895a04e0f640ba13cabb23
4
- data.tar.gz: 4a207b40f92fafbe8b6aff81fe982c7e572894bbb7b36950d80c515d741c920a
3
+ metadata.gz: 8a8b57f801507ef8c856ebd741b247c30e08472b5df3be45d30e6fd7fa3c0092
4
+ data.tar.gz: 75ff595060fe8dd71a669ebadcf38f040d5c47492a5c6ea770e893832493ddff
5
5
  SHA512:
6
- metadata.gz: f908ad1996a3228474d75c73ae519a14cfaa3455973f4cac7a33547d5ab36ab6c39ff80837c205cb746bb74b9baca63e2ffb4fb72394cd7801a1c56b2ac162fc
7
- data.tar.gz: 54cf0f2d8d67341964d7d8e5487eb89ef1672f7d96d4c92cabbad6fd2c1d5ffad4d6116d0793ee26722600268704dca840707f8c9300a9ebbb22b8be02ba0473
6
+ metadata.gz: 661dd87e4e60dcfd4a66168799f3b07babf5c21a890b2e61afb99b527b1e15cb7ebd875a60f39bd12cc2f06ef286b603efb89c4908f461606fa3c60a09ad5db6
7
+ data.tar.gz: b6b1152856c43702082a788a1a056ccc2cef93101faada8890eda27b949d424cf3b9a83991fd91e07733f2c860003b26644510ae9635ea5c6336a1990f997445
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- title_grabber (0.3.3)
4
+ title_grabber (0.3.4)
5
5
  http (~> 4.1)
6
6
  oga (~> 2.15)
7
7
 
data/lib/http_helper.rb CHANGED
@@ -11,7 +11,7 @@ module HTTPHelper
11
11
  include TextHelper
12
12
 
13
13
  def open_w_timeout(url, write_to:, connect_to:, read_to:, max_retries:)
14
- logger.info "[Thread: ##{Thread.current[:id]}] GET #{url}"
14
+ logger.info "[#{Thread.current.name}] GET #{url}"
15
15
  retries = 0
16
16
 
17
17
  begin
@@ -30,16 +30,16 @@ module HTTPHelper
30
30
 
31
31
  if retries <= max_retries
32
32
  rest_time = rand(REST_INTERVAL)
33
- logger.warn "[Thread: ##{Thread.current[:id]}] URL: #{url} [#{msg}] - Going to sleep for #{rest_time.round(1)} secs - Retry ##{retries}"
33
+ logger.warn "[#{Thread.current.name}] URL: #{url} [#{msg}] - Going to sleep for #{rest_time.round(1)} secs - Retry ##{retries}"
34
34
  sleep(rest_time)
35
35
  retry
36
36
  else
37
- logger.error "[Thread: ##{Thread.current[:id]}] URL: #{url} [#{msg}]"
37
+ logger.error "[#{Thread.current.name}] URL: #{url} [#{msg}]"
38
38
  nil
39
39
  end
40
40
  end
41
41
  rescue => err
42
- logger.error "[Thread: ##{Thread.current[:id]}] URL: #{url} [#{err.message}]"
42
+ logger.error "[#{Thread.current.name}] URL: #{url} [#{err.message}]"
43
43
  nil
44
44
  else
45
45
  utf8_encode(body)
data/lib/title_grabber.rb CHANGED
@@ -58,8 +58,6 @@ module TitleGrabber
58
58
  end
59
59
 
60
60
  def call
61
- install_at_exit_handler
62
-
63
61
  queue = Queue.new
64
62
  CSV.open(tmp_path, "w", force_quotes: true) do |csv|
65
63
  csv << HEADERS
@@ -81,7 +79,7 @@ module TitleGrabber
81
79
  thr_cnt = [max_threads, queue.size].min
82
80
  threads = 1.upto(thr_cnt).map.with_index { |_, i|
83
81
  Thread.new(i) do |j|
84
- Thread.current[:id] = i + 1
82
+ Thread.current.name = "Thread ##{i + 1}"
85
83
 
86
84
  url = begin
87
85
  queue.pop(true)
@@ -92,7 +90,7 @@ module TitleGrabber
92
90
  doc = begin
93
91
  Oga.parse_html(html)
94
92
  rescue ArgumentError, LL::ParserError => err
95
- logger.error "[Thread: ##{Thread.current[:id]}] Unable to parse HTML from URL '#{url}' - #{err.message}"
93
+ logger.error "[#{Thread.current.name}] Unable to parse HTML from URL '#{url}' - #{err.message}"
96
94
  nil
97
95
  end
98
96
 
@@ -114,20 +112,16 @@ module TitleGrabber
114
112
  end
115
113
  }.each(&:join)
116
114
  end
115
+ ensure
116
+ if tmp_path.size?
117
+ FileUtils.mv(tmp_path, out_path)
118
+ else
119
+ tmp_path.unlink if tmp_path.exist?
120
+ end
117
121
  end
118
122
 
119
123
  private
120
124
 
121
- def install_at_exit_handler
122
- at_exit do
123
- if tmp_path.size?
124
- FileUtils.mv(tmp_path, out_path)
125
- else
126
- tmp_path.unlink if tmp_path.exist?
127
- end
128
- end
129
- end
130
-
131
125
  def processed_urls
132
126
  @processed_urls ||= begin
133
127
  urls = {}
@@ -1,3 +1,3 @@
1
1
  module TitleGrabber
2
- VERSION = "0.3.3"
2
+ VERSION = "0.3.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: title_grabber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cristian Rasch
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-04-06 00:00:00.000000000 Z
11
+ date: 2019-04-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: http