title_grabber 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 870ae6b9b5903d1516e9678f25a85f3852a059ed2bd1b6a23d8289dcf75370b2
4
- data.tar.gz: 26e430fb06e441b39297b416b27c21428b24eedd8d8f28dd9e329e1f6027ebec
3
+ metadata.gz: 7532ff11d949765bb08b4ae1deeefc1aea1c70e8b794139cf747cf10c948e433
4
+ data.tar.gz: f59192b55720a85dab8767b03346fc335e825d5696d20f7ac46e0880a081b96d
5
5
  SHA512:
6
- metadata.gz: 4a76a1cfe72a0296a1db4d46e38ee43f54cddd07a0cf1a11c40eb95f687376f610a810e87886b6910c6ecb9673723b9f35b4d13fca6a0bbcb8ee799a929ae420
7
- data.tar.gz: 64fc74972c50ea504aa44d35079db6e0ffd4531df7a2b1e7f9f0df1a134e0f12edf76009a31eeed795f7d905c224ebd2b653c8885716199cecf1fa23575ff7ec
6
+ metadata.gz: 1102126705a068b7e7e92721c93cc283bf9b4bef28a49c7799aeb3f03825548751ebb108861e73952ba79de57620d4e9390a6a95df32b9a04cc2e7a75df704c5
7
+ data.tar.gz: 24f10e575e1f5b706877e952e7fefdcba17a589a3a0f0282c697d6b1882e79f40066bac01509e5d3ed43d50f2102e68ff3f2baae5c5bd9c3eecd295e86f5af03
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- title_grabber (0.2.3)
4
+ title_grabber (0.2.4)
5
5
  http (~> 4.1)
6
6
  oga (~> 2.15)
7
7
 
data/lib/title_grabber/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module TitleGrabber
2
- VERSION = "0.2.3"
2
+ VERSION = "0.2.4"
3
3
  end
data/lib/title_grabber.rb CHANGED
@@ -16,32 +16,23 @@ module TitleGrabber
16
16
  ART_TIT_HEAD = -"article_title"
17
17
  HEADERS = [URL_HEADER, PAGE_TIT_HEAD, ART_TIT_HEAD].freeze
18
18
 
19
- class << self
19
+ def self.call(lines, options)
20
+ MultiThreadedGrabber.new(lines, options).call
21
+ end
22
+
23
+ class MultiThreadedGrabber
20
24
  include HTTPHelper
21
25
  include TextHelper
22
26
 
23
- def call(lines, options)
24
- out_path = options[:output]
25
-
26
- processed_urls = if out_path.exist?
27
- arr_of_h = CSV.read(out_path, headers: true)
28
- arr_of_h.each_with_object({}) { |r, h|
29
- page_tit = r[PAGE_TIT_HEAD]
30
- art_tit = r[ART_TIT_HEAD]
31
-
32
- unless page_tit.empty? && art_tit.empty?
33
- h[r[URL_HEADER]] = { PAGE_TIT_HEAD => page_tit,
34
- ART_TIT_HEAD => art_tit }
35
- end
36
- }.tap do
37
- arr_of_h = nil
38
- end
39
- else
40
- {}
41
- end
27
+ attr_reader :lines, :out_path
42
28
 
43
- queue = Queue.new
29
+ def initialize(lines, options)
30
+ @lines = lines
31
+ @out_path = options[:output]
32
+ end
44
33
 
34
+ def call
35
+ queue = Queue.new
45
36
  tmp_path = out_path.sub_ext(".tmp#{out_path.extname}")
46
37
  CSV.open(tmp_path, "w", force_quotes: true) do |csv|
47
38
  csv << HEADERS
@@ -89,5 +80,27 @@ module TitleGrabber
89
80
 
90
81
  FileUtils.mv(tmp_path, out_path)
91
82
  end
83
+
84
+ private
85
+
86
+ def processed_urls
87
+ @processed_urls ||= begin
88
+ urls = {}
89
+
90
+ if out_path.exist?
91
+ CSV.foreach(out_path, headers: true) do |r|
92
+ page_tit = r[PAGE_TIT_HEAD]
93
+ art_tit = r[ART_TIT_HEAD]
94
+
95
+ unless page_tit.empty? && art_tit.empty?
96
+ urls[r[URL_HEADER]] = { PAGE_TIT_HEAD => page_tit,
97
+ ART_TIT_HEAD => art_tit }
98
+ end
99
+ end
100
+ end
101
+
102
+ urls
103
+ end
104
+ end
92
105
  end
93
106
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: title_grabber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cristian Rasch