title_grabber 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 870ae6b9b5903d1516e9678f25a85f3852a059ed2bd1b6a23d8289dcf75370b2
4
- data.tar.gz: 26e430fb06e441b39297b416b27c21428b24eedd8d8f28dd9e329e1f6027ebec
3
+ metadata.gz: 7532ff11d949765bb08b4ae1deeefc1aea1c70e8b794139cf747cf10c948e433
4
+ data.tar.gz: f59192b55720a85dab8767b03346fc335e825d5696d20f7ac46e0880a081b96d
5
5
  SHA512:
6
- metadata.gz: 4a76a1cfe72a0296a1db4d46e38ee43f54cddd07a0cf1a11c40eb95f687376f610a810e87886b6910c6ecb9673723b9f35b4d13fca6a0bbcb8ee799a929ae420
7
- data.tar.gz: 64fc74972c50ea504aa44d35079db6e0ffd4531df7a2b1e7f9f0df1a134e0f12edf76009a31eeed795f7d905c224ebd2b653c8885716199cecf1fa23575ff7ec
6
+ metadata.gz: 1102126705a068b7e7e92721c93cc283bf9b4bef28a49c7799aeb3f03825548751ebb108861e73952ba79de57620d4e9390a6a95df32b9a04cc2e7a75df704c5
7
+ data.tar.gz: 24f10e575e1f5b706877e952e7fefdcba17a589a3a0f0282c697d6b1882e79f40066bac01509e5d3ed43d50f2102e68ff3f2baae5c5bd9c3eecd295e86f5af03
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- title_grabber (0.2.3)
4
+ title_grabber (0.2.4)
5
5
  http (~> 4.1)
6
6
  oga (~> 2.15)
7
7
 
@@ -1,3 +1,3 @@
1
1
  module TitleGrabber
2
- VERSION = "0.2.3"
2
+ VERSION = "0.2.4"
3
3
  end
data/lib/title_grabber.rb CHANGED
@@ -16,32 +16,23 @@ module TitleGrabber
16
16
  ART_TIT_HEAD = -"article_title"
17
17
  HEADERS = [URL_HEADER, PAGE_TIT_HEAD, ART_TIT_HEAD].freeze
18
18
 
19
- class << self
19
+ def self.call(lines, options)
20
+ MultiThreadedGrabber.new(lines, options).call
21
+ end
22
+
23
+ class MultiThreadedGrabber
20
24
  include HTTPHelper
21
25
  include TextHelper
22
26
 
23
- def call(lines, options)
24
- out_path = options[:output]
25
-
26
- processed_urls = if out_path.exist?
27
- arr_of_h = CSV.read(out_path, headers: true)
28
- arr_of_h.each_with_object({}) { |r, h|
29
- page_tit = r[PAGE_TIT_HEAD]
30
- art_tit = r[ART_TIT_HEAD]
31
-
32
- unless page_tit.empty? && art_tit.empty?
33
- h[r[URL_HEADER]] = { PAGE_TIT_HEAD => page_tit,
34
- ART_TIT_HEAD => art_tit }
35
- end
36
- }.tap do
37
- arr_of_h = nil
38
- end
39
- else
40
- {}
41
- end
27
+ attr_reader :lines, :out_path
42
28
 
43
- queue = Queue.new
29
+ def initialize(lines, options)
30
+ @lines = lines
31
+ @out_path = options[:output]
32
+ end
44
33
 
34
+ def call
35
+ queue = Queue.new
45
36
  tmp_path = out_path.sub_ext(".tmp#{out_path.extname}")
46
37
  CSV.open(tmp_path, "w", force_quotes: true) do |csv|
47
38
  csv << HEADERS
@@ -89,5 +80,27 @@ module TitleGrabber
89
80
 
90
81
  FileUtils.mv(tmp_path, out_path)
91
82
  end
83
+
84
+ private
85
+
86
+ def processed_urls
87
+ @processed_urls ||= begin
88
+ urls = {}
89
+
90
+ if out_path.exist?
91
+ CSV.foreach(out_path, headers: true) do |r|
92
+ page_tit = r[PAGE_TIT_HEAD]
93
+ art_tit = r[ART_TIT_HEAD]
94
+
95
+ unless page_tit.empty? && art_tit.empty?
96
+ urls[r[URL_HEADER]] = { PAGE_TIT_HEAD => page_tit,
97
+ ART_TIT_HEAD => art_tit }
98
+ end
99
+ end
100
+ end
101
+
102
+ urls
103
+ end
104
+ end
92
105
  end
93
106
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: title_grabber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cristian Rasch