curation 1.1 → 1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/curation.rb +15 -13
- data/lib/curation/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 21a638c75deffb70db8694ecb99d94b1b711fba922ecbd0826d3a69597031df6
|
4
|
+
data.tar.gz: 621519940fe6bc44302201f64211eba932d52ed366ebe0c6f11c02688b5ae0ba
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bf5f63d05793d71ac052b94dcaf5f4e34943355799e042361ae704ca9423b501ab3e157ae8b6509da642e6fa680768205030a24ddc1eb4e24c31863284289575
|
7
|
+
data.tar.gz: d09140b59ba119e85e9c955a40586bd1ce6410fe392a59b5e4a92e2a1ae80b4608fd458c0b84b24be8ba31267bcee9550432264e6ca4068be47b90db46228ed1
|
data/lib/curation.rb
CHANGED
@@ -19,8 +19,9 @@ module Curation
|
|
19
19
|
'[style*="display: none;"]', '[style*="display: none"]', '[aria-hidden="true"]'
|
20
20
|
]
|
21
21
|
|
22
|
-
def initialize(url)
|
22
|
+
def initialize(url, html = nil)
|
23
23
|
@url = url
|
24
|
+
@html = html
|
24
25
|
end
|
25
26
|
|
26
27
|
def title
|
@@ -29,7 +30,8 @@ module Curation
|
|
29
30
|
return ld['headline'] if ld.has_key? 'headline'
|
30
31
|
end
|
31
32
|
end
|
32
|
-
metainspector.best_title
|
33
|
+
metainspector.best_title unless metainspector.best_title.blank?
|
34
|
+
metainspector.title
|
33
35
|
end
|
34
36
|
|
35
37
|
def image
|
@@ -46,7 +48,7 @@ module Curation
|
|
46
48
|
return ld['articleBody'] if ld.has_key? 'articleBody'
|
47
49
|
end
|
48
50
|
end
|
49
|
-
h = html.dup
|
51
|
+
h = nokogiri.dup
|
50
52
|
BLACKLIST.each do |tag|
|
51
53
|
h.css(tag).remove
|
52
54
|
end
|
@@ -73,17 +75,11 @@ module Curation
|
|
73
75
|
metainspector.images.best
|
74
76
|
end
|
75
77
|
|
76
|
-
def html
|
77
|
-
@html ||= Nokogiri::HTML data
|
78
|
-
rescue
|
79
|
-
puts "Nokogiri error"
|
80
|
-
end
|
81
|
-
|
82
78
|
def json_ld
|
83
79
|
unless @json_ld
|
84
80
|
@json_ld = []
|
85
81
|
begin
|
86
|
-
options = html.css('[type="application/ld+json"]')
|
82
|
+
options = nokogiri.css('[type="application/ld+json"]')
|
87
83
|
options.each do |option|
|
88
84
|
string = option.inner_text
|
89
85
|
hash = JSON.parse(string)
|
@@ -96,14 +92,20 @@ module Curation
|
|
96
92
|
@json_ld
|
97
93
|
end
|
98
94
|
|
99
|
-
def data
|
100
|
-
URI.open url
|
95
|
+
def html
|
96
|
+
@html ||= URI.open url
|
101
97
|
rescue
|
102
98
|
puts "Impossible to open #{url}"
|
103
99
|
end
|
104
100
|
|
101
|
+
def nokogiri
|
102
|
+
@nokogiri ||= Nokogiri::HTML html
|
103
|
+
rescue
|
104
|
+
puts "Nokogiri error"
|
105
|
+
end
|
106
|
+
|
105
107
|
def metainspector
|
106
|
-
@metainspector ||= MetaInspector.new url
|
108
|
+
@metainspector ||= MetaInspector.new url, document: html
|
107
109
|
rescue
|
108
110
|
puts "MetaInspector error"
|
109
111
|
end
|
data/lib/curation/version.rb
CHANGED