curation 1.1 → 1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/curation.rb +15 -13
- data/lib/curation/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 21a638c75deffb70db8694ecb99d94b1b711fba922ecbd0826d3a69597031df6
|
4
|
+
data.tar.gz: 621519940fe6bc44302201f64211eba932d52ed366ebe0c6f11c02688b5ae0ba
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bf5f63d05793d71ac052b94dcaf5f4e34943355799e042361ae704ca9423b501ab3e157ae8b6509da642e6fa680768205030a24ddc1eb4e24c31863284289575
|
7
|
+
data.tar.gz: d09140b59ba119e85e9c955a40586bd1ce6410fe392a59b5e4a92e2a1ae80b4608fd458c0b84b24be8ba31267bcee9550432264e6ca4068be47b90db46228ed1
|
data/lib/curation.rb
CHANGED
@@ -19,8 +19,9 @@ module Curation
|
|
19
19
|
'[style*="display: none;"]', '[style*="display: none"]', '[aria-hidden="true"]'
|
20
20
|
]
|
21
21
|
|
22
|
-
def initialize(url)
|
22
|
+
def initialize(url, html = nil)
|
23
23
|
@url = url
|
24
|
+
@html = html
|
24
25
|
end
|
25
26
|
|
26
27
|
def title
|
@@ -29,7 +30,8 @@ module Curation
|
|
29
30
|
return ld['headline'] if ld.has_key? 'headline'
|
30
31
|
end
|
31
32
|
end
|
32
|
-
metainspector.best_title
|
33
|
+
metainspector.best_title unless metainspector.best_title.blank?
|
34
|
+
metainspector.title
|
33
35
|
end
|
34
36
|
|
35
37
|
def image
|
@@ -46,7 +48,7 @@ module Curation
|
|
46
48
|
return ld['articleBody'] if ld.has_key? 'articleBody'
|
47
49
|
end
|
48
50
|
end
|
49
|
-
h =
|
51
|
+
h = nokogiri.dup
|
50
52
|
BLACKLIST.each do |tag|
|
51
53
|
h.css(tag).remove
|
52
54
|
end
|
@@ -73,17 +75,11 @@ module Curation
|
|
73
75
|
metainspector.images.best
|
74
76
|
end
|
75
77
|
|
76
|
-
def html
|
77
|
-
@html ||= Nokogiri::HTML data
|
78
|
-
rescue
|
79
|
-
puts "Nokogiri error"
|
80
|
-
end
|
81
|
-
|
82
78
|
def json_ld
|
83
79
|
unless @json_ld
|
84
80
|
@json_ld = []
|
85
81
|
begin
|
86
|
-
options =
|
82
|
+
options = nokogiri.css('[type="application/ld+json"]')
|
87
83
|
options.each do |option|
|
88
84
|
string = option.inner_text
|
89
85
|
hash = JSON.parse(string)
|
@@ -96,14 +92,20 @@ module Curation
|
|
96
92
|
@json_ld
|
97
93
|
end
|
98
94
|
|
99
|
-
def
|
100
|
-
URI.open url
|
95
|
+
def html
|
96
|
+
@html ||= URI.open url
|
101
97
|
rescue
|
102
98
|
puts "Impossible to open #{url}"
|
103
99
|
end
|
104
100
|
|
101
|
+
def nokogiri
|
102
|
+
@nokogiri ||= Nokogiri::HTML html
|
103
|
+
rescue
|
104
|
+
puts "Nokogiri error"
|
105
|
+
end
|
106
|
+
|
105
107
|
def metainspector
|
106
|
-
@metainspector ||= MetaInspector.new url
|
108
|
+
@metainspector ||= MetaInspector.new url, document: html
|
107
109
|
rescue
|
108
110
|
puts "MetaInspector error"
|
109
111
|
end
|
data/lib/curation/version.rb
CHANGED