curation 1.1 → 1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 14637da5bf7b047f9c34b40ff4bf108110dce7408d3c12af9e76222141bdcf65
4
- data.tar.gz: 144487e448354476958895db783fdc681d0116ec3eca5e7ede1cd3cf1bd1d13a
3
+ metadata.gz: 21a638c75deffb70db8694ecb99d94b1b711fba922ecbd0826d3a69597031df6
4
+ data.tar.gz: 621519940fe6bc44302201f64211eba932d52ed366ebe0c6f11c02688b5ae0ba
5
5
  SHA512:
6
- metadata.gz: 303babe96daee5e792672e1ece05b49d77c115e393af00ded6902b10b9d4adb51210923c7d6c0e68a359246078618a8d09606ab57dd57fa0882db7e3a474e3e4
7
- data.tar.gz: 451a1ad8866adc91850ff9b150847564bb9de43b15432de2dbf06fb2936a3788ed2c0bf4d9e3c9c462e13bdb5c6236bf5dc0c97a6697a8c3ea9c307bf2f35410
6
+ metadata.gz: bf5f63d05793d71ac052b94dcaf5f4e34943355799e042361ae704ca9423b501ab3e157ae8b6509da642e6fa680768205030a24ddc1eb4e24c31863284289575
7
+ data.tar.gz: d09140b59ba119e85e9c955a40586bd1ce6410fe392a59b5e4a92e2a1ae80b4608fd458c0b84b24be8ba31267bcee9550432264e6ca4068be47b90db46228ed1
@@ -19,8 +19,9 @@ module Curation
19
19
  '[style*="display: none;"]', '[style*="display: none"]', '[aria-hidden="true"]'
20
20
  ]
21
21
 
22
- def initialize(url)
22
+ def initialize(url, html = nil)
23
23
  @url = url
24
+ @html = html
24
25
  end
25
26
 
26
27
  def title
@@ -29,7 +30,8 @@ module Curation
29
30
  return ld['headline'] if ld.has_key? 'headline'
30
31
  end
31
32
  end
32
- metainspector.best_title
33
+ metainspector.best_title unless metainspector.best_title.blank?
34
+ metainspector.title
33
35
  end
34
36
 
35
37
  def image
@@ -46,7 +48,7 @@ module Curation
46
48
  return ld['articleBody'] if ld.has_key? 'articleBody'
47
49
  end
48
50
  end
49
- h = html.dup
51
+ h = nokogiri.dup
50
52
  BLACKLIST.each do |tag|
51
53
  h.css(tag).remove
52
54
  end
@@ -73,17 +75,11 @@ module Curation
73
75
  metainspector.images.best
74
76
  end
75
77
 
76
- def html
77
- @html ||= Nokogiri::HTML data
78
- rescue
79
- puts "Nokogiri error"
80
- end
81
-
82
78
  def json_ld
83
79
  unless @json_ld
84
80
  @json_ld = []
85
81
  begin
86
- options = html.css('[type="application/ld+json"]')
82
+ options = nokogiri.css('[type="application/ld+json"]')
87
83
  options.each do |option|
88
84
  string = option.inner_text
89
85
  hash = JSON.parse(string)
@@ -96,14 +92,20 @@ module Curation
96
92
  @json_ld
97
93
  end
98
94
 
99
- def data
100
- URI.open url
95
+ def html
96
+ @html ||= URI.open url
101
97
  rescue
102
98
  puts "Impossible to open #{url}"
103
99
  end
104
100
 
101
+ def nokogiri
102
+ @nokogiri ||= Nokogiri::HTML html
103
+ rescue
104
+ puts "Nokogiri error"
105
+ end
106
+
105
107
  def metainspector
106
- @metainspector ||= MetaInspector.new url
108
+ @metainspector ||= MetaInspector.new url, document: html
107
109
  rescue
108
110
  puts "MetaInspector error"
109
111
  end
@@ -1,3 +1,3 @@
1
1
  module Curation
2
- VERSION = "1.1"
2
+ VERSION = "1.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: curation
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.1'
4
+ version: '1.2'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arnaud Levy