curation 1.1 → 1.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 14637da5bf7b047f9c34b40ff4bf108110dce7408d3c12af9e76222141bdcf65
4
- data.tar.gz: 144487e448354476958895db783fdc681d0116ec3eca5e7ede1cd3cf1bd1d13a
3
+ metadata.gz: 21a638c75deffb70db8694ecb99d94b1b711fba922ecbd0826d3a69597031df6
4
+ data.tar.gz: 621519940fe6bc44302201f64211eba932d52ed366ebe0c6f11c02688b5ae0ba
5
5
  SHA512:
6
- metadata.gz: 303babe96daee5e792672e1ece05b49d77c115e393af00ded6902b10b9d4adb51210923c7d6c0e68a359246078618a8d09606ab57dd57fa0882db7e3a474e3e4
7
- data.tar.gz: 451a1ad8866adc91850ff9b150847564bb9de43b15432de2dbf06fb2936a3788ed2c0bf4d9e3c9c462e13bdb5c6236bf5dc0c97a6697a8c3ea9c307bf2f35410
6
+ metadata.gz: bf5f63d05793d71ac052b94dcaf5f4e34943355799e042361ae704ca9423b501ab3e157ae8b6509da642e6fa680768205030a24ddc1eb4e24c31863284289575
7
+ data.tar.gz: d09140b59ba119e85e9c955a40586bd1ce6410fe392a59b5e4a92e2a1ae80b4608fd458c0b84b24be8ba31267bcee9550432264e6ca4068be47b90db46228ed1
@@ -19,8 +19,9 @@ module Curation
19
19
  '[style*="display: none;"]', '[style*="display: none"]', '[aria-hidden="true"]'
20
20
  ]
21
21
 
22
- def initialize(url)
22
+ def initialize(url, html = nil)
23
23
  @url = url
24
+ @html = html
24
25
  end
25
26
 
26
27
  def title
@@ -29,7 +30,8 @@ module Curation
29
30
  return ld['headline'] if ld.has_key? 'headline'
30
31
  end
31
32
  end
32
- metainspector.best_title
33
+ metainspector.best_title unless metainspector.best_title.blank?
34
+ metainspector.title
33
35
  end
34
36
 
35
37
  def image
@@ -46,7 +48,7 @@ module Curation
46
48
  return ld['articleBody'] if ld.has_key? 'articleBody'
47
49
  end
48
50
  end
49
- h = html.dup
51
+ h = nokogiri.dup
50
52
  BLACKLIST.each do |tag|
51
53
  h.css(tag).remove
52
54
  end
@@ -73,17 +75,11 @@ module Curation
73
75
  metainspector.images.best
74
76
  end
75
77
 
76
- def html
77
- @html ||= Nokogiri::HTML data
78
- rescue
79
- puts "Nokogiri error"
80
- end
81
-
82
78
  def json_ld
83
79
  unless @json_ld
84
80
  @json_ld = []
85
81
  begin
86
- options = html.css('[type="application/ld+json"]')
82
+ options = nokogiri.css('[type="application/ld+json"]')
87
83
  options.each do |option|
88
84
  string = option.inner_text
89
85
  hash = JSON.parse(string)
@@ -96,14 +92,20 @@ module Curation
96
92
  @json_ld
97
93
  end
98
94
 
99
- def data
100
- URI.open url
95
+ def html
96
+ @html ||= URI.open url
101
97
  rescue
102
98
  puts "Impossible to open #{url}"
103
99
  end
104
100
 
101
+ def nokogiri
102
+ @nokogiri ||= Nokogiri::HTML html
103
+ rescue
104
+ puts "Nokogiri error"
105
+ end
106
+
105
107
  def metainspector
106
- @metainspector ||= MetaInspector.new url
108
+ @metainspector ||= MetaInspector.new url, document: html
107
109
  rescue
108
110
  puts "MetaInspector error"
109
111
  end
@@ -1,3 +1,3 @@
1
1
  module Curation
2
- VERSION = "1.1"
2
+ VERSION = "1.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: curation
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.1'
4
+ version: '1.2'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arnaud Levy