metainspector 5.7.0 → 5.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 860350c6b4704259715f0a0eea047893134d774928b0bf78c9633f5afec480ff
4
- data.tar.gz: d0abcb709893e63be252886723f8f202137dae3d02b506173a59b3368dcda7f7
3
+ metadata.gz: a349b7103e8b214dc3211da2bc46d830e8e88b85fa7bde9b734ff3ba13802be0
4
+ data.tar.gz: fc4b691504551ee1c0ca6df5f92f5f5dbf5f2ebfa71415cff5259a67aff17c5f
5
5
  SHA512:
6
- metadata.gz: ab74dde15e1864f4c747be953a512278578ab231bea4637e1fb752234766c5b895b47405bd8572c06fd421cd16170e69c4331fb3462a7177ea4c6c521748e1f9
7
- data.tar.gz: fde5cf878c7320d49c82aa430b2fc422044a07168e4b6e6cce53d3b33655b28f6ee811cc535ffca06be46c19403b83e7c453aa942895b9e77c675468fa3fea12
6
+ metadata.gz: 1a268679f23f0ecfdd4358304999200175c78c2bef37ad1d5acdba641b46512ec6e6dca8d18005b87bc7fbc9b632592ac657818c99d540324685a56912a8985c
7
+ data.tar.gz: 8f90160c9f923e69157cfc8c2d6b13f386dd394501d37330a6ede47e587d9474adbe9b932df86340fc3c14754eedd59538eef65944801f6567b26189be8db077
@@ -1,5 +1,10 @@
1
1
  # MetaInspector Changelog
2
2
 
3
+ ## [Changes in 5.7](https://github.com/jaimeiniesta/metainspector/compare/v5.6.0...v5.7.0)
4
+
5
+ * Avoids normalizing image URLs. https://github.com/jaimeiniesta/metainspector/pull/241
6
+ * Adds `NonHtmlErrorException` instead of `ParserError` https://github.com/jaimeiniesta/metainspector/pull/248
7
+
3
8
  ## [Changes in 5.6](https://github.com/jaimeiniesta/metainspector/compare/v5.5.0...v5.6.0)
4
9
 
5
10
  * New feature: `:encoding` option to force the encoding of a parsed document.
data/README.md CHANGED
@@ -85,6 +85,12 @@ page.author # author of the page from the meta author tag
85
85
  page.best_author # best author of the page, from a selection of candidates
86
86
  page.description # returns the meta description
87
87
  page.best_description # returns the first non-empty description between the following candidates: standard meta description, og:description, twitter:description, the first long paragraph
88
+ page.h1 # returns h1 text array
89
+ page.h2 # returns h2 text array
90
+ page.h3 # returns h3 text array
91
+ page.h4 # returns h4 text array
92
+ page.h5 # returns h5 text array
93
+ page.h6 # returns h6 text array
88
94
  ```
89
95
 
90
96
  ### Links
@@ -48,7 +48,7 @@ module MetaInspector
48
48
  delegate [:content_type, :response] => :@request
49
49
 
50
50
  delegate [:parsed, :title, :best_title, :author, :best_author,
51
- :description, :best_description, :links,
51
+ :h1, :h2, :h3, :h4, :h5, :h6, :description, :best_description, :links,
52
52
  :images, :feed, :charset, :meta_tags,
53
53
  :meta_tag, :meta, :favicon,
54
54
  :head_links, :stylesheets, :canonicals] => :@parser
@@ -66,6 +66,12 @@ module MetaInspector
66
66
  'best_author' => best_author,
67
67
  'description' => description,
68
68
  'best_description' => best_description,
69
+ 'h1' => h1,
70
+ 'h2' => h2,
71
+ 'h3' => h3,
72
+ 'h4' => h4,
73
+ 'h5' => h5,
74
+ 'h6' => h6,
69
75
  'links' => links.to_hash,
70
76
  'images' => images.to_a,
71
77
  'charset' => charset,
@@ -26,7 +26,8 @@ module MetaInspector
26
26
  delegate [:head_links, :stylesheets, :canonicals, :feed] => :@head_links_parser
27
27
  delegate [:links, :base_url] => :@links_parser
28
28
  delegate :images => :@images_parser
29
- delegate [:title, :best_title, :author, :best_author, :description, :best_description] => :@texts_parser
29
+ delegate [:title, :best_title, :author, :best_author, :description, :best_description,
30
+ :h1, :h2, :h3, :h4, :h5, :h6] => :@texts_parser
30
31
 
31
32
  # Returns the whole parsed document
32
33
  def parsed
@@ -13,6 +13,30 @@ module MetaInspector
13
13
  @best_title ||= find_best_title
14
14
  end
15
15
 
16
+ def h1
17
+ @h1 ||= find_heading('h1')
18
+ end
19
+
20
+ def h2
21
+ @h2 ||= find_heading('h2')
22
+ end
23
+
24
+ def h3
25
+ @h3 ||= find_heading('h3')
26
+ end
27
+
28
+ def h4
29
+ @h4 ||= find_heading('h4')
30
+ end
31
+
32
+ def h5
33
+ @h5 ||= find_heading('h5')
34
+ end
35
+
36
+ def h6
37
+ @h6 ||= find_heading('h6')
38
+ end
39
+
16
40
  # Returns the meta author, if present
17
41
  def author
18
42
  @author ||= meta['author']
@@ -45,6 +69,10 @@ module MetaInspector
45
69
 
46
70
  private
47
71
 
72
+ def find_heading(heading)
73
+ parsed.css(heading).map { |tag| tag.inner_text.strip.gsub(/\s+/, ' ') }.reject(&:empty?)
74
+ end
75
+
48
76
  # Look for candidates per list of priority
49
77
  def find_best_title
50
78
  candidates = [
@@ -1,3 +1,3 @@
1
1
  module MetaInspector
2
- VERSION = '5.7.0'
2
+ VERSION = '5.8.0'
3
3
  end
@@ -15,19 +15,19 @@ Gem::Specification.new do |gem|
15
15
  gem.version = MetaInspector::VERSION
16
16
 
17
17
  gem.add_dependency 'nokogiri', '~> 1.10.4'
18
- gem.add_dependency 'faraday', '~> 0.15.3'
19
- gem.add_dependency 'faraday_middleware', '~> 0.12.2'
18
+ gem.add_dependency 'faraday', '~> 0.17.0'
19
+ gem.add_dependency 'faraday_middleware', '~> 0.13.1'
20
20
  gem.add_dependency 'faraday-cookie_jar', '~> 0.0.6'
21
21
  gem.add_dependency 'faraday-http-cache', '~> 2.0.0'
22
22
  gem.add_dependency 'faraday-encoding', '~> 0.0.5'
23
- gem.add_dependency 'addressable', '~> 2.5.2'
24
- gem.add_dependency 'fastimage', '~> 2.1.4'
23
+ gem.add_dependency 'addressable', '~> 2.7.0'
24
+ gem.add_dependency 'fastimage', '~> 2.1.7'
25
25
  gem.add_dependency 'nesty', '~> 1.0.2'
26
26
 
27
- gem.add_development_dependency 'rspec', '~> 3.8.0'
27
+ gem.add_development_dependency 'rspec', '~> 3.9.0'
28
28
  gem.add_development_dependency 'webmock', '~> 3.7.6'
29
29
  gem.add_development_dependency 'awesome_print', '~> 1.8.0'
30
- gem.add_development_dependency 'rake', '~> 12.3.1'
30
+ gem.add_development_dependency 'rake', '~> 13.0.0'
31
31
  gem.add_development_dependency 'pry', '~> 0.12.2'
32
- gem.add_development_dependency 'rubocop', '~> 0.60.0'
32
+ gem.add_development_dependency 'rubocop', '~> 0.75.1'
33
33
  end
@@ -44,6 +44,12 @@ describe MetaInspector::Document do
44
44
  "images" => ["http://pagerankalert.com/images/pagerank_alert.png?1305794559"],
45
45
  "charset" => "utf-8",
46
46
  "feed" => "http://feeds.feedburner.com/PageRankAlert",
47
+ "h1" => [],
48
+ "h2" => ["Track your PageRank changes"],
49
+ "h3" => ["WHAT'S YOUR PAGERANK?"],
50
+ "h4" => ["Build your own lists", "Get e-mail alerts", "Track your history"],
51
+ "h5" => [],
52
+ "h6" => [],
47
53
  "content_type" => "text/html",
48
54
  "meta_tags" => {
49
55
  "name" => {
@@ -0,0 +1,23 @@
1
+ HTTP/1.1 200 OK
2
+ Server: nginx/0.7.67
3
+ Date: Fri, 18 Nov 2011 21:46:46 GMT
4
+ Content-Type: text/html
5
+ Connection: keep-alive
6
+ Last-Modified: Mon, 14 Nov 2011 16:53:18 GMT
7
+ Content-Length: 4987
8
+ X-Varnish: 2000423390
9
+ Age: 0
10
+ Via: 1.1 varnish
11
+
12
+ <html>
13
+ <head>
14
+ </head>
15
+ <body>
16
+ <h1>H1</h1>
17
+ <h2>H2</h2>
18
+ <h3>H3</h3>
19
+ <h4>H4</h4>
20
+ <h5>H5</h5>
21
+ <h6>H6</h6>
22
+ </body>
23
+ </html>
@@ -6,6 +6,48 @@ describe MetaInspector do
6
6
  expect(page.title).to eq('An example page')
7
7
  end
8
8
 
9
+ describe "#h1" do
10
+ it "should find h1 content" do
11
+ page = MetaInspector.new('http://example.com/headings')
12
+ expect(page.h1.first).to eq('H1')
13
+ end
14
+ end
15
+
16
+ describe "#h2" do
17
+ it "should find h2 content" do
18
+ page = MetaInspector.new('http://example.com/headings')
19
+ expect(page.h2.first).to eq('H2')
20
+ end
21
+ end
22
+
23
+ describe "#h3" do
24
+ it "should find h3 content" do
25
+ page = MetaInspector.new('http://example.com/headings')
26
+ expect(page.h3.first).to eq('H3')
27
+ end
28
+ end
29
+
30
+ describe "#h4" do
31
+ it "should find h4 content" do
32
+ page = MetaInspector.new('http://example.com/headings')
33
+ expect(page.h4.first).to eq('H4')
34
+ end
35
+ end
36
+
37
+ describe "#h5" do
38
+ it "should find h5 content" do
39
+ page = MetaInspector.new('http://example.com/headings')
40
+ expect(page.h5.first).to eq('H5')
41
+ end
42
+ end
43
+
44
+ describe "#h6" do
45
+ it "should find h6 content" do
46
+ page = MetaInspector.new('http://example.com/headings')
47
+ expect(page.h6.first).to eq('H6')
48
+ end
49
+ end
50
+
9
51
  describe '#best_title' do
10
52
  it "should find 'head title' when that's the only thing" do
11
53
  page = MetaInspector.new('http://example.com/title_in_head')
@@ -29,6 +29,7 @@ RSpec.configure do |config|
29
29
  stub_request(:get, "http://example.com/desc_in_twitter").to_return(fixture_file("desc_in_twitter.response"))
30
30
  stub_request(:get, "http://example.com/empty").to_return(fixture_file("empty_page.response"))
31
31
  stub_request(:get, "http://example.com/head_links").to_return(fixture_file("head_links.response"))
32
+ stub_request(:get, "http://example.com/headings").to_return(fixture_file("headings.response"))
32
33
  stub_request(:get, "http://example.com/invalid_byte_seq").to_return(fixture_file("invalid_byte_seq.response"))
33
34
  stub_request(:get, "http://example.com/invalid_utf8_byte_seq").to_return(fixture_file("invalid_utf8_byte_seq.response"))
34
35
  stub_request(:get, "http://example.com/invalid_href").to_return(fixture_file("invalid_href.response"))
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.7.0
4
+ version: 5.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaime Iniesta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-10-01 00:00:00.000000000 Z
11
+ date: 2019-10-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -30,28 +30,28 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.15.3
33
+ version: 0.17.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.15.3
40
+ version: 0.17.0
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: faraday_middleware
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 0.12.2
47
+ version: 0.13.1
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 0.12.2
54
+ version: 0.13.1
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: faraday-cookie_jar
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -100,28 +100,28 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 2.5.2
103
+ version: 2.7.0
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: 2.5.2
110
+ version: 2.7.0
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: fastimage
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
115
  - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: 2.1.4
117
+ version: 2.1.7
118
118
  type: :runtime
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: 2.1.4
124
+ version: 2.1.7
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: nesty
127
127
  requirement: !ruby/object:Gem::Requirement
@@ -142,14 +142,14 @@ dependencies:
142
142
  requirements:
143
143
  - - "~>"
144
144
  - !ruby/object:Gem::Version
145
- version: 3.8.0
145
+ version: 3.9.0
146
146
  type: :development
147
147
  prerelease: false
148
148
  version_requirements: !ruby/object:Gem::Requirement
149
149
  requirements:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
- version: 3.8.0
152
+ version: 3.9.0
153
153
  - !ruby/object:Gem::Dependency
154
154
  name: webmock
155
155
  requirement: !ruby/object:Gem::Requirement
@@ -184,14 +184,14 @@ dependencies:
184
184
  requirements:
185
185
  - - "~>"
186
186
  - !ruby/object:Gem::Version
187
- version: 12.3.1
187
+ version: 13.0.0
188
188
  type: :development
189
189
  prerelease: false
190
190
  version_requirements: !ruby/object:Gem::Requirement
191
191
  requirements:
192
192
  - - "~>"
193
193
  - !ruby/object:Gem::Version
194
- version: 12.3.1
194
+ version: 13.0.0
195
195
  - !ruby/object:Gem::Dependency
196
196
  name: pry
197
197
  requirement: !ruby/object:Gem::Requirement
@@ -212,14 +212,14 @@ dependencies:
212
212
  requirements:
213
213
  - - "~>"
214
214
  - !ruby/object:Gem::Version
215
- version: 0.60.0
215
+ version: 0.75.1
216
216
  type: :development
217
217
  prerelease: false
218
218
  version_requirements: !ruby/object:Gem::Requirement
219
219
  requirements:
220
220
  - - "~>"
221
221
  - !ruby/object:Gem::Version
222
- version: 0.60.0
222
+ version: 0.75.1
223
223
  description: MetaInspector lets you scrape a web page and get its links, images, texts,
224
224
  meta tags...
225
225
  email:
@@ -280,6 +280,7 @@ files:
280
280
  - spec/fixtures/facebook.com.response
281
281
  - spec/fixtures/guardian.co.uk.response
282
282
  - spec/fixtures/head_links.response
283
+ - spec/fixtures/headings.response
283
284
  - spec/fixtures/https.facebook.com.response
284
285
  - spec/fixtures/international.response
285
286
  - spec/fixtures/invalid_byte_seq.response