metainspector 2.2.1 → 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/meta_inspector/document.rb +3 -2
- data/lib/meta_inspector/parser.rb +5 -0
- data/lib/meta_inspector/version.rb +1 -1
- data/spec/document_spec.rb +1 -0
- data/spec/fixtures/pagerankalert.com.response +2 -1
- data/spec/parser_spec.rb +14 -1
- metadata +17 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 069e40d49a431b3208e162a5d9e5912fb71f90e7
|
4
|
+
data.tar.gz: ebf89aa3a5913981d767873f968832d3e276711f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 73d3b7ab3ea75a325b84d2ea35f2048d035126dac2e8c97097edd673874f590c133a3c168ce07fc5a991ba0712750804c01b10a20a457bb814ac56c048bab21e
|
7
|
+
data.tar.gz: d20c54ffa3e1d365a74eab9feca89e1397e5fab7cb2fcefba0145d4aec7dab85ae60527885f839a343f155970f6cd65be517058604f8b5f9a5be857091539954
|
data/README.md
CHANGED
@@ -54,6 +54,7 @@ Then you can see the scraped data like this:
|
|
54
54
|
page.feed # Get rss or atom links in meta data fields as array
|
55
55
|
page.charset # UTF-8
|
56
56
|
page.content_type # content-type returned by the server when the url was requested
|
57
|
+
page.favicon # absolute URL to the favicon
|
57
58
|
|
58
59
|
## Meta tags
|
59
60
|
|
@@ -36,7 +36,7 @@ module MetaInspector
|
|
36
36
|
def_delegators :@url, :url, :scheme, :host, :root_url
|
37
37
|
def_delegators :@request, :content_type
|
38
38
|
def_delegators :@parser, :parsed, :respond_to?, :title, :description, :links, :internal_links, :external_links,
|
39
|
-
:images, :image, :feed, :charset, :meta_tags, :meta_tag, :meta
|
39
|
+
:images, :image, :feed, :charset, :meta_tags, :meta_tag, :meta, :favicon
|
40
40
|
|
41
41
|
# Returns all document data as a nested Hash
|
42
42
|
def to_hash
|
@@ -50,7 +50,8 @@ module MetaInspector
|
|
50
50
|
'charset' => charset,
|
51
51
|
'feed' => feed,
|
52
52
|
'content_type' => content_type,
|
53
|
-
'meta_tags' => meta_tags
|
53
|
+
'meta_tags' => meta_tags,
|
54
|
+
'favicon' => favicon
|
54
55
|
}
|
55
56
|
end
|
56
57
|
|
@@ -47,6 +47,11 @@ module MetaInspector
|
|
47
47
|
def title
|
48
48
|
@title ||= parsed.css('title').inner_text rescue nil
|
49
49
|
end
|
50
|
+
|
51
|
+
# Return favicon url if exist
|
52
|
+
def favicon
|
53
|
+
@favicon ||= URL.absolutify(parsed.xpath('//link[@rel="icon"]')[0].attributes['href'].value, base_url) rescue nil
|
54
|
+
end
|
50
55
|
|
51
56
|
# A description getter that first checks for a meta description and if not present will
|
52
57
|
# guess by looking at the first paragraph with more than 120 characters
|
data/spec/document_spec.rb
CHANGED
@@ -26,6 +26,7 @@ describe MetaInspector::Document do
|
|
26
26
|
@m.to_hash.should == {
|
27
27
|
"url" =>"http://pagerankalert.com/",
|
28
28
|
"title" =>"PageRankAlert.com :: Track your PageRank changes & receive alerts",
|
29
|
+
"favicon" =>"http://pagerankalert.com/src/favicon.ico",
|
29
30
|
"links" => ["http://pagerankalert.com/",
|
30
31
|
"http://pagerankalert.com/es?language=es",
|
31
32
|
"http://pagerankalert.com/users/sign_up",
|
@@ -19,6 +19,7 @@ Via: 1.1 varnish
|
|
19
19
|
<meta charset=utf-8>
|
20
20
|
<link rel="alternate" type="application/rss+xml" title="PageRankAlert.com blog" href="http://feeds.feedburner.com/PageRankAlert" />
|
21
21
|
<title>PageRankAlert.com :: Track your PageRank changes & receive alerts</title>
|
22
|
+
<link rel="icon" href="/src/favicon.ico">
|
22
23
|
<meta name="description" content="Track your PageRank(TM) changes and receive alerts by email" />
|
23
24
|
<meta name="keywords" content="pagerank, seo, optimization, google" />
|
24
25
|
<meta name="robots" content="all,follow" />
|
@@ -183,4 +184,4 @@ window.onload = (typeof window.onload != 'function') ? _loadUserVoice : function
|
|
183
184
|
|
184
185
|
</script>
|
185
186
|
</body>
|
186
|
-
</html>
|
187
|
+
</html>
|
data/spec/parser_spec.rb
CHANGED
@@ -55,7 +55,7 @@ describe MetaInspector::Parser do
|
|
55
55
|
it "should return the document as a string" do
|
56
56
|
@m.to_s.class.should == String
|
57
57
|
end
|
58
|
-
|
58
|
+
|
59
59
|
describe "Feed" do
|
60
60
|
it "should get rss feed" do
|
61
61
|
@m = MetaInspector::Parser.new(doc 'http://www.iteh.at')
|
@@ -72,6 +72,7 @@ describe MetaInspector::Parser do
|
|
72
72
|
@m.feed.should == nil
|
73
73
|
end
|
74
74
|
end
|
75
|
+
|
75
76
|
end
|
76
77
|
|
77
78
|
describe '#description' do
|
@@ -86,6 +87,18 @@ describe MetaInspector::Parser do
|
|
86
87
|
@m.description.should == "SAN FRANCISCO—In a move expected to revolutionize the mobile device industry, Apple launched its fastest and most powerful iPhone to date Tuesday, an innovative new model that can only be seen by the company's hippest and most dedicated customers. This is secondary text picked up because of a missing meta description."
|
87
88
|
end
|
88
89
|
end
|
90
|
+
|
91
|
+
describe '#favicon' do
|
92
|
+
it "should get favicon link" do
|
93
|
+
@m = MetaInspector::Parser.new(doc 'http://pagerankalert.com/')
|
94
|
+
@m.favicon.should == 'http://pagerankalert.com/src/favicon.ico'
|
95
|
+
end
|
96
|
+
|
97
|
+
it "should get favicon link of nil" do
|
98
|
+
@m = MetaInspector::Parser.new(doc 'http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/')
|
99
|
+
@m.favicon.should == nil
|
100
|
+
end
|
101
|
+
end
|
89
102
|
|
90
103
|
describe 'Links' do
|
91
104
|
before(:each) do
|
metadata
CHANGED
@@ -1,55 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.1
|
4
|
+
version: 2.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '1.6'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.6'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: open_uri_redirections
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - ~>
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 0.1.4
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - ~>
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 0.1.4
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: addressable
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - ~>
|
45
|
+
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: 2.3.5
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - ~>
|
52
|
+
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 2.3.5
|
55
55
|
- !ruby/object:Gem::Dependency
|
@@ -84,28 +84,28 @@ dependencies:
|
|
84
84
|
name: awesome_print
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- - ~>
|
87
|
+
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
89
|
version: 1.2.0
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- - ~>
|
94
|
+
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: 1.2.0
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: rake
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- - ~>
|
101
|
+
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
103
|
version: 10.1.0
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- - ~>
|
108
|
+
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: 10.1.0
|
111
111
|
description: MetaInspector lets you scrape a web page and get its title, charset,
|
@@ -116,9 +116,9 @@ executables: []
|
|
116
116
|
extensions: []
|
117
117
|
extra_rdoc_files: []
|
118
118
|
files:
|
119
|
-
- .gitignore
|
120
|
-
- .rspec.example
|
121
|
-
- .travis.yml
|
119
|
+
- ".gitignore"
|
120
|
+
- ".rspec.example"
|
121
|
+
- ".travis.yml"
|
122
122
|
- Gemfile
|
123
123
|
- MIT-LICENSE
|
124
124
|
- README.md
|
@@ -181,12 +181,12 @@ require_paths:
|
|
181
181
|
- lib
|
182
182
|
required_ruby_version: !ruby/object:Gem::Requirement
|
183
183
|
requirements:
|
184
|
-
- -
|
184
|
+
- - ">="
|
185
185
|
- !ruby/object:Gem::Version
|
186
186
|
version: '0'
|
187
187
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
188
188
|
requirements:
|
189
|
-
- -
|
189
|
+
- - ">="
|
190
190
|
- !ruby/object:Gem::Version
|
191
191
|
version: '0'
|
192
192
|
requirements: []
|