metainspector 1.3.0 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/.rspec.example +1 -0
- data/README.rdoc +5 -1
- data/Rakefile +3 -0
- data/lib/meta_inspector.rb +1 -1
- data/lib/meta_inspector/scraper.rb +15 -0
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +1 -1
- data/spec/metainspector_spec.rb +11 -1
- metadata +7 -6
data/.gitignore
CHANGED
data/.rspec.example
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour --format d
|
data/README.rdoc
CHANGED
@@ -31,6 +31,7 @@ Then you can see the scraped data like this:
|
|
31
31
|
page.meta_description # meta description, as string
|
32
32
|
page.meta_keywords # meta keywords, as string
|
33
33
|
page.image # Most relevant image, if defined with og:image
|
34
|
+
page.rss # Get rss or atom links in meta data fields as array
|
34
35
|
|
35
36
|
MetaInspector uses dynamic methods for meta_tag discovery, so all these will work, and will be converted to a search of a meta tag by the corresponding name, and return its content attribute
|
36
37
|
|
@@ -84,7 +85,10 @@ You can find some sample scripts on the samples folder, including a basic scrapi
|
|
84
85
|
|
85
86
|
= ZOMG Fork! Thank you!
|
86
87
|
|
87
|
-
You're welcome to fork this project and send pull requests. I want to thank
|
88
|
+
You're welcome to fork this project and send pull requests. I want to thank specially:
|
89
|
+
|
90
|
+
* Ryan Romanchuk https://github.com/rromanchuk
|
91
|
+
* Edmund Haselwanter https://github.com/ehaselwanter
|
88
92
|
|
89
93
|
= To Do
|
90
94
|
|
data/Rakefile
CHANGED
data/lib/meta_inspector.rb
CHANGED
data/lib/meta_inspector/scraper.rb
CHANGED
@@ -28,6 +28,15 @@ module MetaInspector
|
|
28
28
|
@links ||= parsed_document.search("//a").map {|link| link.attributes["href"].to_s.strip} rescue nil
|
29
29
|
end
|
30
30
|
|
31
|
+
# Returns the parsed document meta rss links
|
32
|
+
def feed
|
33
|
+
@feed ||= parsed_document.xpath("//link").select{ |link|
|
34
|
+
link.attributes["type"] && link.attributes["type"].value =~ /(atom|rss)/
|
35
|
+
}.map { |link|
|
36
|
+
absolutify_url(link.attributes["href"].value)
|
37
|
+
}.first rescue nil
|
38
|
+
end
|
39
|
+
|
31
40
|
# Returns the parsed image from Facebook's open graph property tags
|
32
41
|
# Most all major websites now define this property and is usually very relevant
|
33
42
|
# See doc at http://developers.facebook.com/docs/opengraph/
|
@@ -81,5 +90,11 @@ module MetaInspector
|
|
81
90
|
super
|
82
91
|
end
|
83
92
|
end
|
93
|
+
|
94
|
+
private
|
95
|
+
|
96
|
+
def absolutify_url(url)
|
97
|
+
url =~ /^http.*/ ? url : File.join(@url,url)
|
98
|
+
end
|
84
99
|
end
|
85
100
|
end
|
data/meta_inspector.gemspec
CHANGED
data/spec/metainspector_spec.rb
CHANGED
@@ -37,7 +37,7 @@ describe MetaInspector do
|
|
37
37
|
end
|
38
38
|
|
39
39
|
it "should get the links" do
|
40
|
-
@m.links.size.should ==
|
40
|
+
@m.links.size.should == 9
|
41
41
|
end
|
42
42
|
|
43
43
|
it "should have a Nokogiri::HTML::Document as parsed_document" do
|
@@ -47,6 +47,16 @@ describe MetaInspector do
|
|
47
47
|
it "should have a String as document" do
|
48
48
|
@m.document.class.should == String
|
49
49
|
end
|
50
|
+
|
51
|
+
it "should get rss feed" do
|
52
|
+
@m = MetaInspector.new('http://www.iteh.at')
|
53
|
+
@m.feed.should == 'http://www.iteh.at/de/rss/'
|
54
|
+
end
|
55
|
+
|
56
|
+
it "should get atom feed" do
|
57
|
+
@m = MetaInspector.new('http://www.tea-tron.com/jbravo/blog/')
|
58
|
+
@m.feed.should == 'http://www.tea-tron.com/jbravo/blog/feed/'
|
59
|
+
end
|
50
60
|
end
|
51
61
|
|
52
62
|
context 'Getting meta tags by ghost methods' do
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 1
|
7
|
-
- 3
|
7
|
+
- 4
|
8
8
|
- 0
|
9
|
-
version: 1.3.0
|
9
|
+
version: 1.4.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Jaime Iniesta
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-05-
|
17
|
+
date: 2011-05-30 00:00:00 +02:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -53,13 +53,13 @@ dependencies:
|
|
53
53
|
requirement: &id003 !ruby/object:Gem::Requirement
|
54
54
|
none: false
|
55
55
|
requirements:
|
56
|
-
- -
|
56
|
+
- - ~>
|
57
57
|
- !ruby/object:Gem::Version
|
58
58
|
segments:
|
59
59
|
- 2
|
60
|
-
-
|
60
|
+
- 6
|
61
61
|
- 0
|
62
|
-
version: 2.
|
62
|
+
version: 2.6.0
|
63
63
|
type: :development
|
64
64
|
version_requirements: *id003
|
65
65
|
description: MetaInspector lets you scrape a web page and get its title, charset, link and meta tags
|
@@ -73,6 +73,7 @@ extra_rdoc_files: []
|
|
73
73
|
|
74
74
|
files:
|
75
75
|
- .gitignore
|
76
|
+
- .rspec.example
|
76
77
|
- Gemfile
|
77
78
|
- MIT-LICENSE
|
78
79
|
- README.rdoc
|