metainspector 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/.rspec.example +1 -0
- data/README.rdoc +5 -1
- data/Rakefile +3 -0
- data/lib/meta_inspector.rb +1 -1
- data/lib/meta_inspector/scraper.rb +15 -0
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +1 -1
- data/spec/metainspector_spec.rb +11 -1
- metadata +7 -6
data/.gitignore
CHANGED
data/.rspec.example
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour --format d
|
data/README.rdoc
CHANGED
@@ -31,6 +31,7 @@ Then you can see the scraped data like this:
|
|
31
31
|
page.meta_description # meta description, as string
|
32
32
|
page.meta_keywords # meta keywords, as string
|
33
33
|
page.image # Most relevant image, if defined with og:image
|
34
|
+
page.feed # First rss or atom feed link found in the page's link tags, as string
|
34
35
|
|
35
36
|
MetaInspector uses dynamic methods for meta_tag discovery, so all these will work, and will be converted to a search of a meta tag by the corresponding name, and return its content attribute
|
36
37
|
|
@@ -84,7 +85,10 @@ You can find some sample scripts on the samples folder, including a basic scrapi
|
|
84
85
|
|
85
86
|
= ZOMG Fork! Thank you!
|
86
87
|
|
87
|
-
You're welcome to fork this project and send pull requests. I want to thank
|
88
|
+
You're welcome to fork this project and send pull requests. I want to thank specially:
|
89
|
+
|
90
|
+
* Ryan Romanchuk https://github.com/rromanchuk
|
91
|
+
* Edmund Haselwanter https://github.com/ehaselwanter
|
88
92
|
|
89
93
|
= To Do
|
90
94
|
|
data/Rakefile
CHANGED
data/lib/meta_inspector.rb
CHANGED
@@ -28,6 +28,15 @@ module MetaInspector
|
|
28
28
|
@links ||= parsed_document.search("//a").map {|link| link.attributes["href"].to_s.strip} rescue nil
|
29
29
|
end
|
30
30
|
|
31
|
+
# Returns the first RSS or Atom feed link found in the parsed document, as an absolute URL (nil on error)
|
32
|
+
def feed
|
33
|
+
@feed ||= parsed_document.xpath("//link").select{ |link|
|
34
|
+
link.attributes["type"] && link.attributes["type"].value =~ /(atom|rss)/
|
35
|
+
}.map { |link|
|
36
|
+
absolutify_url(link.attributes["href"].value)
|
37
|
+
}.first rescue nil
|
38
|
+
end
|
39
|
+
|
31
40
|
# Returns the parsed image from Facebook's open graph property tags
|
32
41
|
# Most all major websites now define this property and is usually very relevant
|
33
42
|
# See doc at http://developers.facebook.com/docs/opengraph/
|
@@ -81,5 +90,11 @@ module MetaInspector
|
|
81
90
|
super
|
82
91
|
end
|
83
92
|
end
|
93
|
+
|
94
|
+
private
|
95
|
+
|
96
|
+
def absolutify_url(url)
|
97
|
+
url =~ /^http.*/ ? url : File.join(@url,url)
|
98
|
+
end
|
84
99
|
end
|
85
100
|
end
|
data/meta_inspector.gemspec
CHANGED
data/spec/metainspector_spec.rb
CHANGED
@@ -37,7 +37,7 @@ describe MetaInspector do
|
|
37
37
|
end
|
38
38
|
|
39
39
|
it "should get the links" do
|
40
|
-
@m.links.size.should ==
|
40
|
+
@m.links.size.should == 9
|
41
41
|
end
|
42
42
|
|
43
43
|
it "should have a Nokogiri::HTML::Document as parsed_document" do
|
@@ -47,6 +47,16 @@ describe MetaInspector do
|
|
47
47
|
it "should have a String as document" do
|
48
48
|
@m.document.class.should == String
|
49
49
|
end
|
50
|
+
|
51
|
+
it "should get rss feed" do
|
52
|
+
@m = MetaInspector.new('http://www.iteh.at')
|
53
|
+
@m.feed.should == 'http://www.iteh.at/de/rss/'
|
54
|
+
end
|
55
|
+
|
56
|
+
it "should get atom feed" do
|
57
|
+
@m = MetaInspector.new('http://www.tea-tron.com/jbravo/blog/')
|
58
|
+
@m.feed.should == 'http://www.tea-tron.com/jbravo/blog/feed/'
|
59
|
+
end
|
50
60
|
end
|
51
61
|
|
52
62
|
context 'Getting meta tags by ghost methods' do
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 1
|
7
|
-
- 3
|
7
|
+
- 4
|
8
8
|
- 0
|
9
|
-
version: 1.3.0
|
9
|
+
version: 1.4.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Jaime Iniesta
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-05-
|
17
|
+
date: 2011-05-30 00:00:00 +02:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -53,13 +53,13 @@ dependencies:
|
|
53
53
|
requirement: &id003 !ruby/object:Gem::Requirement
|
54
54
|
none: false
|
55
55
|
requirements:
|
56
|
-
- -
|
56
|
+
- - ~>
|
57
57
|
- !ruby/object:Gem::Version
|
58
58
|
segments:
|
59
59
|
- 2
|
60
|
-
-
|
60
|
+
- 6
|
61
61
|
- 0
|
62
|
-
version: 2.
|
62
|
+
version: 2.6.0
|
63
63
|
type: :development
|
64
64
|
version_requirements: *id003
|
65
65
|
description: MetaInspector lets you scrape a web page and get its title, charset, link and meta tags
|
@@ -73,6 +73,7 @@ extra_rdoc_files: []
|
|
73
73
|
|
74
74
|
files:
|
75
75
|
- .gitignore
|
76
|
+
- .rspec.example
|
76
77
|
- Gemfile
|
77
78
|
- MIT-LICENSE
|
78
79
|
- README.rdoc
|