metainspector 1.6.0 → 1.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +2 -1
- data/lib/meta_inspector/scraper.rb +8 -0
- data/lib/meta_inspector/version.rb +1 -1
- data/spec/metainspector_spec.rb +5 -0
- metadata +3 -3
data/README.rdoc
CHANGED
@@ -32,6 +32,8 @@ Then you can see the scraped data like this:
|
|
32
32
|
page.meta_description # meta description, as string
|
33
33
|
page.meta_keywords # meta keywords, as string
|
34
34
|
page.image # Most relevant image, if defined with og:image
|
35
|
+
page.images # array of strings, with every img found on the page
|
36
|
+
page.absolute_images # array of all the images converted to absolute urls
|
35
37
|
page.feed # Get rss or atom links in meta data fields as array
|
36
38
|
page.meta_og_title # opengraph title
|
37
39
|
page.meta_og_image # opengraph image
|
@@ -101,7 +103,6 @@ You're welcome to fork this project and send pull requests. I want to thank spec
|
|
101
103
|
|
102
104
|
* Get page.base_dir from the URL
|
103
105
|
* Distinguish between external and internal links, returning page.links for all of them as found, page.external_links and page.internal_links converted to absolute URLs
|
104
|
-
* Return array of images in page as absolute URLs
|
105
106
|
* Be able to set a timeout in seconds
|
106
107
|
* If keywords seem to be separated by blank spaces, replace them with commas
|
107
108
|
* Mocks
|
@@ -30,11 +30,19 @@ module MetaInspector
|
|
30
30
|
.map {|link| link.attributes["href"] \
|
31
31
|
.to_s.strip}.uniq) rescue nil
|
32
32
|
end
|
33
|
+
|
34
|
+
def images
|
35
|
+
@data.images ||= parsed_document.search('//img').map{ |i| i.attributes['src'].value }.uniq
|
36
|
+
end
|
33
37
|
|
34
38
|
# Returns the links converted to absolute urls
|
35
39
|
def absolute_links
|
36
40
|
@data.absolute_links ||= links.map { |l| absolutify_url(l) }
|
37
41
|
end
|
42
|
+
|
43
|
+
def absolute_images
|
44
|
+
@data.absolute_images ||= images.map{ |i| absolutify_url(i) }
|
45
|
+
end
|
38
46
|
|
39
47
|
# Returns the parsed document meta rss links
|
40
48
|
def feed
|
data/spec/metainspector_spec.rb
CHANGED
@@ -45,6 +45,11 @@ describe MetaInspector do
|
|
45
45
|
@m.image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
|
46
46
|
@m.meta_og_image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
|
47
47
|
end
|
48
|
+
|
49
|
+
it "should find all page images" do
|
50
|
+
@m.absolute_images == ["http://pagerankalert.com/images/pagerank_alert.png?1309512337"]
|
51
|
+
@m.images == ["/images/pagerank_alert.png?1309512337"]
|
52
|
+
end
|
48
53
|
|
49
54
|
it "should have a Nokogiri::HTML::Document as parsed_document" do
|
50
55
|
@m.parsed_document.class.should == Nokogiri::HTML::Document
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 1
|
7
|
-
- 6
|
7
|
+
- 7
|
8
8
|
- 0
|
9
|
-
version: 1.6.0
|
9
|
+
version: 1.7.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Jaime Iniesta
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-
|
17
|
+
date: 2011-07-05 00:00:00 +02:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|