metainspector 1.6.0 → 1.7.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -32,6 +32,8 @@ Then you can see the scraped data like this:
32
32
  page.meta_description # meta description, as string
33
33
  page.meta_keywords # meta keywords, as string
34
34
  page.image # Most relevant image, if defined with og:image
35
+ page.images # array of strings, with the src of every img found on the page (duplicates removed)
36
+ page.absolute_images # array of strings, with every image src from page.images converted to an absolute URL
35
37
  page.feed # Get rss or atom links in meta data fields as array
36
38
  page.meta_og_title # opengraph title
37
39
  page.meta_og_image # opengraph image
@@ -101,7 +103,6 @@ You're welcome to fork this project and send pull requests. I want to thank spec
101
103
 
102
104
  * Get page.base_dir from the URL
103
105
  * Distinguish between external and internal links, returning page.links for all of them as found, page.external_links and page.internal_links converted to absolute URLs
104
- * Return array of images in page as absolute URLs
105
106
  * Be able to set a timeout in seconds
106
107
  * If keywords seem to be separated by blank spaces, replace them with commas
107
108
  * Mocks
@@ -30,11 +30,19 @@ module MetaInspector
30
30
  .map {|link| link.attributes["href"] \
31
31
  .to_s.strip}.uniq) rescue nil
32
32
  end
33
+
34
+ def images
35
+ @data.images ||= parsed_document.search('//img').map{ |i| i.attributes['src'].value }.uniq
36
+ end
33
37
 
34
38
  # Returns the links converted to absolute urls
35
39
  def absolute_links
36
40
  @data.absolute_links ||= links.map { |l| absolutify_url(l) }
37
41
  end
42
+
43
+ def absolute_images
44
+ @data.absolute_images ||= images.map{ |i| absolutify_url(i) }
45
+ end
38
46
 
39
47
  # Returns the parsed document meta rss links
40
48
  def feed
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "1.6.0"
4
+ VERSION = "1.7.0"
5
5
  end
@@ -45,6 +45,11 @@ describe MetaInspector do
45
45
  @m.image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
46
46
  @m.meta_og_image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
47
47
  end
48
+
49
+ it "should find all page images" do
50
+ @m.absolute_images == ["http://pagerankalert.com/images/pagerank_alert.png?1309512337"]
51
+ @m.images == ["/images/pagerank_alert.png?1309512337"]
52
+ end
48
53
 
49
54
  it "should have a Nokogiri::HTML::Document as parsed_document" do
50
55
  @m.parsed_document.class.should == Nokogiri::HTML::Document
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 1
7
- - 6
7
+ - 7
8
8
  - 0
9
- version: 1.6.0
9
+ version: 1.7.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Jaime Iniesta
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-06-03 00:00:00 +02:00
17
+ date: 2011-07-05 00:00:00 +02:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency