metainspector 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -32,6 +32,8 @@ Then you can see the scraped data like this:
32
32
  page.meta_description # meta description, as string
33
33
  page.meta_keywords # meta keywords, as string
34
34
  page.image # Most relevant image, if defined with og:image
35
+ page.images # array of strings, with every img found on the page
36
+ page.absolute_images # array of all the images converted to absolute urls
35
37
  page.feed # Get rss or atom links in meta data fields as array
36
38
  page.meta_og_title # opengraph title
37
39
  page.meta_og_image # opengraph image
@@ -101,7 +103,6 @@ You're welcome to fork this project and send pull requests. I want to thank spec
101
103
 
102
104
  * Get page.base_dir from the URL
103
105
  * Distinguish between external and internal links, returning page.links for all of them as found, page.external_links and page.internal_links converted to absolute URLs
104
- * Return array of images in page as absolute URLs
105
106
  * Be able to set a timeout in seconds
106
107
  * If keywords seem to be separated by blank spaces, replace them with commas
107
108
  * Mocks
@@ -30,11 +30,19 @@ module MetaInspector
30
30
  .map {|link| link.attributes["href"] \
31
31
  .to_s.strip}.uniq) rescue nil
32
32
  end
33
+
34
+ def images
35
+ @data.images ||= parsed_document.search('//img').map{ |i| i.attributes['src'].value }.uniq
36
+ end
33
37
 
34
38
  # Returns the links converted to absolute urls
35
39
  def absolute_links
36
40
  @data.absolute_links ||= links.map { |l| absolutify_url(l) }
37
41
  end
42
+
43
+ def absolute_images
44
+ @data.absolute_images ||= images.map{ |i| absolutify_url(i) }
45
+ end
38
46
 
39
47
  # Returns the parsed document meta rss links
40
48
  def feed
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "1.6.0"
4
+ VERSION = "1.7.0"
5
5
  end
@@ -45,6 +45,11 @@ describe MetaInspector do
45
45
  @m.image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
46
46
  @m.meta_og_image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
47
47
  end
48
+
49
+ it "should find all page images" do
50
+ @m.absolute_images == ["http://pagerankalert.com/images/pagerank_alert.png?1309512337"]
51
+ @m.images == ["/images/pagerank_alert.png?1309512337"]
52
+ end
48
53
 
49
54
  it "should have a Nokogiri::HTML::Document as parsed_document" do
50
55
  @m.parsed_document.class.should == Nokogiri::HTML::Document
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 1
7
- - 6
7
+ - 7
8
8
  - 0
9
- version: 1.6.0
9
+ version: 1.7.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Jaime Iniesta
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-06-03 00:00:00 +02:00
17
+ date: 2011-07-05 00:00:00 +02:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency