RubyGems - metainspector - Versions diffs - 1.6.0 → 1.7.0 - Mend

metainspector 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

data/README.rdoc +2 -1
data/lib/meta_inspector/scraper.rb +8 -0
data/lib/meta_inspector/version.rb +1 -1
data/spec/metainspector_spec.rb +5 -0
metadata +3 -3

data/README.rdoc CHANGED Viewed

@@ -32,6 +32,8 @@ Then you can see the scraped data like this:
   page.meta_description   # meta description, as string
   page.meta_keywords      # meta keywords, as string
   page.image              # Most relevant image, if defined with og:image
+  page.images             # array of strings, with every img found on the page
+  page.absolute_images    # array of all the images converted to absolute urls
   page.feed               # Get rss or atom links in meta data fields as array
   page.meta_og_title      # opengraph title
   page.meta_og_image      # opengraph image
@@ -101,7 +103,6 @@ You're welcome to fork this project and send pull requests. I want to thank spec
 * Get page.base_dir from the URL
 * Distinguish between external and internal links, returning page.links for all of them as found, page.external_links and page.internal_links converted to absolute URLs
-* Return array of images in page as absolute URLs
 * Be able to set a timeout in seconds
 * If keywords seem to be separated by blank spaces, replace them with commas
 * Mocks

data/lib/meta_inspector/scraper.rb CHANGED Viewed

@@ -30,11 +30,19 @@ module MetaInspector
                                 .map {|link| link.attributes["href"] \
                                 .to_s.strip}.uniq) rescue nil
     end
+    def images
+      @data.images ||= parsed_document.search('//img').map{ |i| i.attributes['src'].value }.uniq
+    end
     # Returns the links converted to absolute urls
     def absolute_links
       @data.absolute_links ||= links.map { |l| absolutify_url(l) }
     end
+    def absolute_images
+      @data.absolute_images ||= images.map{ |i| absolutify_url(i) }
+    end
     # Returns the parsed document meta rss links
     def feed

data/lib/meta_inspector/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # -*- encoding: utf-8 -*-
 module MetaInspector
-  VERSION = "1.6.0"
+  VERSION = "1.7.0"
 end

data/spec/metainspector_spec.rb CHANGED Viewed

@@ -45,6 +45,11 @@ describe MetaInspector do
       @m.image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
       @m.meta_og_image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
     end
+    it "should find all page images" do
+      @m.absolute_images == ["http://pagerankalert.com/images/pagerank_alert.png?1309512337"]
+      @m.images == ["/images/pagerank_alert.png?1309512337"]
+    end
     it "should have a Nokogiri::HTML::Document as parsed_document" do
       @m.parsed_document.class.should == Nokogiri::HTML::Document

metadata CHANGED Viewed

@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
   prerelease: false
   segments:
   - 1
-  - 6
+  - 7
   - 0
-  version: 1.6.0
+  version: 1.7.0
 platform: ruby
 authors:
 - Jaime Iniesta
@@ -14,7 +14,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-06-03 00:00:00 +02:00
+date: 2011-07-05 00:00:00 +02:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency