RubyGems - metainspector - Versions diffs - 1.13.1 → 1.14.0 - Mend

metainspector 1.13.1 → 1.14.0

Files changed (6) hide show

data/README.md CHANGED Viewed

@@ -28,6 +28,10 @@ If you don't include the scheme on the URL, http:// will be used by default:
     page = MetaInspector.new('markupvalidator.com')
+You can also pass in the HTML, which will be used as the document to scrape:
+    page = MetaInspector.new("http://markupvalidator.com", :document => "<html><head><title>Hello From Passed Html</title><a href='/hello'>Hello link</a></head><body></body></html>")
 ## Accessing scraped data
 Then you can see the scraped data like this:
@@ -70,9 +74,13 @@ You can also access most of the scraped data as a hash:
     page.to_hash  # { "url"   => "http://markupvalidator.com",
                       "title" => "MarkupValidator :: site-wide markup validation tool", ... }
-The full scraped document if accessible from:
+The original document is accessible from:
+    page.document         # A String with the contents of the HTML document
+And the full scraped document is accessible from:
-    page.document  # Nokogiri doc that you can use it to get any element from the page
+    page.parsed_document  # Nokogiri doc that you can use to get any element from the page
 ## Options
@@ -166,4 +174,4 @@ Thanks to all the contributors:
 [https://github.com/jaimeiniesta/metainspector/graphs/contributors](https://github.com/jaimeiniesta/metainspector/graphs/contributors)
-Copyright (c) 2009-2012 Jaime Iniesta, released under the MIT license
+Copyright (c) 2009-2012 Jaime Iniesta, released under the MIT license

data/lib/meta_inspector/scraper.rb CHANGED Viewed

@@ -16,8 +16,10 @@ module MetaInspector
     # Options:
     # => timeout: defaults to 20 seconds
     # => html_content_type_only: if an exception should be raised if request content-type is not text/html. Defaults to false
-    # => allow_safe_redirections:   if redirects from http to https sites on the same domain should be allowed or not
+    # => allow_safe_redirections: if redirects from http to https sites on the same domain should be allowed or not
     # => allow_unsafe_redirections: if redirects from https to http sites on the same domain should be allowed or not
+    # => document: the HTML of the page at the url, as a string
+    # => verbose: if the errors should be logged to the screen
     def initialize(url, options = {})
       options   = defaults.merge(options)
@@ -32,6 +34,7 @@ module MetaInspector
       @allow_safe_redirections    = options[:allow_safe_redirections]
       @allow_unsafe_redirections  = options[:allow_unsafe_redirections]
       @verbose                    = options[:verbose]
+      @document                   = options[:document]
     end
     # Returns the parsed document title, from the content of the <title> tag.

data/lib/meta_inspector/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # -*- encoding: utf-8 -*-
 module MetaInspector
-  VERSION = "1.13.1"
+  VERSION = "1.14.0"
 end

data/meta_inspector.gemspec CHANGED Viewed

@@ -21,5 +21,5 @@ Gem::Specification.new do |gem|
   gem.add_development_dependency 'rspec', '2.12.0'
   gem.add_development_dependency 'fakeweb', '1.3.0'
   gem.add_development_dependency 'awesome_print', '1.1.0'
-  gem.add_development_dependency 'rake', '10.0.2'
+  gem.add_development_dependency 'rake', '~> 10.0.3'
 end

data/spec/metainspector_spec.rb CHANGED Viewed

@@ -113,6 +113,21 @@ describe MetaInspector do
     end
   end
+  describe 'Doing a basic scrape from passed url html' do
+    before(:each) do
+      @m = MetaInspector.new("http://cnn.com", :document => "<html><head><title>Hello From Passed Html</title><a href='/hello'>Hello link</a></head><body></body></html>")
+    end
+    it "should get correct links when the url html is passed as an option" do
+      @m.links.should == ["http://cnn.com/hello"]
+    end
+    it "should get the title" do
+      @m.title.should == "Hello From Passed Html"
+    end
+  end
   describe 'Page with missing meta description' do
     it "should find secondary description" do
       @m = MetaInspector.new('http://theonion-no-description.com')

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: metainspector
 version: !ruby/object:Gem::Version
-  hash: 33
+  hash: 47
   prerelease:
   segments:
   - 1
-  - 13
-  - 1
-  version: 1.13.1
+  - 14
+  - 0
+  version: 1.14.0
 platform: ruby
 authors:
 - Jaime Iniesta
@@ -15,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-12-13 00:00:00 Z
+date: 2013-01-14 00:00:00 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -118,14 +118,14 @@ dependencies:
   requirement: &id007 !ruby/object:Gem::Requirement
     none: false
     requirements:
-    - - "="
+    - - ~>
       - !ruby/object:Gem::Version
-        hash: 75
+        hash: 73
         segments:
         - 10
         - 0
-        - 2
-        version: 10.0.2
+        - 3
+        version: 10.0.3
   type: :development
   version_requirements: *id007
 description: MetaInspector lets you scrape a web page and get its title, charset, link and meta tags