RubyGems - spidr - Versions diffs - 0.1.3 → 0.1.4 - Mend

spidr 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

data/History.txt CHANGED Viewed

@@ -1,3 +1,7 @@
+=== 0.1.4 / 2009-01-15
+* Use Nokogiri for HTML and XML parsing.
 === 0.1.3 / 2009-01-10
 * Added the :host options to Spidr::Agent#initialize.

data/README.txt CHANGED Viewed

@@ -25,7 +25,7 @@ and easy to use.
 == REQUIREMENTS:
-* Hpricot
+* nokogiri
 == INSTALL:

data/Rakefile CHANGED Viewed

@@ -10,7 +10,7 @@ Hoe.new('spidr', Spidr::VERSION) do |p|
   p.rubyforge_name = 'spidr'
   p.developer('Postmodern', 'postmodern.mod3@gmail.com')
   p.remote_rdoc_dir = 'docs'
-  p.extra_deps = ['hpricot']
+  p.extra_deps = ['nokogiri']
 end
 # vim: syntax=Ruby

data/lib/spidr/agent.rb CHANGED Viewed

@@ -3,7 +3,6 @@ require 'spidr/page'
 require 'spidr/spidr'
 require 'net/http'
-require 'hpricot'
 module Spidr
   class Agent

data/lib/spidr/page.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 require 'uri'
-require 'hpricot'
+require 'nokogiri'
 module Spidr
   class Page
@@ -185,12 +185,17 @@ module Spidr
     end
     #
-    # Returns an Hpricot::Doc if the page represents a HTML document,
-    # returns +nil+ otherwise.
+    # If the page has a <tt>text/html</tt> content-type, a
+    # Nokogiri::HTML::Document object will be returned. If the page has a
+    # <tt>text/xml</tt> content-type, a Nokogiri::XML::Document object
+    # will be returned. Other content-types will cause +nil+ to be
+    # returned.
     #
     def doc
       if html?
-        return @doc ||= Hpricot(body)
+        return @doc ||= Nokogiri::HTML(body)
+      elsif xml?
+        return @doc ||= Nokogiri::XML(body)
       end
     end
@@ -201,8 +206,8 @@ module Spidr
       urls = []
       if html?
-        doc.search('a[@href]') do |a|
-          url = a.attributes['href'].strip
+        self.doc.search('a[@href]').each do |a|
+          url = a.get_attribute('href')
           urls << url unless url.empty?
         end

data/lib/spidr/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Spidr
-  VERSION = '0.1.3'
+  VERSION = '0.1.4'
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: spidr
 version: !ruby/object:Gem::Version
-  version: 0.1.3
+  version: 0.1.4
 platform: ruby
 authors:
 - Postmodern
@@ -9,11 +9,11 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-01-10 00:00:00 -08:00
+date: 2009-01-15 00:00:00 -08:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
-  name: hpricot
+  name: nokogiri
   type: :runtime
   version_requirement:
   version_requirements: !ruby/object:Gem::Requirement