RubyGems - anemone - Versions diffs - 0.3.0 → 0.3.1 - Mend

anemone 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

data/lib/anemone/cli/count.rb CHANGED

@@ -7,7 +7,7 @@ rescue
   puts <<-INFO
 Usage:
   anemone count <url>
 Synopsis:
   Crawls a site starting at the given URL and outputs the total number
   of unique pages on the site.
@@ -17,6 +17,6 @@ end
 Anemone.crawl(url) do |anemone|
   anemone.after_crawl do |pages|
-    puts pages.uniq.size
+    puts pages.uniq!.size
   end
 end

data/lib/anemone/cli/pagedepth.rb CHANGED

@@ -17,16 +17,16 @@ end
 Anemone.crawl(root) do |anemone|
   anemone.skip_links_like %r{^/c/$}, %r{^/stores/$}
   anemone.after_crawl do |pages|
-    pages = pages.shortest_paths!(root).uniq
+    pages = pages.shortest_paths!(root).uniq!
     depths = pages.values.inject({}) do |depths, page|
       depths[page.depth] ||= 0
       depths[page.depth] += 1
       depths
     end
     depths.sort.each { |depth, count| puts "Depth: #{depth} Count: #{count}" }
   end
 end

data/lib/anemone/core.rb CHANGED

@@ -7,7 +7,7 @@ require 'anemone/storage'
 module Anemone
-  VERSION = '0.3.0';
+  VERSION = '0.3.1';
   #
   # Convenience method to start a crawl

data/lib/anemone/page.rb CHANGED

@@ -6,6 +6,8 @@ module Anemone
     # The URL of the page
     attr_reader :url
+    # The raw HTTP response body of the page
+    attr_reader :body
     # Headers of the HTTP response
     attr_reader :headers
     # URL of the page this one redirected to, if any
@@ -48,7 +50,9 @@ module Anemone
       @fetched = !params[:code].nil?
     end
+    #
     # Array of distinct A tag HREFs from the page
+    #
     def links
       return @links unless @links.nil?
       @links = []
@@ -64,18 +68,26 @@ module Anemone
       @links
     end
+    #
     # Nokogiri document for the HTML body
+    #
     def doc
       return @doc if @doc
       @doc = Nokogiri::HTML(@body) if @body && html? rescue nil
     end
+    #
     # Delete the Nokogiri document and response body to conserve memory
+    #
     def discard_doc!
       links # force parsing of page links before we trash the document
       @doc = @body = nil
     end
+    #
+    # Was the page successfully fetched?
+    # +true+ if the page was fetched with no error, +false+ otherwise.
+    #
     def fetched?
       @fetched
     end

data/spec/page_spec.rb CHANGED

@@ -17,6 +17,12 @@ module Anemone
       fail_page.fetched?.should == false
     end
+    it "should store and expose the response body of the HTTP request" do
+      body = 'test'
+      page = @http.fetch_page(FakePage.new('body_test', {:body => body}).url)
+      page.body.should == body
+    end
     it "should record any error that occurs during fetch_page" do
       @page.should respond_to(:error)
       @page.error.should be_nil

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: anemone
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.3.1
 platform: ruby
 authors:
 - Chris Kite
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-12-16 00:00:00 -06:00
+date: 2010-01-22 00:00:00 -06:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency