spk-anemone 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.rdoc CHANGED
@@ -1,3 +1,13 @@
1
+ == 0.3.1 / 2010-01-22
2
+
3
+ * Minor enhancements
4
+
5
+ * Added an attr_reader to Page for the HTTP response body
6
+
7
+ * Bug fixes
8
+
9
+ * Fixed incorrect method calls in CLI scripts
10
+
1
11
  == 0.3.0 / 2009-12-15
2
12
 
3
13
  * Major enhancements
@@ -7,7 +7,7 @@ rescue
7
7
  puts <<-INFO
8
8
  Usage:
9
9
  anemone count <url>
10
-
10
+
11
11
  Synopsis:
12
12
  Crawls a site starting at the given URL and outputs the total number
13
13
  of unique pages on the site.
@@ -17,6 +17,6 @@ end
17
17
 
18
18
  Anemone.crawl(url) do |anemone|
19
19
  anemone.after_crawl do |pages|
20
- puts pages.uniq.size
20
+ puts pages.uniq!.size
21
21
  end
22
22
  end
@@ -17,16 +17,16 @@ end
17
17
 
18
18
  Anemone.crawl(root) do |anemone|
19
19
  anemone.skip_links_like %r{^/c/$}, %r{^/stores/$}
20
-
20
+
21
21
  anemone.after_crawl do |pages|
22
- pages = pages.shortest_paths!(root).uniq
23
-
22
+ pages = pages.shortest_paths!(root).uniq!
23
+
24
24
  depths = pages.values.inject({}) do |depths, page|
25
25
  depths[page.depth] ||= 0
26
26
  depths[page.depth] += 1
27
27
  depths
28
28
  end
29
-
29
+
30
30
  depths.sort.each { |depth, count| puts "Depth: #{depth} Count: #{count}" }
31
31
  end
32
32
  end
data/lib/anemone/core.rb CHANGED
@@ -7,7 +7,7 @@ require 'anemone/storage'
7
7
 
8
8
  module Anemone
9
9
 
10
- VERSION = '0.3.0';
10
+ VERSION = '0.3.1';
11
11
 
12
12
  #
13
13
  # Convenience method to start a crawl
@@ -193,7 +193,7 @@ module Anemone
193
193
  @robots = Robots.new(@opts[:user_agent]) if @opts[:obey_robots_txt]
194
194
  end
195
195
 
196
- # Generate Authorization string only if not already set
196
+ # Generate Authorization string and set authorization opts
197
197
  def authorization(auth=nil)
198
198
  require 'base64'
199
199
  if auth.is_a?(String) && auth.include?(':')
data/lib/anemone/page.rb CHANGED
@@ -6,6 +6,8 @@ module Anemone
6
6
 
7
7
  # The URL of the page
8
8
  attr_reader :url
9
+ # The raw HTTP response body of the page
10
+ attr_reader :body
9
11
  # Headers of the HTTP response
10
12
  attr_reader :headers
11
13
  # URL of the page this one redirected to, if any
@@ -50,7 +52,9 @@ module Anemone
50
52
  @fetched = !params[:code].nil?
51
53
  end
52
54
 
55
+ #
53
56
  # Array of distinct A tag HREFs from the page
57
+ #
54
58
  def links
55
59
  return @links unless @links.nil?
56
60
  @links = []
@@ -66,18 +70,26 @@ module Anemone
66
70
  @links
67
71
  end
68
72
 
73
+ #
69
74
  # Nokogiri document for the HTML body
75
+ #
70
76
  def doc
71
77
  return @doc if @doc
72
78
  @doc = Nokogiri::HTML(@body) if @body && html? rescue nil
73
79
  end
74
80
 
81
+ #
75
82
  # Delete the Nokogiri document and response body to conserve memory
83
+ #
76
84
  def discard_doc!
77
85
  links # force parsing of page links before we trash the document
78
86
  @doc = @body = nil
79
87
  end
80
88
 
89
+ #
90
+ # Was the page successfully fetched?
91
+ # +true+ if the page was fetched with no error, +false+ otherwise.
92
+ #
81
93
  def fetched?
82
94
  @fetched
83
95
  end
data/spec/page_spec.rb CHANGED
@@ -17,6 +17,12 @@ module Anemone
17
17
  fail_page.fetched?.should == false
18
18
  end
19
19
 
20
+ it "should store and expose the response body of the HTTP request" do
21
+ body = 'test'
22
+ page = @http.fetch_page(FakePage.new('body_test', {:body => body}).url)
23
+ page.body.should == body
24
+ end
25
+
20
26
  it "should record any error that occurs during fetch_page" do
21
27
  @page.should respond_to(:error)
22
28
  @page.error.should be_nil
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spk-anemone
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Kite
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-03 00:00:00 +01:00
12
+ date: 2010-01-29 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency