spk-anemone 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.rdoc CHANGED
@@ -1,3 +1,13 @@
1
+ == 0.3.1 / 2010-01-22
2
+
3
+ * Minor enhancements
4
+
5
+ * Added an attr_reader to Page for the HTTP response body
6
+
7
+ * Bug fixes
8
+
9
+ * Fixed incorrect method calls in CLI scripts
10
+
1
11
  == 0.3.0 / 2009-12-15
2
12
 
3
13
  * Major enhancements
@@ -7,7 +7,7 @@ rescue
7
7
  puts <<-INFO
8
8
  Usage:
9
9
  anemone count <url>
10
-
10
+
11
11
  Synopsis:
12
12
  Crawls a site starting at the given URL and outputs the total number
13
13
  of unique pages on the site.
@@ -17,6 +17,6 @@ end
17
17
 
18
18
  Anemone.crawl(url) do |anemone|
19
19
  anemone.after_crawl do |pages|
20
- puts pages.uniq.size
20
+ puts pages.uniq!.size
21
21
  end
22
22
  end
@@ -17,16 +17,16 @@ end
17
17
 
18
18
  Anemone.crawl(root) do |anemone|
19
19
  anemone.skip_links_like %r{^/c/$}, %r{^/stores/$}
20
-
20
+
21
21
  anemone.after_crawl do |pages|
22
- pages = pages.shortest_paths!(root).uniq
23
-
22
+ pages = pages.shortest_paths!(root).uniq!
23
+
24
24
  depths = pages.values.inject({}) do |depths, page|
25
25
  depths[page.depth] ||= 0
26
26
  depths[page.depth] += 1
27
27
  depths
28
28
  end
29
-
29
+
30
30
  depths.sort.each { |depth, count| puts "Depth: #{depth} Count: #{count}" }
31
31
  end
32
32
  end
data/lib/anemone/core.rb CHANGED
@@ -7,7 +7,7 @@ require 'anemone/storage'
7
7
 
8
8
  module Anemone
9
9
 
10
- VERSION = '0.3.0';
10
+ VERSION = '0.3.1';
11
11
 
12
12
  #
13
13
  # Convenience method to start a crawl
@@ -193,7 +193,7 @@ module Anemone
193
193
  @robots = Robots.new(@opts[:user_agent]) if @opts[:obey_robots_txt]
194
194
  end
195
195
 
196
- # Generate Authorization string only if not already set
196
+ # Generate Authorization string and set authorization opts
197
197
  def authorization(auth=nil)
198
198
  require 'base64'
199
199
  if auth.is_a?(String) && auth.include?(':')
data/lib/anemone/page.rb CHANGED
@@ -6,6 +6,8 @@ module Anemone
6
6
 
7
7
  # The URL of the page
8
8
  attr_reader :url
9
+ # The raw HTTP response body of the page
10
+ attr_reader :body
9
11
  # Headers of the HTTP response
10
12
  attr_reader :headers
11
13
  # URL of the page this one redirected to, if any
@@ -50,7 +52,9 @@ module Anemone
50
52
  @fetched = !params[:code].nil?
51
53
  end
52
54
 
55
+ #
53
56
  # Array of distinct A tag HREFs from the page
57
+ #
54
58
  def links
55
59
  return @links unless @links.nil?
56
60
  @links = []
@@ -66,18 +70,26 @@ module Anemone
66
70
  @links
67
71
  end
68
72
 
73
+ #
69
74
  # Nokogiri document for the HTML body
75
+ #
70
76
  def doc
71
77
  return @doc if @doc
72
78
  @doc = Nokogiri::HTML(@body) if @body && html? rescue nil
73
79
  end
74
80
 
81
+ #
75
82
  # Delete the Nokogiri document and response body to conserve memory
83
+ #
76
84
  def discard_doc!
77
85
  links # force parsing of page links before we trash the document
78
86
  @doc = @body = nil
79
87
  end
80
88
 
89
+ #
90
+ # Was the page successfully fetched?
91
+ # +true+ if the page was fetched with no error, +false+ otherwise.
92
+ #
81
93
  def fetched?
82
94
  @fetched
83
95
  end
data/spec/page_spec.rb CHANGED
@@ -17,6 +17,12 @@ module Anemone
17
17
  fail_page.fetched?.should == false
18
18
  end
19
19
 
20
+ it "should store and expose the response body of the HTTP request" do
21
+ body = 'test'
22
+ page = @http.fetch_page(FakePage.new('body_test', {:body => body}).url)
23
+ page.body.should == body
24
+ end
25
+
20
26
  it "should record any error that occurs during fetch_page" do
21
27
  @page.should respond_to(:error)
22
28
  @page.error.should be_nil
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spk-anemone
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Kite
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-03 00:00:00 +01:00
12
+ date: 2010-01-29 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency