ucnv-chainsaw 0.0.1 → 0.0.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -12,7 +12,7 @@ A Ruby library for spidering web resources.
12
12
  == Synopsis
13
13
 
14
14
  Chainsaw.launch('http://example.com/').open { |cs|
15
- cs.doc.css('#navi a')[2]
15
+ cs.set_next cs.doc.css('#navi a')[2]
16
16
  }.open { |cs|
17
17
  form = cs.doc.xpath('//form')[0]
18
18
  input = form.xpath('.//input[@type="text"]')[0]
@@ -32,6 +32,7 @@ A Ruby library for spidering web resources.
32
32
 
33
33
  == Installation
34
34
 
35
+ * gem source -a http://gems.github.com
35
36
  * gem install ucnv-chainsaw
36
37
 
37
38
  == Copyright
data/Rakefile CHANGED
@@ -17,7 +17,7 @@ DESCRIPTION = "A Ruby library for spidering web resources."
17
17
  RUBYFORGE_PROJECT = "chainsaw"
18
18
  HOMEPATH = "http://github.com/ucnv/chainsaw/tree/master"
19
19
  BIN_FILES = %w( )
20
- VERS = "0.0.1"
20
+ VERS = "0.0.2"
21
21
 
22
22
  REV = File.read(".svn/entries")[/committed-rev="(d+)"/, 1] rescue nil
23
23
  CLEAN.include ['**/.*.sw?', '*.gem', '.config']
@@ -60,7 +60,7 @@ spec = Gem::Specification.new do |s|
60
60
 
61
61
  s.add_dependency('nokogiri', '>=1.2.1')
62
62
  s.add_dependency('httpclient', '>=2.1.4')
63
- s.required_ruby_version = '>= 1.8.6'
63
+ s.required_ruby_version = '>= 1.8.7'
64
64
 
65
65
  s.files = %w(README.rdoc ChangeLog Rakefile) +
66
66
  Dir.glob("{bin,doc,test,lib,templates,generator,extras,website,script}/**/*") +
@@ -1,15 +1,15 @@
1
- require 'rubygems'
1
+ require 'rubygems'
2
2
  require 'chainsaw'
3
3
 
4
4
  username, password, tweet = ARGV
5
5
 
6
- Chainsaw.launch('https://twitter.com/home').> { |a|
6
+ Chainsaw.launch('https://twitter.com/home').process { |a|
7
7
  f = a.doc.css('.signin').first
8
8
  f.xpath('id("username_or_email")').first['value'] = username
9
9
  f.xpath('id("session[password]")').first['value'] = password
10
10
  a.set_next f
11
- }.> { |a|
11
+ }.process { |a|
12
12
  f = a.doc.css('#doingForm').first
13
13
  f.xpath('id("status")').first.content = tweet || "(o'-')-o fwip fwip"
14
14
  a.set_next f
15
- }.>
15
+ }.process
@@ -0,0 +1,16 @@
1
+ require 'rubygems'
2
+ require 'chainsaw'
3
+
4
+ # post to delicious.com using API
5
+ username = ARGV.shift
6
+ password = ARGV.shift
7
+ url = ARGV.shift
8
+ desc = ARGV.shift || ''
9
+ tags = ARGV.shift || ''
10
+
11
+ uri = URI.parse 'https://api.del.icio.us/v1/posts/add'
12
+ uri.query = "url=#{URI.escape url}&description=#{URI.escape desc}&tags=#{URI.escape tags}"
13
+
14
+ Chainsaw(uri).set_auth(username, password).open {
15
+ puts doc.xpath '//result/@code'
16
+ }
@@ -5,7 +5,7 @@ module Chainsaw
5
5
  class Browser
6
6
  include Chainsaw::Util
7
7
 
8
- DEFAULT_USER_AGENT = '' # TODO: set value
8
+ DEFAULT_USER_AGENT = "Chainsaw/#{VERSION}"
9
9
 
10
10
  attr_accessor :user_agent, :ignore_redirect, :hide_referer, :max_history_count, :encoding # configurables
11
11
  attr_accessor :request_headers, :url_base, :results
@@ -138,10 +138,16 @@ module Chainsaw
138
138
 
139
139
  def document
140
140
  return @document unless @document.nil?
141
- return nil if res.content.nil?
142
- return nil unless is_xml_parsable?(res.contenttype)
143
- enc = @encoding || Encoding.guess(res.content)
144
- @document = Nokogiri.parse(res.content, nil, enc)
141
+ begin
142
+ raise 'Require HTML or XML.' unless
143
+ is_xml_parsable?(res.contenttype)
144
+ enc = @encoding || Encoding.guess(res.content)
145
+ @document = Nokogiri.parse(res.content, nil, enc)
146
+ raise "Something wrong to parse this: #{res.content.to_s}" if
147
+ @document.root.nil?
148
+ rescue
149
+ raise Chainsaw::ParseError
150
+ end
145
151
 
146
152
  b = @document.xpath('//base')
147
153
  base = b.empty? ? '' : b[0]['href']
@@ -168,10 +174,17 @@ module Chainsaw
168
174
  private
169
175
 
170
176
  def process_chain(&block)
171
- if block.arity == 0
172
- results.push yield
173
- else
177
+ unless block.arity == -1
174
178
  results.push yield(self)
179
+ else
180
+ instance_exec(eval('self', block)) do |caller_self|
181
+ mm = lambda do |name, *args|
182
+ caller_self.__send__(name, *args)
183
+ end
184
+ self.class.__send__(:define_method, :method_missing, &mm)
185
+ end
186
+ results.push instance_eval &block
187
+ self.class.__send__(:undef_method, :method_missing)
175
188
  end
176
189
  end
177
190
 
@@ -184,6 +197,8 @@ module Chainsaw
184
197
  @request_count += 1
185
198
  r = begin
186
199
  @engine.send(call, uri, query, @request_headers)
200
+ rescue HTTPClient::BadResponseError => e
201
+ e.res
187
202
  rescue
188
203
  raise(
189
204
  Chainsaw::RequestError,
@@ -256,7 +271,5 @@ module Chainsaw
256
271
  @history.slice! @max_history_count, @history.size - @max_history_count
257
272
  end
258
273
 
259
-
260
-
261
274
  end
262
275
  end
@@ -83,5 +83,9 @@ module Chainsaw
83
83
  include Chainsaw::ErrorWrapper
84
84
  end
85
85
 
86
+ class ParseError < StandardError
87
+ include Chainsaw::ErrorWrapper
88
+ end
89
+
86
90
  end
87
91
 
@@ -22,6 +22,5 @@ class HTTPClient
22
22
  end
23
23
  end
24
24
 
25
- HTTP::Status::SUCCESSFUL_STATUS.push 404
26
25
 
27
26
 
data/lib/chainsaw.rb CHANGED
@@ -17,21 +17,23 @@ Nokogiri::XML::Element.module_eval do
17
17
  end
18
18
 
19
19
  module Chainsaw
20
- VERSION = '0.0.1'
20
+ VERSION = '0.0.2'
21
21
 
22
22
  #
23
23
  # Return a instance of the Chainsaw::Browser class.
24
24
  def self.launch(*args)
25
- args.pop! if args.last.is_a? Proc
26
25
  cs = Chainsaw::Browser.new *args
27
- yield cs if block_given?
26
+ if block_given?
27
+ block = Proc.new
28
+ cs.instance_eval { process_chain &block }
29
+ end
28
30
  cs
29
31
  end
30
32
  end
31
33
 
32
34
  module Kernel
33
35
  #
34
- # alias for Chainsaw.launch
36
+ # alias for Chainsaw#launch
35
37
  def Chainsaw(*args)
36
38
  Chainsaw.launch *args
37
39
  end
data/test/htdocs/06.html CHANGED
@@ -1,6 +1,6 @@
1
1
  <html>
2
2
  <head>
3
- <title>test sjis</title>
3
+ <title>test euc</title>
4
4
  </head>
5
5
  <body>
6
6
  <div id="links">
data/test/htdocs/cgi.rb CHANGED
@@ -32,6 +32,16 @@ if cgi.has_key? 'auth'
32
32
  end
33
33
  end
34
34
 
35
+ if cgi.keys.find {|k| k =~ /^(\d{3})$/}
36
+ status = $1
37
+ cgi.print [
38
+ "Status: #{status}",
39
+ "\n",
40
+ "Status #{status}"
41
+ ].join("\n")
42
+ exit 0
43
+ end
44
+
35
45
  if env['CONTENT_TYPE'] =~ %r{^multipart/form-data;}
36
46
  upload = cgi.params['f'][0]
37
47
  res['upload'] = {
data/test/test_browser.rb CHANGED
@@ -174,6 +174,7 @@ class TestBrowser < Test::Unit::TestCase
174
174
  end
175
175
 
176
176
  =begin
177
+ ## this test works fine but very slow
177
178
  def test_auth
178
179
  user_pass = 'testuser:testpass'
179
180
  Chainsaw.launch(TEST_URL + 'cgi.rb?auth').
@@ -240,7 +241,7 @@ class TestBrowser < Test::Unit::TestCase
240
241
  end
241
242
 
242
243
  def test_ignore_redirect
243
-
244
+ # TODO
244
245
  end
245
246
 
246
247
  def test_result
@@ -256,6 +257,15 @@ class TestBrowser < Test::Unit::TestCase
256
257
 
257
258
  end
258
259
 
260
+ def test_bad_response
261
+ assert_nothing_raised do
262
+ Chainsaw.launch(TEST_URL + 'cgi.rb?500').open { |cs|
263
+ assert_equal 500, cs.res.status
264
+ assert_equal 'Status 500', cs.res.content
265
+ }
266
+ end
267
+ end
268
+
259
269
  def test_aliases
260
270
  Chainsaw.launch(TEST_URL + '03.html').> { |cs|
261
271
  assert_instance_of Nokogiri::HTML::Document, cs.doc
@@ -267,7 +277,22 @@ class TestBrowser < Test::Unit::TestCase
267
277
  assert_equal @text_val, x['params']['t'][0]
268
278
  assert_equal 'go', x['params']['s'][0]
269
279
  }
280
+
281
+ end
270
282
 
283
+ def test_mixed_instance
284
+ cs = Chainsaw.launch(TEST_URL + '01.html').open {
285
+ assert_instance_of Nokogiri::HTML::Document, doc
286
+ assert_equal 200, res.status
287
+ links = doc.search('//a')
288
+ assert_equal links.length, 5
289
+ set_next links[1]
290
+ 'result1'
291
+ }.open {
292
+ assert_equal res.uri.to_s, TEST_URL + '02.html'
293
+ 'result2'
294
+ }
295
+ assert_equal ['result1', 'result2'], cs.results
271
296
  end
272
297
 
273
298
  end
@@ -12,9 +12,30 @@ class TestChainsaw < Test::Unit::TestCase
12
12
  agent2 = Chainsaw.launch('http://example.com/', {:user_agent => 'Chainsaw XXX'})
13
13
 
14
14
  assert_instance_of Chainsaw::Browser, agent1
15
- assert_equal agent1.to_yaml, agent2.to_yaml
15
+ #assert_equal agent1.to_yaml, agent2.to_yaml
16
+ assert_equal agent1.user_agent, agent2.user_agent
16
17
 
17
18
  end
19
+
20
+ def test_launch_more
21
+ assert_nothing_raised do
22
+ Chainsaw {
23
+ set_next 'http://example.com/'
24
+ }
25
+ end
26
+
27
+ assert_nothing_raised do
28
+ Chainsaw('http://example.com/')
29
+ end
30
+
31
+ assert_nothing_raised do
32
+ Chainsaw.launch('http://example.com/some') {
33
+ set_next 'http://example.com/'
34
+ }
35
+ end
36
+ end
37
+
38
+
18
39
 
19
40
  end
20
41
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ucnv-chainsaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - ucnv
@@ -9,7 +9,7 @@ autorequire: ""
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-03-02 00:00:00 -08:00
12
+ date: 2009-03-07 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -71,6 +71,7 @@ files:
71
71
  - lib/chainsaw.rb
72
72
  - examples/01_google.rb
73
73
  - examples/02_twitter.rb
74
+ - examples/03_delicious.rb
74
75
  has_rdoc: true
75
76
  homepage: http://github.com/ucnv/chainsaw/tree/master
76
77
  post_install_message:
@@ -93,7 +94,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
93
94
  requirements:
94
95
  - - ">="
95
96
  - !ruby/object:Gem::Version
96
- version: 1.8.6
97
+ version: 1.8.7
97
98
  version:
98
99
  required_rubygems_version: !ruby/object:Gem::Requirement
99
100
  requirements: