ucnv-chainsaw 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -12,7 +12,7 @@ A Ruby library for spidering web resources.
12
12
  == Synopsis
13
13
 
14
14
  Chainsaw.launch('http://example.com/').open { |cs|
15
- cs.doc.css('#navi a')[2]
15
+ cs.set_next cs.doc.css('#navi a')[2]
16
16
  }.open { |cs|
17
17
  form = cs.doc.xpath('//form')[0]
18
18
  input = form.xpath('.//input[@type="text"]')[0]
@@ -32,6 +32,7 @@ A Ruby library for spidering web resources.
32
32
 
33
33
  == Installation
34
34
 
35
+ * gem source -a http://gems.github.com
35
36
  * gem install ucnv-chainsaw
36
37
 
37
38
  == Copyright
data/Rakefile CHANGED
@@ -17,7 +17,7 @@ DESCRIPTION = "A Ruby library for spidering web resources."
17
17
  RUBYFORGE_PROJECT = "chainsaw"
18
18
  HOMEPATH = "http://github.com/ucnv/chainsaw/tree/master"
19
19
  BIN_FILES = %w( )
20
- VERS = "0.0.1"
20
+ VERS = "0.0.2"
21
21
 
22
22
  REV = File.read(".svn/entries")[/committed-rev="(d+)"/, 1] rescue nil
23
23
  CLEAN.include ['**/.*.sw?', '*.gem', '.config']
@@ -60,7 +60,7 @@ spec = Gem::Specification.new do |s|
60
60
 
61
61
  s.add_dependency('nokogiri', '>=1.2.1')
62
62
  s.add_dependency('httpclient', '>=2.1.4')
63
- s.required_ruby_version = '>= 1.8.6'
63
+ s.required_ruby_version = '>= 1.8.7'
64
64
 
65
65
  s.files = %w(README.rdoc ChangeLog Rakefile) +
66
66
  Dir.glob("{bin,doc,test,lib,templates,generator,extras,website,script}/**/*") +
@@ -1,15 +1,15 @@
1
- require 'rubygems'
1
+ require 'rubygems'
2
2
  require 'chainsaw'
3
3
 
4
4
  username, password, tweet = ARGV
5
5
 
6
- Chainsaw.launch('https://twitter.com/home').> { |a|
6
+ Chainsaw.launch('https://twitter.com/home').process { |a|
7
7
  f = a.doc.css('.signin').first
8
8
  f.xpath('id("username_or_email")').first['value'] = username
9
9
  f.xpath('id("session[password]")').first['value'] = password
10
10
  a.set_next f
11
- }.> { |a|
11
+ }.process { |a|
12
12
  f = a.doc.css('#doingForm').first
13
13
  f.xpath('id("status")').first.content = tweet || "(o'-')-o fwip fwip"
14
14
  a.set_next f
15
- }.>
15
+ }.process
@@ -0,0 +1,16 @@
1
+ require 'rubygems'
2
+ require 'chainsaw'
3
+
4
+ # post to delicious.com using API
5
+ username = ARGV.shift
6
+ password = ARGV.shift
7
+ url = ARGV.shift
8
+ desc = ARGV.shift || ''
9
+ tags = ARGV.shift || ''
10
+
11
+ uri = URI.parse 'https://api.del.icio.us/v1/posts/add'
12
+ uri.query = "url=#{URI.escape url}&description=#{URI.escape desc}&tags=#{URI.escape tags}"
13
+
14
+ Chainsaw(uri).set_auth(username, password).open {
15
+ puts doc.xpath '//result/@code'
16
+ }
@@ -5,7 +5,7 @@ module Chainsaw
5
5
  class Browser
6
6
  include Chainsaw::Util
7
7
 
8
- DEFAULT_USER_AGENT = '' # TODO: set value
8
+ DEFAULT_USER_AGENT = "Chainsaw/#{VERSION}"
9
9
 
10
10
  attr_accessor :user_agent, :ignore_redirect, :hide_referer, :max_history_count, :encoding # configurables
11
11
  attr_accessor :request_headers, :url_base, :results
@@ -138,10 +138,16 @@ module Chainsaw
138
138
 
139
139
  def document
140
140
  return @document unless @document.nil?
141
- return nil if res.content.nil?
142
- return nil unless is_xml_parsable?(res.contenttype)
143
- enc = @encoding || Encoding.guess(res.content)
144
- @document = Nokogiri.parse(res.content, nil, enc)
141
+ begin
142
+ raise 'Require HTML or XML.' unless
143
+ is_xml_parsable?(res.contenttype)
144
+ enc = @encoding || Encoding.guess(res.content)
145
+ @document = Nokogiri.parse(res.content, nil, enc)
146
+ raise "Something wrong to parse this: #{res.content.to_s}" if
147
+ @document.root.nil?
148
+ rescue
149
+ raise Chainsaw::ParseError
150
+ end
145
151
 
146
152
  b = @document.xpath('//base')
147
153
  base = b.empty? ? '' : b[0]['href']
@@ -168,10 +174,17 @@ module Chainsaw
168
174
  private
169
175
 
170
176
  def process_chain(&block)
171
- if block.arity == 0
172
- results.push yield
173
- else
177
+ unless block.arity == -1
174
178
  results.push yield(self)
179
+ else
180
+ instance_exec(eval('self', block)) do |caller_self|
181
+ mm = lambda do |name, *args|
182
+ caller_self.__send__(name, *args)
183
+ end
184
+ self.class.__send__(:define_method, :method_missing, &mm)
185
+ end
186
+ results.push instance_eval &block
187
+ self.class.__send__(:undef_method, :method_missing)
175
188
  end
176
189
  end
177
190
 
@@ -184,6 +197,8 @@ module Chainsaw
184
197
  @request_count += 1
185
198
  r = begin
186
199
  @engine.send(call, uri, query, @request_headers)
200
+ rescue HTTPClient::BadResponseError => e
201
+ e.res
187
202
  rescue
188
203
  raise(
189
204
  Chainsaw::RequestError,
@@ -256,7 +271,5 @@ module Chainsaw
256
271
  @history.slice! @max_history_count, @history.size - @max_history_count
257
272
  end
258
273
 
259
-
260
-
261
274
  end
262
275
  end
@@ -83,5 +83,9 @@ module Chainsaw
83
83
  include Chainsaw::ErrorWrapper
84
84
  end
85
85
 
86
+ class ParseError < StandardError
87
+ include Chainsaw::ErrorWrapper
88
+ end
89
+
86
90
  end
87
91
 
@@ -22,6 +22,5 @@ class HTTPClient
22
22
  end
23
23
  end
24
24
 
25
- HTTP::Status::SUCCESSFUL_STATUS.push 404
26
25
 
27
26
 
data/lib/chainsaw.rb CHANGED
@@ -17,21 +17,23 @@ Nokogiri::XML::Element.module_eval do
17
17
  end
18
18
 
19
19
  module Chainsaw
20
- VERSION = '0.0.1'
20
+ VERSION = '0.0.2'
21
21
 
22
22
  #
23
23
  # Return a instance of the Chainsaw::Browser class.
24
24
  def self.launch(*args)
25
- args.pop! if args.last.is_a? Proc
26
25
  cs = Chainsaw::Browser.new *args
27
- yield cs if block_given?
26
+ if block_given?
27
+ block = Proc.new
28
+ cs.instance_eval { process_chain &block }
29
+ end
28
30
  cs
29
31
  end
30
32
  end
31
33
 
32
34
  module Kernel
33
35
  #
34
- # alias for Chainsaw.launch
36
+ # alias for Chainsaw#launch
35
37
  def Chainsaw(*args)
36
38
  Chainsaw.launch *args
37
39
  end
data/test/htdocs/06.html CHANGED
@@ -1,6 +1,6 @@
1
1
  <html>
2
2
  <head>
3
- <title>test sjis</title>
3
+ <title>test euc</title>
4
4
  </head>
5
5
  <body>
6
6
  <div id="links">
data/test/htdocs/cgi.rb CHANGED
@@ -32,6 +32,16 @@ if cgi.has_key? 'auth'
32
32
  end
33
33
  end
34
34
 
35
+ if cgi.keys.find {|k| k =~ /^(\d{3})$/}
36
+ status = $1
37
+ cgi.print [
38
+ "Status: #{status}",
39
+ "\n",
40
+ "Status #{status}"
41
+ ].join("\n")
42
+ exit 0
43
+ end
44
+
35
45
  if env['CONTENT_TYPE'] =~ %r{^multipart/form-data;}
36
46
  upload = cgi.params['f'][0]
37
47
  res['upload'] = {
data/test/test_browser.rb CHANGED
@@ -174,6 +174,7 @@ class TestBrowser < Test::Unit::TestCase
174
174
  end
175
175
 
176
176
  =begin
177
+ ## this test works fine but very slow
177
178
  def test_auth
178
179
  user_pass = 'testuser:testpass'
179
180
  Chainsaw.launch(TEST_URL + 'cgi.rb?auth').
@@ -240,7 +241,7 @@ class TestBrowser < Test::Unit::TestCase
240
241
  end
241
242
 
242
243
  def test_ignore_redirect
243
-
244
+ # TODO
244
245
  end
245
246
 
246
247
  def test_result
@@ -256,6 +257,15 @@ class TestBrowser < Test::Unit::TestCase
256
257
 
257
258
  end
258
259
 
260
+ def test_bad_response
261
+ assert_nothing_raised do
262
+ Chainsaw.launch(TEST_URL + 'cgi.rb?500').open { |cs|
263
+ assert_equal 500, cs.res.status
264
+ assert_equal 'Status 500', cs.res.content
265
+ }
266
+ end
267
+ end
268
+
259
269
  def test_aliases
260
270
  Chainsaw.launch(TEST_URL + '03.html').> { |cs|
261
271
  assert_instance_of Nokogiri::HTML::Document, cs.doc
@@ -267,7 +277,22 @@ class TestBrowser < Test::Unit::TestCase
267
277
  assert_equal @text_val, x['params']['t'][0]
268
278
  assert_equal 'go', x['params']['s'][0]
269
279
  }
280
+
281
+ end
270
282
 
283
+ def test_mixed_instance
284
+ cs = Chainsaw.launch(TEST_URL + '01.html').open {
285
+ assert_instance_of Nokogiri::HTML::Document, doc
286
+ assert_equal 200, res.status
287
+ links = doc.search('//a')
288
+ assert_equal links.length, 5
289
+ set_next links[1]
290
+ 'result1'
291
+ }.open {
292
+ assert_equal res.uri.to_s, TEST_URL + '02.html'
293
+ 'result2'
294
+ }
295
+ assert_equal ['result1', 'result2'], cs.results
271
296
  end
272
297
 
273
298
  end
@@ -12,9 +12,30 @@ class TestChainsaw < Test::Unit::TestCase
12
12
  agent2 = Chainsaw.launch('http://example.com/', {:user_agent => 'Chainsaw XXX'})
13
13
 
14
14
  assert_instance_of Chainsaw::Browser, agent1
15
- assert_equal agent1.to_yaml, agent2.to_yaml
15
+ #assert_equal agent1.to_yaml, agent2.to_yaml
16
+ assert_equal agent1.user_agent, agent2.user_agent
16
17
 
17
18
  end
19
+
20
+ def test_launch_more
21
+ assert_nothing_raised do
22
+ Chainsaw {
23
+ set_next 'http://example.com/'
24
+ }
25
+ end
26
+
27
+ assert_nothing_raised do
28
+ Chainsaw('http://example.com/')
29
+ end
30
+
31
+ assert_nothing_raised do
32
+ Chainsaw.launch('http://example.com/some') {
33
+ set_next 'http://example.com/'
34
+ }
35
+ end
36
+ end
37
+
38
+
18
39
 
19
40
  end
20
41
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ucnv-chainsaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - ucnv
@@ -9,7 +9,7 @@ autorequire: ""
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-03-02 00:00:00 -08:00
12
+ date: 2009-03-07 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -71,6 +71,7 @@ files:
71
71
  - lib/chainsaw.rb
72
72
  - examples/01_google.rb
73
73
  - examples/02_twitter.rb
74
+ - examples/03_delicious.rb
74
75
  has_rdoc: true
75
76
  homepage: http://github.com/ucnv/chainsaw/tree/master
76
77
  post_install_message:
@@ -93,7 +94,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
93
94
  requirements:
94
95
  - - ">="
95
96
  - !ruby/object:Gem::Version
96
- version: 1.8.6
97
+ version: 1.8.7
97
98
  version:
98
99
  required_rubygems_version: !ruby/object:Gem::Requirement
99
100
  requirements: