ucnv-chainsaw 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +2 -1
- data/Rakefile +2 -2
- data/examples/02_twitter.rb +4 -4
- data/examples/03_delicious.rb +16 -0
- data/lib/chainsaw/browser.rb +23 -10
- data/lib/chainsaw/common.rb +4 -0
- data/lib/chainsaw/ext/httpclient.rb +0 -1
- data/lib/chainsaw.rb +6 -4
- data/test/htdocs/06.html +1 -1
- data/test/htdocs/cgi.rb +10 -0
- data/test/test_browser.rb +26 -1
- data/test/test_chainsaw.rb +22 -1
- metadata +4 -3
data/README.rdoc
CHANGED
@@ -12,7 +12,7 @@ A Ruby library for spidering web resources.
|
|
12
12
|
== Synopsis
|
13
13
|
|
14
14
|
Chainsaw.launch('http://example.com/').open { |cs|
|
15
|
-
cs.doc.css('#navi a')[2]
|
15
|
+
cs.set_next cs.doc.css('#navi a')[2]
|
16
16
|
}.open { |cs|
|
17
17
|
form = cs.doc.xpath('//form')[0]
|
18
18
|
input = form.xpath('.//input[@type="text"]')[0]
|
@@ -32,6 +32,7 @@ A Ruby library for spidering web resources.
|
|
32
32
|
|
33
33
|
== Installation
|
34
34
|
|
35
|
+
* gem source -a http://gems.github.com
|
35
36
|
* gem install ucnv-chainsaw
|
36
37
|
|
37
38
|
== Copyright
|
data/Rakefile
CHANGED
@@ -17,7 +17,7 @@ DESCRIPTION = "A Ruby library for spidering web resources."
|
|
17
17
|
RUBYFORGE_PROJECT = "chainsaw"
|
18
18
|
HOMEPATH = "http://github.com/ucnv/chainsaw/tree/master"
|
19
19
|
BIN_FILES = %w( )
|
20
|
-
VERS = "0.0.1"
|
20
|
+
VERS = "0.0.2"
|
21
21
|
|
22
22
|
REV = File.read(".svn/entries")[/committed-rev="(d+)"/, 1] rescue nil
|
23
23
|
CLEAN.include ['**/.*.sw?', '*.gem', '.config']
|
@@ -60,7 +60,7 @@ spec = Gem::Specification.new do |s|
|
|
60
60
|
|
61
61
|
s.add_dependency('nokogiri', '>=1.2.1')
|
62
62
|
s.add_dependency('httpclient', '>=2.1.4')
|
63
|
-
s.required_ruby_version = '>= 1.8.
|
63
|
+
s.required_ruby_version = '>= 1.8.7'
|
64
64
|
|
65
65
|
s.files = %w(README.rdoc ChangeLog Rakefile) +
|
66
66
|
Dir.glob("{bin,doc,test,lib,templates,generator,extras,website,script}/**/*") +
|
data/examples/02_twitter.rb
CHANGED
@@ -1,15 +1,15 @@
|
|
1
|
-
require 'rubygems'
|
1
|
+
require 'rubygems'
|
2
2
|
require 'chainsaw'
|
3
3
|
|
4
4
|
username, password, tweet = ARGV
|
5
5
|
|
6
|
-
Chainsaw.launch('https://twitter.com/home')
|
6
|
+
Chainsaw.launch('https://twitter.com/home').process { |a|
|
7
7
|
f = a.doc.css('.signin').first
|
8
8
|
f.xpath('id("username_or_email")').first['value'] = username
|
9
9
|
f.xpath('id("session[password]")').first['value'] = password
|
10
10
|
a.set_next f
|
11
|
-
}
|
11
|
+
}.process { |a|
|
12
12
|
f = a.doc.css('#doingForm').first
|
13
13
|
f.xpath('id("status")').first.content = tweet || "(o'-')-o fwip fwip"
|
14
14
|
a.set_next f
|
15
|
-
}
|
15
|
+
}.process
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'chainsaw'
|
3
|
+
|
4
|
+
# post to delicious.com using API
|
5
|
+
username = ARGV.shift
|
6
|
+
password = ARGV.shift
|
7
|
+
url = ARGV.shift
|
8
|
+
desc = ARGV.shift || ''
|
9
|
+
tags = ARGV.shift || ''
|
10
|
+
|
11
|
+
uri = URI.parse 'https://api.del.icio.us/v1/posts/add'
|
12
|
+
uri.query = "url=#{URI.escape url}&description=#{URI.escape desc}&tags=#{URI.escape tags}"
|
13
|
+
|
14
|
+
Chainsaw(uri).set_auth(username, password).open {
|
15
|
+
puts doc.xpath '//result/@code'
|
16
|
+
}
|
data/lib/chainsaw/browser.rb
CHANGED
@@ -5,7 +5,7 @@ module Chainsaw
|
|
5
5
|
class Browser
|
6
6
|
include Chainsaw::Util
|
7
7
|
|
8
|
-
DEFAULT_USER_AGENT =
|
8
|
+
DEFAULT_USER_AGENT = "Chainsaw/#{VERSION}"
|
9
9
|
|
10
10
|
attr_accessor :user_agent, :ignore_redirect, :hide_referer, :max_history_count, :encoding # configurables
|
11
11
|
attr_accessor :request_headers, :url_base, :results
|
@@ -138,10 +138,16 @@ module Chainsaw
|
|
138
138
|
|
139
139
|
def document
|
140
140
|
return @document unless @document.nil?
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
141
|
+
begin
|
142
|
+
raise 'Require HTML or XML.' unless
|
143
|
+
is_xml_parsable?(res.contenttype)
|
144
|
+
enc = @encoding || Encoding.guess(res.content)
|
145
|
+
@document = Nokogiri.parse(res.content, nil, enc)
|
146
|
+
raise "Something wrong to parse this: #{res.content.to_s}" if
|
147
|
+
@document.root.nil?
|
148
|
+
rescue
|
149
|
+
raise Chainsaw::ParseError
|
150
|
+
end
|
145
151
|
|
146
152
|
b = @document.xpath('//base')
|
147
153
|
base = b.empty? ? '' : b[0]['href']
|
@@ -168,10 +174,17 @@ module Chainsaw
|
|
168
174
|
private
|
169
175
|
|
170
176
|
def process_chain(&block)
|
171
|
-
|
172
|
-
results.push yield
|
173
|
-
else
|
177
|
+
unless block.arity == -1
|
174
178
|
results.push yield(self)
|
179
|
+
else
|
180
|
+
instance_exec(eval('self', block)) do |caller_self|
|
181
|
+
mm = lambda do |name, *args|
|
182
|
+
caller_self.__send__(name, *args)
|
183
|
+
end
|
184
|
+
self.class.__send__(:define_method, :method_missing, &mm)
|
185
|
+
end
|
186
|
+
results.push instance_eval &block
|
187
|
+
self.class.__send__(:undef_method, :method_missing)
|
175
188
|
end
|
176
189
|
end
|
177
190
|
|
@@ -184,6 +197,8 @@ module Chainsaw
|
|
184
197
|
@request_count += 1
|
185
198
|
r = begin
|
186
199
|
@engine.send(call, uri, query, @request_headers)
|
200
|
+
rescue HTTPClient::BadResponseError => e
|
201
|
+
e.res
|
187
202
|
rescue
|
188
203
|
raise(
|
189
204
|
Chainsaw::RequestError,
|
@@ -256,7 +271,5 @@ module Chainsaw
|
|
256
271
|
@history.slice! @max_history_count, @history.size - @max_history_count
|
257
272
|
end
|
258
273
|
|
259
|
-
|
260
|
-
|
261
274
|
end
|
262
275
|
end
|
data/lib/chainsaw/common.rb
CHANGED
data/lib/chainsaw.rb
CHANGED
@@ -17,21 +17,23 @@ Nokogiri::XML::Element.module_eval do
|
|
17
17
|
end
|
18
18
|
|
19
19
|
module Chainsaw
|
20
|
-
VERSION = '0.0.1'
|
20
|
+
VERSION = '0.0.2'
|
21
21
|
|
22
22
|
#
|
23
23
|
# Return a instance of the Chainsaw::Browser class.
|
24
24
|
def self.launch(*args)
|
25
|
-
args.pop! if args.last.is_a? Proc
|
26
25
|
cs = Chainsaw::Browser.new *args
|
27
|
-
|
26
|
+
if block_given?
|
27
|
+
block = Proc.new
|
28
|
+
cs.instance_eval { process_chain &block }
|
29
|
+
end
|
28
30
|
cs
|
29
31
|
end
|
30
32
|
end
|
31
33
|
|
32
34
|
module Kernel
|
33
35
|
#
|
34
|
-
# alias for Chainsaw
|
36
|
+
# alias for Chainsaw#launch
|
35
37
|
def Chainsaw(*args)
|
36
38
|
Chainsaw.launch *args
|
37
39
|
end
|
data/test/htdocs/06.html
CHANGED
data/test/htdocs/cgi.rb
CHANGED
@@ -32,6 +32,16 @@ if cgi.has_key? 'auth'
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
+
if cgi.keys.find {|k| k =~ /^(\d{3})$/}
|
36
|
+
status = $1
|
37
|
+
cgi.print [
|
38
|
+
"Status: #{status}",
|
39
|
+
"\n",
|
40
|
+
"Status #{status}"
|
41
|
+
].join("\n")
|
42
|
+
exit 0
|
43
|
+
end
|
44
|
+
|
35
45
|
if env['CONTENT_TYPE'] =~ %r{^multipart/form-data;}
|
36
46
|
upload = cgi.params['f'][0]
|
37
47
|
res['upload'] = {
|
data/test/test_browser.rb
CHANGED
@@ -174,6 +174,7 @@ class TestBrowser < Test::Unit::TestCase
|
|
174
174
|
end
|
175
175
|
|
176
176
|
=begin
|
177
|
+
## this test works fine but very slow
|
177
178
|
def test_auth
|
178
179
|
user_pass = 'testuser:testpass'
|
179
180
|
Chainsaw.launch(TEST_URL + 'cgi.rb?auth').
|
@@ -240,7 +241,7 @@ class TestBrowser < Test::Unit::TestCase
|
|
240
241
|
end
|
241
242
|
|
242
243
|
def test_ignore_redirect
|
243
|
-
|
244
|
+
# TODO
|
244
245
|
end
|
245
246
|
|
246
247
|
def test_result
|
@@ -256,6 +257,15 @@ class TestBrowser < Test::Unit::TestCase
|
|
256
257
|
|
257
258
|
end
|
258
259
|
|
260
|
+
def test_bad_response
|
261
|
+
assert_nothing_raised do
|
262
|
+
Chainsaw.launch(TEST_URL + 'cgi.rb?500').open { |cs|
|
263
|
+
assert_equal 500, cs.res.status
|
264
|
+
assert_equal 'Status 500', cs.res.content
|
265
|
+
}
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
259
269
|
def test_aliases
|
260
270
|
Chainsaw.launch(TEST_URL + '03.html').> { |cs|
|
261
271
|
assert_instance_of Nokogiri::HTML::Document, cs.doc
|
@@ -267,7 +277,22 @@ class TestBrowser < Test::Unit::TestCase
|
|
267
277
|
assert_equal @text_val, x['params']['t'][0]
|
268
278
|
assert_equal 'go', x['params']['s'][0]
|
269
279
|
}
|
280
|
+
|
281
|
+
end
|
270
282
|
|
283
|
+
def test_mixed_instance
|
284
|
+
cs = Chainsaw.launch(TEST_URL + '01.html').open {
|
285
|
+
assert_instance_of Nokogiri::HTML::Document, doc
|
286
|
+
assert_equal 200, res.status
|
287
|
+
links = doc.search('//a')
|
288
|
+
assert_equal links.length, 5
|
289
|
+
set_next links[1]
|
290
|
+
'result1'
|
291
|
+
}.open {
|
292
|
+
assert_equal res.uri.to_s, TEST_URL + '02.html'
|
293
|
+
'result2'
|
294
|
+
}
|
295
|
+
assert_equal ['result1', 'result2'], cs.results
|
271
296
|
end
|
272
297
|
|
273
298
|
end
|
data/test/test_chainsaw.rb
CHANGED
@@ -12,9 +12,30 @@ class TestChainsaw < Test::Unit::TestCase
|
|
12
12
|
agent2 = Chainsaw.launch('http://example.com/', {:user_agent => 'Chainsaw XXX'})
|
13
13
|
|
14
14
|
assert_instance_of Chainsaw::Browser, agent1
|
15
|
-
assert_equal agent1.to_yaml, agent2.to_yaml
|
15
|
+
#assert_equal agent1.to_yaml, agent2.to_yaml
|
16
|
+
assert_equal agent1.user_agent, agent2.user_agent
|
16
17
|
|
17
18
|
end
|
19
|
+
|
20
|
+
def test_launch_more
|
21
|
+
assert_nothing_raised do
|
22
|
+
Chainsaw {
|
23
|
+
set_next 'http://example.com/'
|
24
|
+
}
|
25
|
+
end
|
26
|
+
|
27
|
+
assert_nothing_raised do
|
28
|
+
Chainsaw('http://example.com/')
|
29
|
+
end
|
30
|
+
|
31
|
+
assert_nothing_raised do
|
32
|
+
Chainsaw.launch('http://example.com/some') {
|
33
|
+
set_next 'http://example.com/'
|
34
|
+
}
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
|
18
39
|
|
19
40
|
end
|
20
41
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ucnv-chainsaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ucnv
|
@@ -9,7 +9,7 @@ autorequire: ""
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-03-
|
12
|
+
date: 2009-03-07 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -71,6 +71,7 @@ files:
|
|
71
71
|
- lib/chainsaw.rb
|
72
72
|
- examples/01_google.rb
|
73
73
|
- examples/02_twitter.rb
|
74
|
+
- examples/03_delicious.rb
|
74
75
|
has_rdoc: true
|
75
76
|
homepage: http://github.com/ucnv/chainsaw/tree/master
|
76
77
|
post_install_message:
|
@@ -93,7 +94,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
93
94
|
requirements:
|
94
95
|
- - ">="
|
95
96
|
- !ruby/object:Gem::Version
|
96
|
-
version: 1.8.
|
97
|
+
version: 1.8.7
|
97
98
|
version:
|
98
99
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
99
100
|
requirements:
|