ucnv-chainsaw 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +2 -1
- data/Rakefile +2 -2
- data/examples/02_twitter.rb +4 -4
- data/examples/03_delicious.rb +16 -0
- data/lib/chainsaw/browser.rb +23 -10
- data/lib/chainsaw/common.rb +4 -0
- data/lib/chainsaw/ext/httpclient.rb +0 -1
- data/lib/chainsaw.rb +6 -4
- data/test/htdocs/06.html +1 -1
- data/test/htdocs/cgi.rb +10 -0
- data/test/test_browser.rb +26 -1
- data/test/test_chainsaw.rb +22 -1
- metadata +4 -3
data/README.rdoc
CHANGED
@@ -12,7 +12,7 @@ A Ruby library for spidering web resources.
|
|
12
12
|
== Synopsis
|
13
13
|
|
14
14
|
Chainsaw.launch('http://example.com/').open { |cs|
|
15
|
-
cs.doc.css('#navi a')[2]
|
15
|
+
cs.set_next cs.doc.css('#navi a')[2]
|
16
16
|
}.open { |cs|
|
17
17
|
form = cs.doc.xpath('//form')[0]
|
18
18
|
input = form.xpath('.//input[@type="text"]')[0]
|
@@ -32,6 +32,7 @@ A Ruby library for spidering web resources.
|
|
32
32
|
|
33
33
|
== Installation
|
34
34
|
|
35
|
+
* gem source -a http://gems.github.com
|
35
36
|
* gem install ucnv-chainsaw
|
36
37
|
|
37
38
|
== Copyright
|
data/Rakefile
CHANGED
@@ -17,7 +17,7 @@ DESCRIPTION = "A Ruby library for spidering web resources."
|
|
17
17
|
RUBYFORGE_PROJECT = "chainsaw"
|
18
18
|
HOMEPATH = "http://github.com/ucnv/chainsaw/tree/master"
|
19
19
|
BIN_FILES = %w( )
|
20
|
-
VERS = "0.0.1"
|
20
|
+
VERS = "0.0.2"
|
21
21
|
|
22
22
|
REV = File.read(".svn/entries")[/committed-rev="(d+)"/, 1] rescue nil
|
23
23
|
CLEAN.include ['**/.*.sw?', '*.gem', '.config']
|
@@ -60,7 +60,7 @@ spec = Gem::Specification.new do |s|
|
|
60
60
|
|
61
61
|
s.add_dependency('nokogiri', '>=1.2.1')
|
62
62
|
s.add_dependency('httpclient', '>=2.1.4')
|
63
|
-
s.required_ruby_version = '>= 1.8.
|
63
|
+
s.required_ruby_version = '>= 1.8.7'
|
64
64
|
|
65
65
|
s.files = %w(README.rdoc ChangeLog Rakefile) +
|
66
66
|
Dir.glob("{bin,doc,test,lib,templates,generator,extras,website,script}/**/*") +
|
data/examples/02_twitter.rb
CHANGED
@@ -1,15 +1,15 @@
|
|
1
|
-
require 'rubygems'
|
1
|
+
require 'rubygems'
|
2
2
|
require 'chainsaw'
|
3
3
|
|
4
4
|
username, password, tweet = ARGV
|
5
5
|
|
6
|
-
Chainsaw.launch('https://twitter.com/home')
|
6
|
+
Chainsaw.launch('https://twitter.com/home').process { |a|
|
7
7
|
f = a.doc.css('.signin').first
|
8
8
|
f.xpath('id("username_or_email")').first['value'] = username
|
9
9
|
f.xpath('id("session[password]")').first['value'] = password
|
10
10
|
a.set_next f
|
11
|
-
}
|
11
|
+
}.process { |a|
|
12
12
|
f = a.doc.css('#doingForm').first
|
13
13
|
f.xpath('id("status")').first.content = tweet || "(o'-')-o fwip fwip"
|
14
14
|
a.set_next f
|
15
|
-
}
|
15
|
+
}.process
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'chainsaw'
|
3
|
+
|
4
|
+
# post to delicious.com using API
|
5
|
+
username = ARGV.shift
|
6
|
+
password = ARGV.shift
|
7
|
+
url = ARGV.shift
|
8
|
+
desc = ARGV.shift || ''
|
9
|
+
tags = ARGV.shift || ''
|
10
|
+
|
11
|
+
uri = URI.parse 'https://api.del.icio.us/v1/posts/add'
|
12
|
+
uri.query = "url=#{URI.escape url}&description=#{URI.escape desc}&tags=#{URI.escape tags}"
|
13
|
+
|
14
|
+
Chainsaw(uri).set_auth(username, password).open {
|
15
|
+
puts doc.xpath '//result/@code'
|
16
|
+
}
|
data/lib/chainsaw/browser.rb
CHANGED
@@ -5,7 +5,7 @@ module Chainsaw
|
|
5
5
|
class Browser
|
6
6
|
include Chainsaw::Util
|
7
7
|
|
8
|
-
DEFAULT_USER_AGENT =
|
8
|
+
DEFAULT_USER_AGENT = "Chainsaw/#{VERSION}"
|
9
9
|
|
10
10
|
attr_accessor :user_agent, :ignore_redirect, :hide_referer, :max_history_count, :encoding # configurables
|
11
11
|
attr_accessor :request_headers, :url_base, :results
|
@@ -138,10 +138,16 @@ module Chainsaw
|
|
138
138
|
|
139
139
|
def document
|
140
140
|
return @document unless @document.nil?
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
141
|
+
begin
|
142
|
+
raise 'Require HTML or XML.' unless
|
143
|
+
is_xml_parsable?(res.contenttype)
|
144
|
+
enc = @encoding || Encoding.guess(res.content)
|
145
|
+
@document = Nokogiri.parse(res.content, nil, enc)
|
146
|
+
raise "Something wrong to parse this: #{res.content.to_s}" if
|
147
|
+
@document.root.nil?
|
148
|
+
rescue
|
149
|
+
raise Chainsaw::ParseError
|
150
|
+
end
|
145
151
|
|
146
152
|
b = @document.xpath('//base')
|
147
153
|
base = b.empty? ? '' : b[0]['href']
|
@@ -168,10 +174,17 @@ module Chainsaw
|
|
168
174
|
private
|
169
175
|
|
170
176
|
def process_chain(&block)
|
171
|
-
|
172
|
-
results.push yield
|
173
|
-
else
|
177
|
+
unless block.arity == -1
|
174
178
|
results.push yield(self)
|
179
|
+
else
|
180
|
+
instance_exec(eval('self', block)) do |caller_self|
|
181
|
+
mm = lambda do |name, *args|
|
182
|
+
caller_self.__send__(name, *args)
|
183
|
+
end
|
184
|
+
self.class.__send__(:define_method, :method_missing, &mm)
|
185
|
+
end
|
186
|
+
results.push instance_eval &block
|
187
|
+
self.class.__send__(:undef_method, :method_missing)
|
175
188
|
end
|
176
189
|
end
|
177
190
|
|
@@ -184,6 +197,8 @@ module Chainsaw
|
|
184
197
|
@request_count += 1
|
185
198
|
r = begin
|
186
199
|
@engine.send(call, uri, query, @request_headers)
|
200
|
+
rescue HTTPClient::BadResponseError => e
|
201
|
+
e.res
|
187
202
|
rescue
|
188
203
|
raise(
|
189
204
|
Chainsaw::RequestError,
|
@@ -256,7 +271,5 @@ module Chainsaw
|
|
256
271
|
@history.slice! @max_history_count, @history.size - @max_history_count
|
257
272
|
end
|
258
273
|
|
259
|
-
|
260
|
-
|
261
274
|
end
|
262
275
|
end
|
data/lib/chainsaw/common.rb
CHANGED
data/lib/chainsaw.rb
CHANGED
@@ -17,21 +17,23 @@ Nokogiri::XML::Element.module_eval do
|
|
17
17
|
end
|
18
18
|
|
19
19
|
module Chainsaw
|
20
|
-
VERSION = '0.0.1'
|
20
|
+
VERSION = '0.0.2'
|
21
21
|
|
22
22
|
#
|
23
23
|
# Return a instance of the Chainsaw::Browser class.
|
24
24
|
def self.launch(*args)
|
25
|
-
args.pop! if args.last.is_a? Proc
|
26
25
|
cs = Chainsaw::Browser.new *args
|
27
|
-
|
26
|
+
if block_given?
|
27
|
+
block = Proc.new
|
28
|
+
cs.instance_eval { process_chain &block }
|
29
|
+
end
|
28
30
|
cs
|
29
31
|
end
|
30
32
|
end
|
31
33
|
|
32
34
|
module Kernel
|
33
35
|
#
|
34
|
-
# alias for Chainsaw
|
36
|
+
# alias for Chainsaw#launch
|
35
37
|
def Chainsaw(*args)
|
36
38
|
Chainsaw.launch *args
|
37
39
|
end
|
data/test/htdocs/06.html
CHANGED
data/test/htdocs/cgi.rb
CHANGED
@@ -32,6 +32,16 @@ if cgi.has_key? 'auth'
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
+
if cgi.keys.find {|k| k =~ /^(\d{3})$/}
|
36
|
+
status = $1
|
37
|
+
cgi.print [
|
38
|
+
"Status: #{status}",
|
39
|
+
"\n",
|
40
|
+
"Status #{status}"
|
41
|
+
].join("\n")
|
42
|
+
exit 0
|
43
|
+
end
|
44
|
+
|
35
45
|
if env['CONTENT_TYPE'] =~ %r{^multipart/form-data;}
|
36
46
|
upload = cgi.params['f'][0]
|
37
47
|
res['upload'] = {
|
data/test/test_browser.rb
CHANGED
@@ -174,6 +174,7 @@ class TestBrowser < Test::Unit::TestCase
|
|
174
174
|
end
|
175
175
|
|
176
176
|
=begin
|
177
|
+
## this test works fine but very slow
|
177
178
|
def test_auth
|
178
179
|
user_pass = 'testuser:testpass'
|
179
180
|
Chainsaw.launch(TEST_URL + 'cgi.rb?auth').
|
@@ -240,7 +241,7 @@ class TestBrowser < Test::Unit::TestCase
|
|
240
241
|
end
|
241
242
|
|
242
243
|
def test_ignore_redirect
|
243
|
-
|
244
|
+
# TODO
|
244
245
|
end
|
245
246
|
|
246
247
|
def test_result
|
@@ -256,6 +257,15 @@ class TestBrowser < Test::Unit::TestCase
|
|
256
257
|
|
257
258
|
end
|
258
259
|
|
260
|
+
def test_bad_response
|
261
|
+
assert_nothing_raised do
|
262
|
+
Chainsaw.launch(TEST_URL + 'cgi.rb?500').open { |cs|
|
263
|
+
assert_equal 500, cs.res.status
|
264
|
+
assert_equal 'Status 500', cs.res.content
|
265
|
+
}
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
259
269
|
def test_aliases
|
260
270
|
Chainsaw.launch(TEST_URL + '03.html').> { |cs|
|
261
271
|
assert_instance_of Nokogiri::HTML::Document, cs.doc
|
@@ -267,7 +277,22 @@ class TestBrowser < Test::Unit::TestCase
|
|
267
277
|
assert_equal @text_val, x['params']['t'][0]
|
268
278
|
assert_equal 'go', x['params']['s'][0]
|
269
279
|
}
|
280
|
+
|
281
|
+
end
|
270
282
|
|
283
|
+
def test_mixed_instance
|
284
|
+
cs = Chainsaw.launch(TEST_URL + '01.html').open {
|
285
|
+
assert_instance_of Nokogiri::HTML::Document, doc
|
286
|
+
assert_equal 200, res.status
|
287
|
+
links = doc.search('//a')
|
288
|
+
assert_equal links.length, 5
|
289
|
+
set_next links[1]
|
290
|
+
'result1'
|
291
|
+
}.open {
|
292
|
+
assert_equal res.uri.to_s, TEST_URL + '02.html'
|
293
|
+
'result2'
|
294
|
+
}
|
295
|
+
assert_equal ['result1', 'result2'], cs.results
|
271
296
|
end
|
272
297
|
|
273
298
|
end
|
data/test/test_chainsaw.rb
CHANGED
@@ -12,9 +12,30 @@ class TestChainsaw < Test::Unit::TestCase
|
|
12
12
|
agent2 = Chainsaw.launch('http://example.com/', {:user_agent => 'Chainsaw XXX'})
|
13
13
|
|
14
14
|
assert_instance_of Chainsaw::Browser, agent1
|
15
|
-
assert_equal agent1.to_yaml, agent2.to_yaml
|
15
|
+
#assert_equal agent1.to_yaml, agent2.to_yaml
|
16
|
+
assert_equal agent1.user_agent, agent2.user_agent
|
16
17
|
|
17
18
|
end
|
19
|
+
|
20
|
+
def test_launch_more
|
21
|
+
assert_nothing_raised do
|
22
|
+
Chainsaw {
|
23
|
+
set_next 'http://example.com/'
|
24
|
+
}
|
25
|
+
end
|
26
|
+
|
27
|
+
assert_nothing_raised do
|
28
|
+
Chainsaw('http://example.com/')
|
29
|
+
end
|
30
|
+
|
31
|
+
assert_nothing_raised do
|
32
|
+
Chainsaw.launch('http://example.com/some') {
|
33
|
+
set_next 'http://example.com/'
|
34
|
+
}
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
|
18
39
|
|
19
40
|
end
|
20
41
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ucnv-chainsaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ucnv
|
@@ -9,7 +9,7 @@ autorequire: ""
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-03-
|
12
|
+
date: 2009-03-07 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -71,6 +71,7 @@ files:
|
|
71
71
|
- lib/chainsaw.rb
|
72
72
|
- examples/01_google.rb
|
73
73
|
- examples/02_twitter.rb
|
74
|
+
- examples/03_delicious.rb
|
74
75
|
has_rdoc: true
|
75
76
|
homepage: http://github.com/ucnv/chainsaw/tree/master
|
76
77
|
post_install_message:
|
@@ -93,7 +94,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
93
94
|
requirements:
|
94
95
|
- - ">="
|
95
96
|
- !ruby/object:Gem::Version
|
96
|
-
version: 1.8.
|
97
|
+
version: 1.8.7
|
97
98
|
version:
|
98
99
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
99
100
|
requirements:
|