ucnv-chainsaw 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +42 -0
- data/Rakefile +149 -0
- data/examples/01_google.rb +17 -0
- data/examples/02_twitter.rb +15 -0
- data/lib/chainsaw.rb +40 -0
- data/lib/chainsaw/browser.rb +262 -0
- data/lib/chainsaw/common.rb +87 -0
- data/lib/chainsaw/element.rb +62 -0
- data/lib/chainsaw/ext/httpclient.rb +27 -0
- data/lib/chainsaw/ext/nokogiri.rb +29 -0
- data/test/helper.rb +15 -0
- data/test/htdocs/01.html +17 -0
- data/test/htdocs/02.html +9 -0
- data/test/htdocs/03.html +25 -0
- data/test/htdocs/04.html +14 -0
- data/test/htdocs/05.html +14 -0
- data/test/htdocs/06.html +14 -0
- data/test/htdocs/cgi.rb +50 -0
- data/test/htdocs/img.gif +0 -0
- data/test/server.rb +20 -0
- data/test/test_browser.rb +274 -0
- data/test/test_chainsaw.rb +20 -0
- data/test/test_element.rb +32 -0
- data/test/test_ext.rb +31 -0
- data/test/test_m17n.rb +23 -0
- metadata +116 -0
@@ -0,0 +1,20 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/helper.rb'
|
2
|
+
|
3
|
+
|
4
|
+
# Smoke test for the top-level Chainsaw.launch entry point.
class TestChainsaw < Test::Unit::TestCase

  # Launches one agent with a block that reassigns the user agent and calls
  # set_next with the root URL on the yielded object, launches a second agent
  # directly with those final values, and expects both to dump to the same YAML.
  def test_launch
    configured = Chainsaw.launch('http://example.com/some', {:user_agent => 'XXX'}) do |cs|
      cs.user_agent = 'Chainsaw XXX'
      cs.set_next 'http://example.com/'
    end
    direct = Chainsaw.launch('http://example.com/', {:user_agent => 'Chainsaw XXX'})

    # launch must hand back a Browser even when a block is given.
    assert_instance_of Chainsaw::Browser, configured
    assert_equal configured.to_yaml, direct.to_yaml
  end

end
|
20
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/helper.rb'
|
2
|
+
|
3
|
+
# Tests for serialize_form as called on a <form> node parsed by Nokogiri.
# NOTE(review): serialize_form is presumably added by lib/chainsaw/element.rb
# (loaded through helper.rb) -- confirm.
class TestElement < Test::Unit::TestCase

  def setup ; end

  # text, hidden and password inputs are each expected to come back as a
  # [name, value] pair, with the password value reflecting the set_attribute
  # call made after parsing.
  def test_serialize_form_01
    # Heredoc body is kept flush-left so the parsed markup is unchanged.
    f = <<-FORM
<form action="">
<input type="text" name="t1" value="abc" />
<input type="hidden" name="t2" value="def" />
<input type="password" name="t3" value="ghi" />
</form>
    FORM
    n = Nokogiri::HTML.parse f
    n.xpath('.//input[@type="password"]').first.set_attribute('value', 'jkl')
    s = n.xpath('.//form').first.serialize_form
    # assert_equal takes (expected, actual); keeping that order makes the
    # failure message label the two values correctly.
    assert_equal [["t1", "abc"], ["t2", "def"], ["t3", "jkl"]], s
  end

  # An input with no type attribute is expected to be serialized, while an
  # input with no name attribute is expected to be left out.
  def test_serialize_form_02
    f = <<-FORM
<form action="">
<input name="t1" value="abc" />
<input type="text" value="def" />
</form>
    FORM
    n = Nokogiri::HTML.parse f
    s = n.xpath('.//form').first.serialize_form
    # expected value first, actual second
    assert_equal [["t1", "abc"]], s
  end
end
|
data/test/test_ext.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/helper.rb'
|
2
|
+
|
3
|
+
# Tests for the methods this suite calls on HTTPClient (get_r) and on
# Nokogiri documents (fix_xpath, search with id("...")).
# NOTE(review): these look like the extensions from lib/chainsaw/ext/ -- confirm.
class TestExt < Test::Unit::TestCase

  def setup ; end

  # get_r against the 01.html fixture is expected to answer with status 200.
  def test_httpclient_get_r
    client = HTTPClient.new
    target = URI.join(TEST_URL, '01.html')
    response = client.get_r(target, nil, {}) #TODO: test with redirect
    assert_equal 200, response.status
  end

  # fix_xpath is expected to rewrite id("...") into an @id attribute
  # predicate and to leave an expression without id("...") untouched;
  # search is then expected to accept the id("...") form directly.
  def test_nokogiri_id_func
    # Heredoc body is kept flush-left so the parsed XML is unchanged.
    xml = <<-XML
<doc>
<items>
<item id="item1" name="abc"/>
<item id="item2" name="efd"/>
</items>
</doc>
    XML
    doc = Nokogiri::XML.parse(xml)

    rewritten = doc.fix_xpath('id("a")', 'id("b")/x', '//*[id="c"]')
    assert_equal ['.//*[@id="a"]', './/*[@id="b"]/x', '//*[id="c"]'], rewritten

    item = doc.search('id("item1")')[0]
    assert_equal 'abc', item.get_attribute('name')
  end


end
|
data/test/test_m17n.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/helper.rb'
|
2
|
+
|
3
|
+
# Encoding-detection tests run against the 04/05/06.html fixtures served
# under TEST_URL.
class TestM17N < Test::Unit::TestCase

  def setup ; end

  # Chainsaw::Encoding.guess is expected to name the encoding of each
  # fixture's raw body.
  def test_guess
    assert_equal 'UTF-8', Chainsaw::Encoding.guess(read_fixture('04.html'))
    assert_equal 'SHIFT-JIS', Chainsaw::Encoding.guess(read_fixture('05.html'))
    assert_equal 'EUC-JP', Chainsaw::Encoding.guess(read_fixture('06.html'))
  end

  # Opening each fixture through Chainsaw is expected to yield a document
  # whose anchors can be matched and read with the same Japanese literals,
  # whichever fixture is loaded.
  def test_guess_through_chainsaw
    %w{04.html 05.html 06.html}.each do |html|
      Chainsaw.launch(TEST_URL + html).open do |cs|
        a = cs.doc.xpath('//a[@name="ウェブ検索のサービス"]')
        assert_kind_of Nokogiri::XML::NodeSet, a, "Error during the process #{html}"
        assert_equal 'グーグル・ジャパン', a[0].content, "Error during the process #{html}"
        assert_equal 'ヤフー・ジャパン', a[1].content, "Error during the process #{html}"
      end
    end
  end

  private

  # Returns the raw body of the fixture +name+ under TEST_URL.
  #
  # Uses the block form of open so the handle is closed as soon as the body
  # has been read, instead of being left open for the garbage collector.
  # NOTE(review): Kernel#open only accepts URLs when open-uri is loaded
  # (presumably by helper.rb) and only on Ruby < 3.0; newer Rubies need
  # URI.open -- confirm the supported Ruby range before switching.
  def read_fixture(name)
    open(TEST_URL + name) { |io| io.read }
  end
end
|
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ucnv-chainsaw
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- ucnv
|
8
|
+
autorequire: ""
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-03-02 00:00:00 -08:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: nokogiri
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.2.1
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: httpclient
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 2.1.4
|
34
|
+
version:
|
35
|
+
description: A Ruby library for spidering web resources.
|
36
|
+
email: ucnvvv at gmail.com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files:
|
42
|
+
- README.rdoc
|
43
|
+
- ChangeLog
|
44
|
+
files:
|
45
|
+
- README.rdoc
|
46
|
+
- ChangeLog
|
47
|
+
- Rakefile
|
48
|
+
- test/helper.rb
|
49
|
+
- test/htdocs
|
50
|
+
- test/htdocs/01.html
|
51
|
+
- test/htdocs/02.html
|
52
|
+
- test/htdocs/03.html
|
53
|
+
- test/htdocs/04.html
|
54
|
+
- test/htdocs/05.html
|
55
|
+
- test/htdocs/06.html
|
56
|
+
- test/htdocs/cgi.rb
|
57
|
+
- test/htdocs/img.gif
|
58
|
+
- test/server.rb
|
59
|
+
- test/test_browser.rb
|
60
|
+
- test/test_chainsaw.rb
|
61
|
+
- test/test_element.rb
|
62
|
+
- test/test_ext.rb
|
63
|
+
- test/test_m17n.rb
|
64
|
+
- lib/chainsaw
|
65
|
+
- lib/chainsaw/browser.rb
|
66
|
+
- lib/chainsaw/common.rb
|
67
|
+
- lib/chainsaw/element.rb
|
68
|
+
- lib/chainsaw/ext
|
69
|
+
- lib/chainsaw/ext/httpclient.rb
|
70
|
+
- lib/chainsaw/ext/nokogiri.rb
|
71
|
+
- lib/chainsaw.rb
|
72
|
+
- examples/01_google.rb
|
73
|
+
- examples/02_twitter.rb
|
74
|
+
has_rdoc: true
|
75
|
+
homepage: http://github.com/ucnv/chainsaw/tree/master
|
76
|
+
post_install_message:
|
77
|
+
rdoc_options:
|
78
|
+
- --title
|
79
|
+
- chainsaw documentation
|
80
|
+
- --charset
|
81
|
+
- utf-8
|
82
|
+
- --opname
|
83
|
+
- index.html
|
84
|
+
- --line-numbers
|
85
|
+
- --main
|
86
|
+
- README.rdoc
|
87
|
+
- --inline-source
|
88
|
+
- --exclude
|
89
|
+
- ^(examples|extras)/
|
90
|
+
require_paths:
|
91
|
+
- lib
|
92
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 1.8.6
|
97
|
+
version:
|
98
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: "0"
|
103
|
+
version:
|
104
|
+
requirements: []
|
105
|
+
|
106
|
+
rubyforge_project: chainsaw
|
107
|
+
rubygems_version: 1.2.0
|
108
|
+
signing_key:
|
109
|
+
specification_version: 2
|
110
|
+
summary: A Ruby library for spidering web resources.
|
111
|
+
test_files:
|
112
|
+
- test/test_browser.rb
|
113
|
+
- test/test_chainsaw.rb
|
114
|
+
- test/test_element.rb
|
115
|
+
- test/test_ext.rb
|
116
|
+
- test/test_m17n.rb
|