ucnv-chainsaw 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ require File.dirname(__FILE__) + '/helper.rb'
2
+
3
+
4
+ class TestChainsaw < Test::Unit::TestCase  # Exercises Chainsaw.launch with and without a configuration block.
5
+
6
+ def test_launch  # An agent configured inside the launch block should dump to the same YAML as one given those settings directly.
7
+ agent1 = Chainsaw.launch('http://example.com/some', {:user_agent => 'XXX'}) { |cs|
8
+ cs.user_agent = 'Chainsaw XXX'  # overrides the 'XXX' user agent given in the options hash
9
+ cs.set_next 'http://example.com/'  # presumably retargets the agent from '/some' to '/' -- confirm against Chainsaw::Browser#set_next
10
+ }
11
+
12
+ agent2 = Chainsaw.launch('http://example.com/', {:user_agent => 'Chainsaw XXX'})
13
+
14
+ assert_instance_of Chainsaw::Browser, agent1
15
+ assert_equal agent1.to_yaml, agent2.to_yaml  # the two agents are compared through their YAML dumps
16
+
17
+ end
18
+
19
+ end
20
+
@@ -0,0 +1,32 @@
1
+ require File.dirname(__FILE__) + '/helper.rb'
2
+
3
+ class TestElement < Test::Unit::TestCase
4
+
5
+ def setup ; end
6
+
7
+ def test_serialize_form_01
8
+ f = <<-FORM
9
+ <form action="">
10
+ <input type="text" name="t1" value="abc" />
11
+ <input type="hidden" name="t2" value="def" />
12
+ <input type="password" name="t3" value="ghi" />
13
+ </form>
14
+ FORM
15
+ n = Nokogiri::HTML.parse f
16
+ n.xpath('.//input[@type="password"]').first.set_attribute('value', 'jkl')
17
+ s = n.xpath('.//form').first.serialize_form
18
+ assert_equal s, [["t1", "abc"], ["t2", "def"], ["t3", "jkl"]]
19
+ end
20
+
21
+ def test_serialize_form_02
22
+ f = <<-FORM
23
+ <form action="">
24
+ <input name="t1" value="abc" />
25
+ <input type="text" value="def" />
26
+ </form>
27
+ FORM
28
+ n = Nokogiri::HTML.parse f
29
+ s = n.xpath('.//form').first.serialize_form
30
+ assert_equal s, [["t1", "abc"]]
31
+ end
32
+ end
data/test/test_ext.rb ADDED
@@ -0,0 +1,31 @@
1
+ require File.dirname(__FILE__) + '/helper.rb'
2
+
3
+ class TestExt < Test::Unit::TestCase  # Tests HTTPClient#get_r and the id() XPath handling on Nokogiri documents.
4
+
5
+ def setup ; end
6
+
7
+ def test_httpclient_get_r  # fetching 01.html under TEST_URL through get_r should answer with status 200
8
+ h = HTTPClient.new
9
+ d = h.get_r(URI.join(TEST_URL, '01.html'), nil, {}) #TODO: test with redirect
10
+ assert_equal 200, d.status
11
+ end
12
+
13
+ def test_nokogiri_id_func  # checks the rewriting of id("...") XPath calls and that search accepts them
14
+ x = <<-XML
15
+ <doc>
16
+ <items>
17
+ <item id="item1" name="abc"/>
18
+ <item id="item2" name="efd"/>
19
+ </items>
20
+ </doc>
21
+ XML
22
+ n = Nokogiri::XML.parse(x)
23
+ ps = n.fix_xpath('id("a")', 'id("b")/x', '//*[id="c"]')  # two expressions start with an id("...") call; the third only contains the text id="c" inside a predicate
24
+ assert_equal ['.//*[@id="a"]', './/*[@id="b"]/x', '//*[id="c"]'], ps  # leading id("...") calls become @id predicates; the third expression is expected back unchanged
25
+
26
+ i = n.search('id("item1")')[0]  # search is expected to resolve id("item1") to the matching <item>
27
+ assert_equal 'abc', i.get_attribute('name')
28
+ end
29
+
30
+
31
+ end
data/test/test_m17n.rb ADDED
@@ -0,0 +1,23 @@
1
+ require File.dirname(__FILE__) + '/helper.rb'
2
+
3
+ class TestM17N < Test::Unit::TestCase  # Encoding-detection tests against the 04/05/06.html fixtures served under TEST_URL.
4
+
5
+ def setup ; end
6
+
7
+ def test_guess  # Chainsaw::Encoding.guess should name the encoding of each fixture's raw body
8
+ assert_equal 'UTF-8', Chainsaw::Encoding.guess(open(TEST_URL + '04.html') { |io| io.read })  # block form of open closes the handle once the body is read
9
+ assert_equal 'SHIFT-JIS', Chainsaw::Encoding.guess(open(TEST_URL + '05.html') { |io| io.read })
10
+ assert_equal 'EUC-JP', Chainsaw::Encoding.guess(open(TEST_URL + '06.html') { |io| io.read })
11
+ end
12
+
13
+ def test_guess_through_chainsaw  # each of the three fixtures should yield the same two anchor texts when opened through Chainsaw
14
+ %w{04.html 05.html 06.html}.each do |html|
15
+ Chainsaw.launch(TEST_URL + html).open { |cs|
16
+ a = cs.doc.xpath('//a[@name="ウェブ検索のサービス"]')  # selects anchors by their non-ASCII name attribute
17
+ assert_kind_of Nokogiri::XML::NodeSet, a, "Error during the process #{html}"
18
+ assert_equal 'グーグル・ジャパン', a[0].content, "Error during the process #{html}"
19
+ assert_equal 'ヤフー・ジャパン', a[1].content, "Error during the process #{html}"
20
+ }
21
+ end
22
+ end
23
+ end
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ucnv-chainsaw
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - ucnv
8
+ autorequire: ""
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-03-02 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.1
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: httpclient
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 2.1.4
34
+ version:
35
+ description: A Ruby library for spidering web resources.
36
+ email: ucnvvv at gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - README.rdoc
43
+ - ChangeLog
44
+ files:
45
+ - README.rdoc
46
+ - ChangeLog
47
+ - Rakefile
48
+ - test/helper.rb
49
+ - test/htdocs
50
+ - test/htdocs/01.html
51
+ - test/htdocs/02.html
52
+ - test/htdocs/03.html
53
+ - test/htdocs/04.html
54
+ - test/htdocs/05.html
55
+ - test/htdocs/06.html
56
+ - test/htdocs/cgi.rb
57
+ - test/htdocs/img.gif
58
+ - test/server.rb
59
+ - test/test_browser.rb
60
+ - test/test_chainsaw.rb
61
+ - test/test_element.rb
62
+ - test/test_ext.rb
63
+ - test/test_m17n.rb
64
+ - lib/chainsaw
65
+ - lib/chainsaw/browser.rb
66
+ - lib/chainsaw/common.rb
67
+ - lib/chainsaw/element.rb
68
+ - lib/chainsaw/ext
69
+ - lib/chainsaw/ext/httpclient.rb
70
+ - lib/chainsaw/ext/nokogiri.rb
71
+ - lib/chainsaw.rb
72
+ - examples/01_google.rb
73
+ - examples/02_twitter.rb
74
+ has_rdoc: true
75
+ homepage: http://github.com/ucnv/chainsaw/tree/master
76
+ post_install_message:
77
+ rdoc_options:
78
+ - --title
79
+ - chainsaw documentation
80
+ - --charset
81
+ - utf-8
82
+ - --opname
83
+ - index.html
84
+ - --line-numbers
85
+ - --main
86
+ - README.rdoc
87
+ - --inline-source
88
+ - --exclude
89
+ - ^(examples|extras)/
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: 1.8.6
97
+ version:
98
+ required_rubygems_version: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: "0"
103
+ version:
104
+ requirements: []
105
+
106
+ rubyforge_project: chainsaw
107
+ rubygems_version: 1.2.0
108
+ signing_key:
109
+ specification_version: 2
110
+ summary: A Ruby library for spidering web resources.
111
+ test_files:
112
+ - test/test_browser.rb
113
+ - test/test_chainsaw.rb
114
+ - test/test_element.rb
115
+ - test/test_ext.rb
116
+ - test/test_m17n.rb