mechanize 0.5.4 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

@@ -1,27 +0,0 @@
1
- class Module # :nodoc:
2
- def attr_finder(*syms)
3
- syms.each do |sym|
4
- class_eval %{ def #{sym.to_s}(hash = nil)
5
- if hash == nil
6
- @#{sym.to_s}
7
- else
8
- err = []
9
- hash.each \{ |k,v|
10
- err << \"#\{k\}('#\{v\}')"
11
- \}
12
- warn("attr_finder will be deprecated in " +
13
- "0.6.0. Please switch to: #\{err.join('.')\}")
14
- @#{sym.to_s}.find_all do |t|
15
- found = true
16
- hash.each_key \{ |key|
17
- found = false if t.send(key.to_sym) != hash[key]
18
- \}
19
- found
20
- end
21
- end
22
- end
23
- }
24
- end
25
- end
26
- end
27
-
@@ -1,224 +0,0 @@
1
- #
2
- # Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de).
3
- # Released under the same terms of license as Ruby.
4
- #
5
-
6
- require 'rexml/rexml'
7
-
8
- class REXML::Text
9
- def collect_text_recursively
10
- value()
11
- end
12
- end
13
-
14
- class REXML::Comment
15
- def collect_text_recursively
16
- []
17
- end
18
- end
19
-
20
- module REXML::Node
21
-
22
- # Aliasing functions to get rid of warnings. Remove when support for 1.8.2
23
- # is dropped.
24
- if RUBY_VERSION > "1.8.2"
25
- alias :old_each_recursive :each_recursive
26
- alias :old_find_first_recursive :find_first_recursive
27
- alias :old_index_in_parent :index_in_parent
28
- end
29
-
30
- # Visit all subnodes of +self+ recursively
31
-
32
- def each_recursive(&block) # :yields: node
33
- self.elements.each {|node|
34
- block.call(node)
35
- node.each_recursive(&block)
36
- }
37
- end
38
-
39
- # Find (and return) first subnode (recursively) for which the block evaluates
40
- # to true. Returns +nil+ if none was found.
41
-
42
- def find_first_recursive(&block) # :yields: node
43
- each_recursive {|node|
44
- return node if block.call(node)
45
- }
46
- return nil
47
- end
48
-
49
- # Find all subnodes (recursively) for which the block evaluates to true.
50
-
51
- def find_all_recursive(&block) # :yields: node
52
- arr = []
53
- each_recursive {|node|
54
- arr << node if block.call(node)
55
- }
56
- arr
57
- end
58
-
59
- # Returns the index that +self+ has in its parent's elements array, so that
60
- # the following equation holds true:
61
- #
62
- # node == node.parent.elements[node.index_in_parent]
63
-
64
- def index_in_parent
65
- parent.index(self)+1
66
- end
67
-
68
- # Recursively collects all text strings, starting at +self+, into an array.
69
- #
70
- # E.g. the method would return [["abc"], "def"] for this node:
71
- #
72
- # <i><b>abc</b>def</i>
73
-
74
- def collect_text_recursively
75
- map {|n| n.collect_text_recursively}
76
- end
77
-
78
- # Returns all text of all subnodes (recursively), merged into one string.
79
- # This is equivalent to:
80
- #
81
- # collect_text_recursively.flatten.join("")
82
-
83
- def all_text
84
- collect_text_recursively.flatten.join("")
85
- end
86
-
87
- end
88
-
89
- #
90
- # Starting with +root_node+, we recursively look for a node with the given
91
- # +tag+, the given +attributes+ (a Hash) and whose text equals or matches the
92
- # +text+ string or regular expression.
93
- #
94
- # To find the following node:
95
- #
96
- # <td class='abc'>text</td>
97
- #
98
- # We use:
99
- #
100
- # find_node(root, 'td', {'class' => 'abc'}, "text")
101
- #
102
- # Returns +nil+ if no matching node was found.
103
-
104
- def find_node(root_node, tag, attributes, text=nil)
105
- root_node.find_first_recursive {|node|
106
- node.name == tag and
107
- attributes.all? {|attr, val| node.attributes[attr] == val} and
108
- (text ? text === node.text : true)
109
- }
110
- end
111
-
112
- #
113
- # Extract specific columns (specified by the position of its corresponding
114
- # header column) from a table.
115
- #
116
- # Given the following table:
117
- #
118
- # <table>
119
- # <tr>
120
- # <td>A</td>
121
- # <td>B</td>
122
- # <td>C</td>
123
- # </tr>
124
- # <tr>
125
- # <td>A.1</td>
126
- # <td>B.1</td>
127
- # <td>C.1</td>
128
- # </tr>
129
- # <tr>
130
- # <td>A.2</td>
131
- # <td>B.2</td>
132
- # <td>C.2</td>
133
- # </tr>
134
- # </table>
135
- #
136
- # To extract the first (A) and last (C) column:
137
- #
138
- # extract_from_table(root_node, ["A", "C"])
139
- #
140
- # And you get this as result:
141
- #
142
- # [
143
- # ["A.1", "C.1"],
144
- # ["A.2", "C.2"]
145
- # ]
146
- #
147
-
148
- def extract_from_table(root_node, headers, header_tags = %w(td th))
149
-
150
- # extract and collect all header nodes
151
-
152
- header_nodes = headers.collect { |header|
153
- root_node.find_first_recursive {|node|
154
- header_tags.include?(node.name.downcase) and header === node.all_text
155
- }
156
- }
157
-
158
- raise "some headers not found" if header_nodes.compact.size < headers.size
159
-
160
- # assert that all headers have the same parent 'header_row', which is the row
161
- # in which the header_nodes are contained. 'table' is the surrounding table tag.
162
-
163
- header_row = header_nodes.first.parent
164
- table = header_row.parent
165
-
166
- raise "different parents" unless header_nodes.all? {|n| n.parent == header_row}
167
-
168
- # we now iterate over all rows in the table that follow the header_row.
169
- # for each row we collect the elements at the same positions as the header_nodes.
170
- # this is what we finally return from the method.
171
-
172
- (header_row.index_in_parent .. table.elements.size).collect do |inx|
173
- row = table.elements[inx]
174
- header_nodes.collect { |n| row.elements[ n.parent.elements.index(n) ].text }
175
- end
176
- end
177
-
178
- # Given a HTML table, this method returns a matrix (2-dim array), with all the
179
- # table-data elements correctly placed in it.
180
- #
181
- # If there's a table data element which uses 'colspan', that node is stored
182
- # at the current position of the row followed by (colspan-1) nil values.
183
- #
184
- # Example:
185
- #
186
- # <table>
187
- # <tr>
188
- # <td>A</td>
189
- # <td>B</td>
190
- # </tr>
191
- # <tr>
192
- # <td colspan="2">C</td>
193
- # </tr>
194
- # </table>
195
- #
196
- # Result:
197
- #
198
- # [
199
- # [A, B],
200
- # [C, nil]
201
- # ]
202
- #
203
- # where A, B and C are the corresponding "<td>" nodes.
204
- #
205
-
206
- def table_to_matrix(table_node)
207
- matrix = []
208
-
209
- # for each row
210
- table_node.elements.each('tr') {|r|
211
- row = []
212
- r.elements.each {|data|
213
- next unless ['td', 'th'].include?(data.name)
214
- row << data
215
-
216
- # fill with empty elements
217
- colspan = (data.attributes['colspan'] || 1).to_i
218
- (colspan - 1).times { row << nil }
219
- }
220
- matrix << row
221
- }
222
-
223
- return matrix
224
- end
data/test/parse.rb DELETED
@@ -1,39 +0,0 @@
1
- require 'rubygems'
2
-
3
- require 'web/htmltools/xmltree'
4
-
5
- parser = HTMLTree::XMLParser.new
6
- parser.feed(DATA.read.chomp)
7
- root = parser.document
8
-
9
- root.each_recursive { |node|
10
- name = node.name.downcase
11
- case name
12
- when 'form'
13
- node.each_recursive { |n|
14
- puts n.name.downcase
15
- }
16
- end
17
- }
18
-
19
- __END__
20
- <html>
21
- <body>
22
- <table>
23
- <tr>
24
- <td>
25
- <form name="foo">
26
- <table>
27
- <tr><td><h1>Header</h1></td></tr>
28
- <tr>
29
- <td>
30
- <input type="text" name="hey" value="" />
31
- </td>
32
- </tr>
33
- </table>
34
- </form>
35
- </td>
36
- </tr>
37
- </table>
38
- </body>
39
- </html>
data/test/tc_parsing.rb DELETED
@@ -1,64 +0,0 @@
1
- $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
-
3
- require 'test/unit'
4
- require 'rexml/document'
5
- require 'mechanize/parsing'
6
-
7
- class TestParsing < Test::Unit::TestCase
8
- def test_collect_text_recursively
9
- assert_equal [["abc"], "def"], root_for("<i><b>abc</b>def</i>").collect_text_recursively
10
-
11
- assert_equal ["asdf", ["abc"], "def"], root_for("<i>asdf<b>abc</b>def</i>").collect_text_recursively
12
- end
13
-
14
- def test_index_in_parent
15
- table = root_for %(<table><tr><td>A</td><td>B</td></tr><tr><td colspan="2">C</td></tr></table>)
16
- node = table.find_first_recursive {|n| n.name == 'tr'}
17
- assert_equal node, node.parent.elements[node.index_in_parent]
18
- end
19
-
20
- def test_table_to_matrix
21
- table = root_for %(<table><tr><td>A</td><td>B</td></tr><tr><td colspan="2">C</td></tr></table>)
22
- matrix = table_to_matrix(table)
23
- assert_equal "A", matrix[0][0].all_text
24
- assert_equal "B", matrix[0][1].all_text
25
- assert_equal "C", matrix[1][0].all_text
26
- assert_equal nil, matrix[1][1]
27
- end
28
-
29
- def test_extract_from_table
30
- table = root_for %(
31
- <table>
32
- <tr>
33
- <td>A</td>
34
- <td>B</td>
35
- <td>C</td>
36
- </tr>
37
- <tr>
38
- <td>A.1</td>
39
- <td>B.1</td>
40
- <td>C.1</td>
41
- </tr>
42
- <tr>
43
- <td>A.2</td>
44
- <td>B.2</td>
45
- <td>C.2</td>
46
- </tr>
47
- </table>)
48
-
49
- assert_equal [ ["A.1", "C.1"], ["A.2", "C.2"] ], extract_from_table(table, ["A", "C"])
50
- end
51
-
52
- private
53
-
54
- def root_for(str)
55
- REXML::Document.new(str).root
56
- end
57
- =begin
58
- def document_for(str)
59
- parser = HTMLTree::XMLParser.new
60
- parser.feed(str)
61
- parser.document
62
- end
63
- =end
64
- end
data/test/test_mech.rb DELETED
@@ -1,27 +0,0 @@
1
- require 'fileutils'
2
- require 'yaml/stringio'
3
- require 'test/unit/testsuite'
4
- require 'test/unit/ui/reporter'
5
- require 'test/unit/ui/console/testrunner'
6
-
7
- Thread.new {
8
- require 'server'
9
- }
10
-
11
- fail "Missing results directory" if ARGV.empty?
12
- html_dir = ARGV[0]
13
-
14
- FileUtils.rm_r html_dir rescue nil
15
- FileUtils.mkdir_p html_dir
16
-
17
- Dir['tc_*.rb'].each do |fn|
18
- load fn
19
- end
20
-
21
- suite = Test::Unit::TestSuite.new
22
- ObjectSpace.each_object(Class) do |cls|
23
- next if cls == Test::Unit::TestCase
24
- suite << cls.suite if cls.respond_to?(:suite)
25
- end
26
-
27
- Test::Unit::UI::Reporter.run(suite, html_dir)