mechanize 0.1.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

@@ -0,0 +1,200 @@
1
+ #
2
+ # Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de).
3
+ # Released under the same terms of license as Ruby.
4
+ #
5
+
6
+ require 'rexml/rexml'
7
+
8
module REXML::Node

  # Visits all element subnodes of +self+ recursively. Each element is
  # yielded before its own subnodes (depth-first, document order). Only
  # element children are visited, because the walk goes through +elements+.

  def each_recursive(&block) # :yields: node
    elements.each do |node|
      block.call(node)
      node.each_recursive(&block)
    end
  end

  # Finds (and returns) the first subnode (recursively) for which the block
  # evaluates to true. Returns +nil+ if none was found.

  def find_first_recursive(&block) # :yields: node
    each_recursive do |node|
      # +return+ leaves the whole method, which stops the traversal early.
      return node if block.call(node)
    end
    nil
  end

  # Finds all subnodes (recursively) for which the block evaluates to true
  # and returns them as an array, in traversal order.

  def find_all_recursive(&block) # :yields: node
    matches = []
    each_recursive { |node| matches << node if block.call(node) }
    matches
  end

  # Returns the index that +self+ has in its parent's elements collection, so
  # that the following equation holds true:
  #
  #   node == node.parent.elements[node.index_in_parent]
  #
  # The position is looked up in +parent.elements+ (1-based, elements only)
  # rather than in the parent's raw child list. The raw child list also
  # contains text nodes, e.g. the whitespace between tags, which would shift
  # the result and break the equation above.
  #
  # REXML's Elements#index returns -1 if +self+ is not an element child of
  # its parent.

  def index_in_parent
    parent.elements.index(self)
  end

  # Recursively collects text strings into a (nested) array: one nested array
  # per child element, followed by the +text+ of +self+. +nil+ entries
  # (elements without text) are dropped.
  #
  # E.g. the method would return [["abc"], "def"] for this node:
  #
  #   <i><b>abc</b>def</i>
  #
  # NOTE(review): only the value of +text+ is taken per element; REXML
  # documents Element#text as returning the first text child, so text that
  # follows a nested element after an earlier text node is presumably not
  # collected -- confirm before relying on this for mixed content.

  def collect_text_recursively
    (elements.map { |n| n.collect_text_recursively } + [text]).compact
  end

  # Returns the text collected by +collect_text_recursively+, merged into
  # one string. This is equivalent to:
  #
  #   collect_text_recursively.flatten.join("")

  def all_text
    collect_text_recursively.flatten.join("")
  end

end
68
+
69
+ #
70
+ # Starting with +root_node+, we recursively look for a node with the given
71
+ # +tag+, the given +attributes+ (a Hash) and whose text equals or matches the
72
+ # +text+ string or regular expression.
73
+ #
74
+ # To find the following node:
75
+ #
76
+ # <td class='abc'>text</td>
77
+ #
78
+ # We use:
79
+ #
80
+ # find_node(root, 'td', {'class' => 'abc'}, "text")
81
+ #
82
+ # Returns +nil+ if no matching node was found.
83
+
84
# Searches below +root_node+ (via +find_first_recursive+) for the first node
# whose name equals +tag+, whose attributes equal every key/value pair in
# +attributes+, and -- when +text+ is given -- whose text is matched by
# +text+ using <tt>===</tt> (so a String compares for equality and a Regexp
# tests for a match). Returns +nil+ if no node qualifies.
def find_node(root_node, tag, attributes, text=nil)
  root_node.find_first_recursive do |candidate|
    next false unless candidate.name == tag
    next false unless attributes.all? { |key, expected| candidate.attributes[key] == expected }

    # No text constraint means any node that got this far is accepted.
    !text || text === candidate.text
  end
end
91
+
92
+ #
93
+ # Extract specific columns (specified by the position of its corresponding
94
+ # header column) from a table.
95
+ #
96
+ # Given the following table:
97
+ #
98
+ # <table>
99
+ # <tr>
100
+ # <td>A</td>
101
+ # <td>B</td>
102
+ # <td>C</td>
103
+ # </tr>
104
+ # <tr>
105
+ # <td>A.1</td>
106
+ # <td>B.1</td>
107
+ # <td>C.1</td>
108
+ # </tr>
109
+ # <tr>
110
+ # <td>A.2</td>
111
+ # <td>B.2</td>
112
+ # <td>C.2</td>
113
+ # </tr>
114
+ # </table>
115
+ #
116
+ # To extract the first (A) and last (C) column:
117
+ #
118
+ # extract_from_table(root_node, ["A", "C"])
119
+ #
120
+ # And you get this as result:
121
+ #
122
+ # [
123
+ # ["A.1", "C.1"],
124
+ # ["A.2", "C.2"]
125
+ # ]
126
+ #
127
+
128
# Extracts the columns named by +headers+ from the table found below
# +root_node+.
#
# Each entry of +headers+ is looked up with +find_node+ as the text of a
# 'td' node. All header cells must share one parent (the header row). For
# every row that follows the header row in the surrounding table, the text
# of the cells at the header positions is collected.
#
# Returns an array of rows, each an array of cell texts in +headers+ order.
# Returns an empty array if +headers+ is empty.
#
# Raises RuntimeError ("some headers not found") if a header has no matching
# cell, and RuntimeError ("different parents") if the header cells are not
# all in the same row.
def extract_from_table(root_node, headers)

  # extract and collect all header nodes

  header_nodes = headers.map { |header| find_node(root_node, 'td', {}, header) }

  raise "some headers not found" if header_nodes.compact.size < headers.size

  # without headers there is no header row to locate and nothing to extract
  return [] if header_nodes.empty?

  # assert that all headers have the same parent 'header_row', which is the row
  # in which the header_nodes are contained. 'table' is the surrounding table tag.

  header_row = header_nodes.first.parent
  table = header_row.parent

  raise "different parents" unless header_nodes.all? { |n| n.parent == header_row }

  # Positions are taken from the +elements+ collections (1-based, elements
  # only), i.e. the same numbering that +elements[]+ uses below. Positions in
  # the raw child lists would be shifted by the whitespace text nodes between
  # tags. The column positions are the same for every row, so they are
  # computed once.

  columns = header_nodes.map { |n| header_row.elements.index(n) }
  first_data_row = table.elements.index(header_row) + 1

  # we now iterate over all rows in the table that follow the header_row.
  # for each row we collect the elements at the same positions as the header_nodes.
  # this is what we finally return from the method.

  (first_data_row..table.elements.size).map do |inx|
    row = table.elements[inx]
    columns.map { |col| row.elements[col].text }
  end
end
153
+
154
+ # Given a HTML table, this method returns a matrix (2-dim array), with all the
155
+ # table-data elements correctly placed in it.
156
+ #
157
+ # If there's a table data element which uses 'colspan', that node is stored in
158
+ # at the current position of the row followed by (colspan-1) nil values.
159
+ #
160
+ # Example:
161
+ #
162
+ # <table>
163
+ # <tr>
164
+ # <td>A</td>
165
+ # <td>B</td>
166
+ # </tr>
167
+ # <tr>
168
+ # <td colspan="2">C</td>
169
+ # </tr>
170
+ # </table>
171
+ #
172
+ # Result:
173
+ #
174
+ # [
175
+ # [A, B],
176
+ # [C, nil]
177
+ # ]
178
+ #
179
+ # where A, B and C are the corresponding "<td>" nodes.
180
+ #
181
+
182
# Converts the 'tr' rows of +table_node+ into a 2-dim array of cell nodes.
#
# Every 'td' or 'th' child of a row is stored at its position; other child
# elements are skipped. A cell with a 'colspan' attribute is followed by
# (colspan - 1) +nil+ placeholders so that later cells keep their column.
def table_to_matrix(table_node)
  rows = []

  table_node.elements.each('tr') do |tr|
    cells = []

    tr.elements.each do |cell|
      if cell.name == 'td' || cell.name == 'th'
        # a missing colspan counts as a span of one column
        span = (cell.attributes['colspan'] || 1).to_i

        cells.push(cell)
        (span - 1).times { cells.push(nil) }
      end
    end

    rows.push(cells)
  end

  rows
end
data/mechanize.gemspec ADDED
@@ -0,0 +1,22 @@
1
require 'rubygems'

# The gem version is read from the Version = "x.y.z" assignment in
# lib/mechanize.rb (path relative to the current working directory).
version_match = /Version\s+=\s+"(\d+\.\d+\.\d+)"/.match(File.read('lib/mechanize.rb'))
raise "no version" unless version_match
version = version_match[1]

# Package every path below the current directory except Subversion metadata.
packaged_files = Dir['**/*'].reject { |path| path.include?(".svn") }

spec = Gem::Specification.new do |s|
  s.name = 'mechanize'
  s.version = version
  s.summary = 'Automated web-browsing.'
  s.add_dependency('narf', '>= 0.6.3')

  s.files = packaged_files

  s.require_path = 'lib'

  s.author = "Michael Neumann"
  s.email = "mneumann@ntecs.de"
  s.homepage = "rubyforge.org/projects/wee"
end
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.8.4
3
+ specification_version: 1
4
+ name: mechanize
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.1.0
7
+ date: 2005-01-26
8
+ summary: Automated web-browsing.
9
+ require_paths:
10
+ - lib
11
+ email: mneumann@ntecs.de
12
+ homepage: rubyforge.org/projects/wee
13
+ rubyforge_project:
14
+ description:
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: false
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ -
22
+ - ">"
23
+ - !ruby/object:Gem::Version
24
+ version: 0.0.0
25
+ version:
26
+ platform: ruby
27
+ authors:
28
+ - Michael Neumann
29
+ files:
30
+ - examples
31
+ - lib
32
+ - mechanize.gemspec
33
+ - README
34
+ - examples/rubyforge.rb
35
+ - lib/mechanize
36
+ - lib/mechanize.rb
37
+ - lib/mechanize/net-overrides
38
+ - lib/mechanize/parsing.rb
39
+ - lib/mechanize/net-overrides/net
40
+ - lib/mechanize/net-overrides/net/protocol.rb
41
+ - lib/mechanize/net-overrides/net/http.rb
42
+ - lib/mechanize/net-overrides/net/https.rb
43
+ test_files: []
44
+ rdoc_options: []
45
+ extra_rdoc_files: []
46
+ executables: []
47
+ extensions: []
48
+ requirements: []
49
+ dependencies:
50
+ - !ruby/object:Gem::Dependency
51
+ name: narf
52
+ version_requirement:
53
+ version_requirements: !ruby/object:Gem::Version::Requirement
54
+ requirements:
55
+ -
56
+ - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: 0.6.3
59
+ version: