mechanize 0.5.4 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- data/CHANGELOG +12 -0
- data/GUIDE +125 -0
- data/NOTES +28 -0
- data/README +9 -5
- data/lib/mechanize.rb +14 -15
- data/lib/mechanize/cookie.rb +35 -55
- data/lib/mechanize/form.rb +39 -48
- data/lib/mechanize/form_elements.rb +7 -9
- data/lib/mechanize/hpricot.rb +12 -0
- data/lib/mechanize/inspect.rb +0 -6
- data/lib/mechanize/mech_version.rb +1 -3
- data/lib/mechanize/page.rb +70 -115
- data/lib/mechanize/page_elements.rb +10 -6
- data/test/htdocs/frame_test.html +1 -1
- data/test/htdocs/tc_no_attributes.html +16 -0
- data/test/tc_checkboxes.rb +8 -8
- data/test/tc_cookie_jar.rb +36 -28
- data/test/tc_mech.rb +21 -1
- data/test/tc_no_attributes.rb +20 -0
- data/test/tc_page.rb +1 -1
- data/test/tc_pluggable_parser.rb +31 -17
- data/test/tc_pretty_print.rb +1 -1
- data/test/tc_radiobutton.rb +4 -4
- data/test/ts_mech.rb +1 -1
- metadata +126 -134
- data/lib/mechanize/module.rb +0 -27
- data/lib/mechanize/parsing.rb +0 -224
- data/test/parse.rb +0 -39
- data/test/tc_parsing.rb +0 -64
- data/test/test_mech.rb +0 -27
@@ -71,12 +71,12 @@ module WWW
|
|
71
71
|
super(name, value)
|
72
72
|
end
|
73
73
|
|
74
|
-
def
|
74
|
+
def check
|
75
75
|
uncheck_peers
|
76
76
|
@checked = true
|
77
77
|
end
|
78
78
|
|
79
|
-
def
|
79
|
+
def uncheck
|
80
80
|
@checked = false
|
81
81
|
end
|
82
82
|
|
@@ -88,7 +88,7 @@ module WWW
|
|
88
88
|
def uncheck_peers
|
89
89
|
@form.radiobuttons.name(name).each do |b|
|
90
90
|
next if b.value == value
|
91
|
-
b.
|
91
|
+
b.uncheck
|
92
92
|
end
|
93
93
|
end
|
94
94
|
end
|
@@ -117,12 +117,10 @@ module WWW
|
|
117
117
|
@options = WWW::Mechanize::List.new
|
118
118
|
|
119
119
|
# parse
|
120
|
-
node.
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
end
|
125
|
-
}
|
120
|
+
(node/'option').each do |n|
|
121
|
+
option = Option.new(n, self)
|
122
|
+
@options << option
|
123
|
+
end
|
126
124
|
super(name, value)
|
127
125
|
end
|
128
126
|
|
data/lib/mechanize/inspect.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'pp'
|
2
|
-
|
3
1
|
module WWW
|
4
2
|
# :stopdoc:
|
5
3
|
class Mechanize
|
@@ -11,7 +9,6 @@ module WWW
|
|
11
9
|
q.pp current_page
|
12
10
|
}
|
13
11
|
end
|
14
|
-
alias :inspect :pretty_print_inspect
|
15
12
|
|
16
13
|
class Page
|
17
14
|
def pretty_print(q)
|
@@ -43,7 +40,6 @@ module WWW
|
|
43
40
|
}
|
44
41
|
}
|
45
42
|
end
|
46
|
-
alias :inspect :pretty_print_inspect
|
47
43
|
end
|
48
44
|
|
49
45
|
class Link
|
@@ -53,7 +49,6 @@ module WWW
|
|
53
49
|
q.breakable; q.pp href
|
54
50
|
}
|
55
51
|
end
|
56
|
-
alias :inspect :pretty_print_inspect
|
57
52
|
end
|
58
53
|
|
59
54
|
class Form
|
@@ -82,7 +77,6 @@ module WWW
|
|
82
77
|
}
|
83
78
|
}
|
84
79
|
end
|
85
|
-
alias :inspect :pretty_print_inspect
|
86
80
|
end
|
87
81
|
|
88
82
|
class RadioButton
|
data/lib/mechanize/page.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'fileutils'
|
2
|
+
require 'hpricot'
|
2
3
|
|
3
4
|
module WWW
|
4
5
|
class Mechanize
|
@@ -14,27 +15,18 @@ module WWW
|
|
14
15
|
# agent.get('http://google.com/').class #=> WWW::Mechanize::Page
|
15
16
|
#
|
16
17
|
class Page < File
|
17
|
-
|
18
|
-
|
18
|
+
attr_reader :root, :title, :watch_for_set
|
19
|
+
attr_reader :frames, :iframes, :links, :forms, :meta, :watches
|
19
20
|
|
20
|
-
# Alias our finders so that we can lazily parse the html
|
21
|
-
alias :find_frames :frames
|
22
|
-
alias :find_iframes :iframes
|
23
|
-
alias :find_links :links
|
24
|
-
alias :find_forms :forms
|
25
|
-
alias :find_meta :meta
|
26
|
-
alias :find_watches :watches
|
27
|
-
|
28
21
|
def initialize(uri=nil, response=nil, body=nil, code=nil)
|
29
22
|
super(uri, response, body, code)
|
30
|
-
@
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
@title = nil
|
23
|
+
@watch_for_set = {}
|
24
|
+
|
25
|
+
yield self if block_given?
|
26
|
+
|
27
|
+
raise Mechanize::ContentTypeError.new(response['content-type']) unless
|
28
|
+
content_type() =~ /^text\/html/
|
29
|
+
parse_html if body && response
|
38
30
|
end
|
39
31
|
|
40
32
|
# Get the response header
|
@@ -44,57 +36,23 @@ module WWW
|
|
44
36
|
|
45
37
|
# Get the content type
|
46
38
|
def content_type
|
47
|
-
@response['
|
48
|
-
end
|
49
|
-
|
50
|
-
# Get a list of Form associated with this page.
|
51
|
-
def forms(*args)
|
52
|
-
parse_html() unless @forms
|
53
|
-
find_forms(*args)
|
54
|
-
end
|
55
|
-
|
56
|
-
# Get a list of Link associated with this page.
|
57
|
-
def links(*args)
|
58
|
-
parse_html() unless @links
|
59
|
-
find_links(*args)
|
60
|
-
end
|
61
|
-
|
62
|
-
# Get the root XML parse tree for this page.
|
63
|
-
def root
|
64
|
-
parse_html() unless @root
|
65
|
-
@root
|
66
|
-
end
|
67
|
-
|
68
|
-
# This method watches out for a particular tag, and will call back to the
|
69
|
-
# class specified for the tag in the watch_for_set method. See the example
|
70
|
-
# in this class.
|
71
|
-
def watches(*args)
|
72
|
-
parse_html() unless @watches
|
73
|
-
find_watches(*args)
|
74
|
-
end
|
75
|
-
|
76
|
-
# Get a list of Meta links, usually used for refreshing the page.
|
77
|
-
def meta(*args)
|
78
|
-
parse_html() unless @meta
|
79
|
-
find_meta(*args)
|
39
|
+
@response['content-type']
|
80
40
|
end
|
81
41
|
|
82
|
-
#
|
83
|
-
def
|
84
|
-
|
85
|
-
find_frames(*args)
|
42
|
+
# Search through the page like HPricot
|
43
|
+
def search(*args)
|
44
|
+
@root.search(*args)
|
86
45
|
end
|
87
46
|
|
88
|
-
|
89
|
-
|
90
|
-
parse_html() unless @iframes
|
91
|
-
find_iframes(*args)
|
47
|
+
def at(*args)
|
48
|
+
@root.at(*args)
|
92
49
|
end
|
93
50
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
@
|
51
|
+
alias :/ :search
|
52
|
+
|
53
|
+
def watch_for_set=(obj)
|
54
|
+
@watch_for_set = obj
|
55
|
+
parse_html if @body
|
98
56
|
end
|
99
57
|
|
100
58
|
def form(name)
|
@@ -104,29 +62,8 @@ module WWW
|
|
104
62
|
private
|
105
63
|
|
106
64
|
def parse_html
|
107
|
-
raise Mechanize::ContentTypeError.new(content_type()) unless
|
108
|
-
content_type() =~ /^text\/html/
|
109
|
-
|
110
65
|
# construct parser and feed with HTML
|
111
|
-
|
112
|
-
begin
|
113
|
-
parser.feed(@body)
|
114
|
-
rescue => ex
|
115
|
-
if ex.message =~ /attempted adding second root element to document/ and
|
116
|
-
# Put the whole document inside a single root element, which I
|
117
|
-
# simply name <root>, just to make the parser happy. It's no
|
118
|
-
#longer valid HTML, but without a single root element, it's not
|
119
|
-
# valid HTML as well.
|
120
|
-
|
121
|
-
# TODO: leave a possible doctype definition outside this element.
|
122
|
-
parser = HTMLTree::XMLParser.new
|
123
|
-
parser.feed("<root>" + @body + "</root>")
|
124
|
-
else
|
125
|
-
raise
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
@root = parser.document
|
66
|
+
@root = Hpricot.parse(@body)
|
130
67
|
|
131
68
|
@forms = WWW::Mechanize::List.new
|
132
69
|
@links = WWW::Mechanize::List.new
|
@@ -135,39 +72,57 @@ module WWW
|
|
135
72
|
@iframes = WWW::Mechanize::List.new
|
136
73
|
@watches = {}
|
137
74
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
75
|
+
# Set the title
|
76
|
+
@title = if (@root/'title').text.length > 0
|
77
|
+
(@root/'title').text
|
78
|
+
end
|
79
|
+
|
80
|
+
# Find all the form tags
|
81
|
+
(@root/'form').each do |html_form|
|
82
|
+
form = Form.new(html_form)
|
83
|
+
form.action ||= @uri
|
84
|
+
@forms << form
|
85
|
+
end
|
86
|
+
|
87
|
+
# Find all the 'a' tags
|
88
|
+
(@root/'a').each do |node|
|
89
|
+
@links << Link.new(node)
|
90
|
+
end
|
91
|
+
|
92
|
+
# Find all 'meta' tags
|
93
|
+
(@root/'meta').each do |node|
|
94
|
+
next if node.attributes.nil?
|
95
|
+
next unless node.attributes.has_key? 'http-equiv'
|
96
|
+
next unless node.attributes.has_key? 'content'
|
97
|
+
equiv = node.attributes['http-equiv']
|
98
|
+
content = node.attributes['content']
|
99
|
+
if equiv != nil && equiv.downcase == 'refresh'
|
100
|
+
if content != nil && content =~ /^\d+\s*;\s*url\s*=\s*(\S+)/i
|
101
|
+
node.attributes['href'] = $1
|
102
|
+
@meta << Meta.new(node)
|
158
103
|
end
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
# Find all 'frame' tags
|
108
|
+
(@root/'frame').each do |node|
|
109
|
+
@frames << Frame.new(node)
|
110
|
+
end
|
111
|
+
|
112
|
+
# Find all 'iframe' tags
|
113
|
+
(@root/'iframe').each do |node|
|
114
|
+
@iframes << Frame.new(node)
|
115
|
+
end
|
116
|
+
|
117
|
+
# Find all watch tags
|
118
|
+
unless @watch_for_set.nil?
|
119
|
+
@watch_for_set.each do |key, klass|
|
120
|
+
(@root/key).each do |node|
|
121
|
+
@watches[key] ||= []
|
122
|
+
@watches[key] << (klass ? klass.new(node) : node)
|
168
123
|
end
|
169
124
|
end
|
170
|
-
|
125
|
+
end
|
171
126
|
end
|
172
127
|
end
|
173
128
|
end
|
@@ -12,22 +12,25 @@ module WWW
|
|
12
12
|
attr_reader :node
|
13
13
|
attr_reader :href
|
14
14
|
attr_reader :text
|
15
|
+
attr_reader :attributes
|
15
16
|
alias :to_s :text
|
16
17
|
|
17
18
|
def initialize(node)
|
19
|
+
node.attributes ||= {}
|
18
20
|
@node = node
|
19
21
|
@href = node.attributes['href']
|
20
22
|
@text = node.all_text
|
23
|
+
@attributes = node.attributes
|
21
24
|
|
22
25
|
# If there is no text, try to find an image and use it's alt text
|
23
|
-
if (@text.nil? || @text.length == 0) &&
|
26
|
+
if (@text.nil? || @text.length == 0) && (node/'img').length > 0
|
24
27
|
@text = ''
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
}
|
28
|
+
(node/'img').each do |e|
|
29
|
+
e.attributes ||= {}
|
30
|
+
@text << (e.attributes.has_key?('alt') ? e.attributes['alt'] : '')
|
31
|
+
end
|
30
32
|
end
|
33
|
+
|
31
34
|
end
|
32
35
|
|
33
36
|
def uri
|
@@ -51,6 +54,7 @@ module WWW
|
|
51
54
|
alias :name :text
|
52
55
|
|
53
56
|
def initialize(node)
|
57
|
+
node.attributes ||= {}
|
54
58
|
@node = node
|
55
59
|
@text = node.attributes['name']
|
56
60
|
@href = node.attributes['src']
|
data/test/htdocs/frame_test.html
CHANGED
@@ -21,7 +21,7 @@
|
|
21
21
|
<P>This frameset document contains:
|
22
22
|
<UL>
|
23
23
|
<LI><A href="/google.html">Some neat contents</A>
|
24
|
-
<LI><A href="/form_test.html">Form Test</A>
|
24
|
+
<LI><A href="/form_test.html" class="bar">Form Test</A>
|
25
25
|
<LI><A href="/file_upload.html">Some other neat contents</A>
|
26
26
|
</UL>
|
27
27
|
</NOFRAMES>
|
data/test/tc_checkboxes.rb
CHANGED
@@ -15,7 +15,7 @@ class TestCheckBoxes < Test::Unit::TestCase
|
|
15
15
|
|
16
16
|
def test_select_one
|
17
17
|
form = @page.forms.first
|
18
|
-
form.checkboxes.name('green').
|
18
|
+
form.checkboxes.name('green').check
|
19
19
|
assert_equal(true, form.checkboxes.name('green').checked)
|
20
20
|
assert_equal(false, form.checkboxes.name('red').checked)
|
21
21
|
assert_equal(false, form.checkboxes.name('blue').checked)
|
@@ -26,7 +26,7 @@ class TestCheckBoxes < Test::Unit::TestCase
|
|
26
26
|
def test_select_all
|
27
27
|
form = @page.forms.first
|
28
28
|
form.checkboxes.each do |b|
|
29
|
-
b.
|
29
|
+
b.check
|
30
30
|
end
|
31
31
|
form.checkboxes.each do |b|
|
32
32
|
assert_equal(true, b.checked)
|
@@ -36,17 +36,17 @@ class TestCheckBoxes < Test::Unit::TestCase
|
|
36
36
|
def test_select_none
|
37
37
|
form = @page.forms.first
|
38
38
|
form.checkboxes.each do |b|
|
39
|
-
b.
|
39
|
+
b.uncheck
|
40
40
|
end
|
41
41
|
form.checkboxes.each do |b|
|
42
42
|
assert_equal(false, b.checked)
|
43
43
|
end
|
44
44
|
end
|
45
45
|
|
46
|
-
def
|
46
|
+
def test_check_one
|
47
47
|
form = @page.forms.first
|
48
48
|
assert_equal(2, form.checkboxes.name('green').length)
|
49
|
-
form.checkboxes.name('green')[1].
|
49
|
+
form.checkboxes.name('green')[1].check
|
50
50
|
assert_equal(false, form.checkboxes.name('green')[0].checked)
|
51
51
|
assert_equal(true, form.checkboxes.name('green')[1].checked)
|
52
52
|
page = @agent.submit(form)
|
@@ -54,11 +54,11 @@ class TestCheckBoxes < Test::Unit::TestCase
|
|
54
54
|
assert_equal('green:on', page.links.first.text)
|
55
55
|
end
|
56
56
|
|
57
|
-
def
|
57
|
+
def test_check_two
|
58
58
|
form = @page.forms.first
|
59
59
|
assert_equal(2, form.checkboxes.name('green').length)
|
60
|
-
form.checkboxes.name('green')[0].
|
61
|
-
form.checkboxes.name('green')[1].
|
60
|
+
form.checkboxes.name('green')[0].check
|
61
|
+
form.checkboxes.name('green')[1].check
|
62
62
|
assert_equal(true, form.checkboxes.name('green')[0].checked)
|
63
63
|
assert_equal(true, form.checkboxes.name('green')[1].checked)
|
64
64
|
page = @agent.submit(form)
|