mechanize 0.5.4 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

@@ -71,12 +71,12 @@ module WWW
71
71
  super(name, value)
72
72
  end
73
73
 
74
- def tick
74
+ def check
75
75
  uncheck_peers
76
76
  @checked = true
77
77
  end
78
78
 
79
- def untick
79
+ def uncheck
80
80
  @checked = false
81
81
  end
82
82
 
@@ -88,7 +88,7 @@ module WWW
88
88
  def uncheck_peers
89
89
  @form.radiobuttons.name(name).each do |b|
90
90
  next if b.value == value
91
- b.untick
91
+ b.uncheck
92
92
  end
93
93
  end
94
94
  end
@@ -117,12 +117,10 @@ module WWW
117
117
  @options = WWW::Mechanize::List.new
118
118
 
119
119
  # parse
120
- node.each_recursive {|n|
121
- if n.name.downcase == 'option'
122
- option = Option.new(n, self)
123
- @options << option
124
- end
125
- }
120
+ (node/'option').each do |n|
121
+ option = Option.new(n, self)
122
+ @options << option
123
+ end
126
124
  super(name, value)
127
125
  end
128
126
 
@@ -0,0 +1,12 @@
1
+ require 'hpricot'
2
+ class Hpricot::Elem
3
+ def all_text
4
+ text = ''
5
+ children.each do |child|
6
+ if child.respond_to? :content
7
+ text << child.content
8
+ end
9
+ end
10
+ text
11
+ end
12
+ end
@@ -1,5 +1,3 @@
1
- require 'pp'
2
-
3
1
  module WWW
4
2
  # :stopdoc:
5
3
  class Mechanize
@@ -11,7 +9,6 @@ module WWW
11
9
  q.pp current_page
12
10
  }
13
11
  end
14
- alias :inspect :pretty_print_inspect
15
12
 
16
13
  class Page
17
14
  def pretty_print(q)
@@ -43,7 +40,6 @@ module WWW
43
40
  }
44
41
  }
45
42
  end
46
- alias :inspect :pretty_print_inspect
47
43
  end
48
44
 
49
45
  class Link
@@ -53,7 +49,6 @@ module WWW
53
49
  q.breakable; q.pp href
54
50
  }
55
51
  end
56
- alias :inspect :pretty_print_inspect
57
52
  end
58
53
 
59
54
  class Form
@@ -82,7 +77,6 @@ module WWW
82
77
  }
83
78
  }
84
79
  end
85
- alias :inspect :pretty_print_inspect
86
80
  end
87
81
 
88
82
  class RadioButton
@@ -1,7 +1,5 @@
1
- # DO NOT EDIT
2
- # This file is auto-generated by build scripts
3
1
  module WWW
4
2
  class Mechanize
5
- Version = '0.5.4'
3
+ Version = '0.6.0'
6
4
  end
7
5
  end
@@ -1,4 +1,5 @@
1
1
  require 'fileutils'
2
+ require 'hpricot'
2
3
 
3
4
  module WWW
4
5
  class Mechanize
@@ -14,27 +15,18 @@ module WWW
14
15
  # agent.get('http://google.com/').class #=> WWW::Mechanize::Page
15
16
  #
16
17
  class Page < File
17
- attr_accessor :watch_for_set
18
- attr_finder :frames, :iframes, :links, :forms, :meta, :watches
18
+ attr_reader :root, :title, :watch_for_set
19
+ attr_reader :frames, :iframes, :links, :forms, :meta, :watches
19
20
 
20
- # Alias our finders so that we can lazily parse the html
21
- alias :find_frames :frames
22
- alias :find_iframes :iframes
23
- alias :find_links :links
24
- alias :find_forms :forms
25
- alias :find_meta :meta
26
- alias :find_watches :watches
27
-
28
21
  def initialize(uri=nil, response=nil, body=nil, code=nil)
29
22
  super(uri, response, body, code)
30
- @frames = nil
31
- @iframes = nil
32
- @links = nil
33
- @forms = nil
34
- @meta = nil
35
- @watches = nil
36
- @root = nil
37
- @title = nil
23
+ @watch_for_set = {}
24
+
25
+ yield self if block_given?
26
+
27
+ raise Mechanize::ContentTypeError.new(response['content-type']) unless
28
+ content_type() =~ /^text\/html/
29
+ parse_html if body && response
38
30
  end
39
31
 
40
32
  # Get the response header
@@ -44,57 +36,23 @@ module WWW
44
36
 
45
37
  # Get the content type
46
38
  def content_type
47
- @response['Content-Type']
48
- end
49
-
50
- # Get a list of Form associated with this page.
51
- def forms(*args)
52
- parse_html() unless @forms
53
- find_forms(*args)
54
- end
55
-
56
- # Get a list of Link associated with this page.
57
- def links(*args)
58
- parse_html() unless @links
59
- find_links(*args)
60
- end
61
-
62
- # Get the root XML parse tree for this page.
63
- def root
64
- parse_html() unless @root
65
- @root
66
- end
67
-
68
- # This method watches out for a particular tag, and will call back to the
69
- # class specified for the tag in the watch_for_set method. See the example
70
- # in this class.
71
- def watches(*args)
72
- parse_html() unless @watches
73
- find_watches(*args)
74
- end
75
-
76
- # Get a list of Meta links, usually used for refreshing the page.
77
- def meta(*args)
78
- parse_html() unless @meta
79
- find_meta(*args)
39
+ @response['content-type']
80
40
  end
81
41
 
82
- # Get a list of Frame from the page
83
- def frames(*args)
84
- parse_html() unless @frames
85
- find_frames(*args)
42
+ # Search through the page like Hpricot
43
+ def search(*args)
44
+ @root.search(*args)
86
45
  end
87
46
 
88
- # Get a list of IFrame from the page
89
- def iframes(*args)
90
- parse_html() unless @iframes
91
- find_iframes(*args)
47
+ def at(*args)
48
+ @root.at(*args)
92
49
  end
93
50
 
94
- # Fetch the title of the page
95
- def title
96
- parse_html() unless @title
97
- @title
51
+ alias :/ :search
52
+
53
+ def watch_for_set=(obj)
54
+ @watch_for_set = obj
55
+ parse_html if @body
98
56
  end
99
57
 
100
58
  def form(name)
@@ -104,29 +62,8 @@ module WWW
104
62
  private
105
63
 
106
64
  def parse_html
107
- raise Mechanize::ContentTypeError.new(content_type()) unless
108
- content_type() =~ /^text\/html/
109
-
110
65
  # construct parser and feed with HTML
111
- parser = HTMLTree::XMLParser.new
112
- begin
113
- parser.feed(@body)
114
- rescue => ex
115
- if ex.message =~ /attempted adding second root element to document/ and
116
- # Put the whole document inside a single root element, which I
117
- # simply name <root>, just to make the parser happy. It's no
118
- #longer valid HTML, but without a single root element, it's not
119
- # valid HTML as well.
120
-
121
- # TODO: leave a possible doctype definition outside this element.
122
- parser = HTMLTree::XMLParser.new
123
- parser.feed("<root>" + @body + "</root>")
124
- else
125
- raise
126
- end
127
- end
128
-
129
- @root = parser.document
66
+ @root = Hpricot.parse(@body)
130
67
 
131
68
  @forms = WWW::Mechanize::List.new
132
69
  @links = WWW::Mechanize::List.new
@@ -135,39 +72,57 @@ module WWW
135
72
  @iframes = WWW::Mechanize::List.new
136
73
  @watches = {}
137
74
 
138
- @root.each_recursive {|node|
139
- name = node.name.downcase
140
-
141
- case name
142
- when 'form'
143
- form = Form.new(node)
144
- form.action ||= @uri
145
- @forms << form
146
- when 'title'
147
- @title = node.text
148
- when 'a'
149
- @links << Link.new(node)
150
- when 'meta'
151
- equiv = node.attributes['http-equiv']
152
- content = node.attributes['content']
153
- if equiv != nil && equiv.downcase == 'refresh'
154
- if content != nil && content =~ /^\d+\s*;\s*url\s*=\s*(\S+)/i
155
- node.attributes['href'] = $1
156
- @meta << Meta.new(node)
157
- end
75
+ # Set the title
76
+ @title = if (@root/'title').text.length > 0
77
+ (@root/'title').text
78
+ end
79
+
80
+ # Find all the form tags
81
+ (@root/'form').each do |html_form|
82
+ form = Form.new(html_form)
83
+ form.action ||= @uri
84
+ @forms << form
85
+ end
86
+
87
+ # Find all the 'a' tags
88
+ (@root/'a').each do |node|
89
+ @links << Link.new(node)
90
+ end
91
+
92
+ # Find all 'meta' tags
93
+ (@root/'meta').each do |node|
94
+ next if node.attributes.nil?
95
+ next unless node.attributes.has_key? 'http-equiv'
96
+ next unless node.attributes.has_key? 'content'
97
+ equiv = node.attributes['http-equiv']
98
+ content = node.attributes['content']
99
+ if equiv != nil && equiv.downcase == 'refresh'
100
+ if content != nil && content =~ /^\d+\s*;\s*url\s*=\s*(\S+)/i
101
+ node.attributes['href'] = $1
102
+ @meta << Meta.new(node)
158
103
  end
159
- when 'frame'
160
- @frames << Frame.new(node)
161
- when 'iframe'
162
- @iframes << Frame.new(node)
163
- else
164
- if @watch_for_set and @watch_for_set.keys.include?( name )
165
- @watches[name] = [] unless @watches[name]
166
- klass = @watch_for_set[name]
167
- @watches[name] << (klass ? klass.new(node) : node)
104
+ end
105
+ end
106
+
107
+ # Find all 'frame' tags
108
+ (@root/'frame').each do |node|
109
+ @frames << Frame.new(node)
110
+ end
111
+
112
+ # Find all 'iframe' tags
113
+ (@root/'iframe').each do |node|
114
+ @iframes << Frame.new(node)
115
+ end
116
+
117
+ # Find all watch tags
118
+ unless @watch_for_set.nil?
119
+ @watch_for_set.each do |key, klass|
120
+ (@root/key).each do |node|
121
+ @watches[key] ||= []
122
+ @watches[key] << (klass ? klass.new(node) : node)
168
123
  end
169
124
  end
170
- }
125
+ end
171
126
  end
172
127
  end
173
128
  end
@@ -12,22 +12,25 @@ module WWW
12
12
  attr_reader :node
13
13
  attr_reader :href
14
14
  attr_reader :text
15
+ attr_reader :attributes
15
16
  alias :to_s :text
16
17
 
17
18
  def initialize(node)
19
+ node.attributes ||= {}
18
20
  @node = node
19
21
  @href = node.attributes['href']
20
22
  @text = node.all_text
23
+ @attributes = node.attributes
21
24
 
22
25
  # If there is no text, try to find an image and use its alt text
23
- if (@text.nil? || @text.length == 0) && @node.has_elements?
26
+ if (@text.nil? || @text.length == 0) && (node/'img').length > 0
24
27
  @text = ''
25
- @node.each_element { |e|
26
- if e.name == 'img'
27
- @text << (e.has_attributes? ? e.attributes['alt'] || '' : '')
28
- end
29
- }
28
+ (node/'img').each do |e|
29
+ e.attributes ||= {}
30
+ @text << (e.attributes.has_key?('alt') ? e.attributes['alt'] : '')
31
+ end
30
32
  end
33
+
31
34
  end
32
35
 
33
36
  def uri
@@ -51,6 +54,7 @@ module WWW
51
54
  alias :name :text
52
55
 
53
56
  def initialize(node)
57
+ node.attributes ||= {}
54
58
  @node = node
55
59
  @text = node.attributes['name']
56
60
  @href = node.attributes['src']
@@ -21,7 +21,7 @@
21
21
  <P>This frameset document contains:
22
22
  <UL>
23
23
  <LI><A href="/google.html">Some neat contents</A>
24
- <LI><A href="/form_test.html">Form Test</A>
24
+ <LI><A href="/form_test.html" class="bar">Form Test</A>
25
25
  <LI><A href="/file_upload.html">Some other neat contents</A>
26
26
  </UL>
27
27
  </NOFRAMES>
@@ -0,0 +1,16 @@
1
+ <html>
2
+ <meta>
3
+ <head><title></title>
4
+ <body>
5
+ <a>Hello</a>
6
+ <a><img /></a>
7
+ <form>
8
+ <input />
9
+ <select>
10
+ <option />
11
+ </select>
12
+ <textarea></textarea>
13
+ </form>
14
+ <frame></frame>
15
+ </body>
16
+ </html>
@@ -15,7 +15,7 @@ class TestCheckBoxes < Test::Unit::TestCase
15
15
 
16
16
  def test_select_one
17
17
  form = @page.forms.first
18
- form.checkboxes.name('green').tick
18
+ form.checkboxes.name('green').check
19
19
  assert_equal(true, form.checkboxes.name('green').checked)
20
20
  assert_equal(false, form.checkboxes.name('red').checked)
21
21
  assert_equal(false, form.checkboxes.name('blue').checked)
@@ -26,7 +26,7 @@ class TestCheckBoxes < Test::Unit::TestCase
26
26
  def test_select_all
27
27
  form = @page.forms.first
28
28
  form.checkboxes.each do |b|
29
- b.tick
29
+ b.check
30
30
  end
31
31
  form.checkboxes.each do |b|
32
32
  assert_equal(true, b.checked)
@@ -36,17 +36,17 @@ class TestCheckBoxes < Test::Unit::TestCase
36
36
  def test_select_none
37
37
  form = @page.forms.first
38
38
  form.checkboxes.each do |b|
39
- b.untick
39
+ b.uncheck
40
40
  end
41
41
  form.checkboxes.each do |b|
42
42
  assert_equal(false, b.checked)
43
43
  end
44
44
  end
45
45
 
46
- def test_tick_one
46
+ def test_check_one
47
47
  form = @page.forms.first
48
48
  assert_equal(2, form.checkboxes.name('green').length)
49
- form.checkboxes.name('green')[1].tick
49
+ form.checkboxes.name('green')[1].check
50
50
  assert_equal(false, form.checkboxes.name('green')[0].checked)
51
51
  assert_equal(true, form.checkboxes.name('green')[1].checked)
52
52
  page = @agent.submit(form)
@@ -54,11 +54,11 @@ class TestCheckBoxes < Test::Unit::TestCase
54
54
  assert_equal('green:on', page.links.first.text)
55
55
  end
56
56
 
57
- def test_tick_two
57
+ def test_check_two
58
58
  form = @page.forms.first
59
59
  assert_equal(2, form.checkboxes.name('green').length)
60
- form.checkboxes.name('green')[0].tick
61
- form.checkboxes.name('green')[1].tick
60
+ form.checkboxes.name('green')[0].check
61
+ form.checkboxes.name('green')[1].check
62
62
  assert_equal(true, form.checkboxes.name('green')[0].checked)
63
63
  assert_equal(true, form.checkboxes.name('green')[1].checked)
64
64
  page = @agent.submit(form)