mechanize 0.4.7 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (48) hide show
  1. data/CHANGELOG +17 -0
  2. data/EXAMPLES +23 -44
  3. data/NOTES +49 -0
  4. data/lib/mechanize.rb +95 -80
  5. data/lib/mechanize/cookie.rb +147 -148
  6. data/lib/mechanize/cookie.rb.rej +16 -0
  7. data/lib/mechanize/errors.rb +29 -0
  8. data/lib/mechanize/form.rb +211 -186
  9. data/lib/mechanize/form_elements.rb +31 -71
  10. data/lib/mechanize/list.rb +34 -0
  11. data/lib/mechanize/mech_version.rb +3 -1
  12. data/lib/mechanize/module.rb +1 -1
  13. data/lib/mechanize/page.rb +162 -180
  14. data/lib/mechanize/page_elements.rb +53 -40
  15. data/lib/mechanize/parsing.rb +11 -3
  16. data/lib/mechanize/pluggable_parsers.rb +147 -0
  17. data/test/data/server.crt +14 -0
  18. data/test/data/server.csr +11 -0
  19. data/test/data/server.key +18 -0
  20. data/test/data/server.pem +15 -0
  21. data/test/htdocs/no_title_test.html +6 -0
  22. data/test/parse.rb +39 -0
  23. data/test/proxy.rb +30 -0
  24. data/test/server.rb +2 -0
  25. data/test/servlets.rb +8 -0
  26. data/test/ssl_server.rb +49 -0
  27. data/test/tc_authenticate.rb +8 -6
  28. data/test/tc_cookie_class.rb +28 -18
  29. data/test/tc_cookie_jar.rb +88 -27
  30. data/test/tc_cookies.rb +41 -44
  31. data/test/tc_errors.rb +9 -23
  32. data/test/tc_forms.rb +36 -32
  33. data/test/tc_frames.rb +6 -4
  34. data/test/tc_links.rb +7 -6
  35. data/test/tc_mech.rb +43 -46
  36. data/test/tc_page.rb +24 -0
  37. data/test/tc_pluggable_parser.rb +103 -0
  38. data/test/tc_post_form.rb +41 -0
  39. data/test/tc_proxy.rb +25 -0
  40. data/test/tc_response_code.rb +13 -10
  41. data/test/tc_save_file.rb +25 -0
  42. data/test/tc_ssl_server.rb +27 -0
  43. data/test/tc_upload.rb +8 -6
  44. data/test/tc_watches.rb +5 -2
  45. data/test/test_includes.rb +3 -3
  46. data/test/ts_mech.rb +11 -2
  47. metadata +100 -86
  48. data/test/tc_filter.rb +0 -34
@@ -1,4 +1,5 @@
1
1
  module WWW
2
+ class Mechanize
2
3
  # This class represents a field in a form. It handles the following input
3
4
  # tags found in a form:
4
5
  # text, password, hidden, int, textarea
@@ -12,22 +13,8 @@ module WWW
12
13
  @name, @value = name, value
13
14
  end
14
15
 
15
- # Returns an array of Field objects
16
- # TODO: is this correct?
17
- def self.extract_all_from(root_node)
18
- fields = []
19
- root_node.each_recursive {|node|
20
- if (node.name.downcase == 'input' and
21
- %w(text password hidden checkbox radio int).include?(node.attributes['type'].downcase)) or
22
- %w(textarea option).include?(node.name.downcase)
23
- fields << Field.new(node.attributes['name'], node.attributes['value'])
24
- end
25
- }
26
- return fields
27
- end
28
-
29
16
  def inspect
30
- "#{name} = #{@value}\n"
17
+ "#{name} = #{@value}"
31
18
  end
32
19
  end
33
20
 
@@ -36,45 +23,26 @@ module WWW
36
23
  # to upload and WWW::FileUpload#mime_type= to the appropriate mime type
37
24
  # of the file.
38
25
  # See the example in EXAMPLES[link://files/EXAMPLES.html]
39
- class FileUpload
40
- # value is the file-name, not the file-content
41
- attr_accessor :name
26
+ class FileUpload < Field
27
+ attr_accessor :name # Field name
28
+ attr_accessor :file_name # File name
29
+ attr_accessor :mime_type # Mime Type (Optional)
42
30
 
43
- attr_accessor :file_name, :file_data, :mime_type
31
+ alias :file_data :value
32
+ alias :file_data= :value=
44
33
 
45
34
  def initialize(name, file_name)
46
- @name, @file_name = name, file_name
35
+ @file_name = file_name
47
36
  @file_data = nil
37
+ super(name, @file_data)
48
38
  end
49
39
  end
50
40
 
51
41
  # This class represents a Submit button in a form.
52
- class Button
53
- attr_accessor :name, :value
54
-
55
- def initialize(name, value)
56
- @name, @value = name, value
57
- end
58
-
42
+ class Button < Field
59
43
  def add_to_query(query)
60
44
  query << [@name, @value || ''] if @name
61
45
  end
62
-
63
- # Returns an array of Button objects
64
- def self.extract_all_from(root_node)
65
- buttons = []
66
- root_node.each_recursive {|node|
67
- if node.name.downcase == 'input' and
68
- ['submit'].include?(node.attributes['type'].downcase)
69
- buttons << Button.new(node.attributes['name'], node.attributes['value'])
70
- end
71
- }
72
- return buttons
73
- end
74
-
75
- def inspect
76
- "#{name} = #{@value}\n"
77
- end
78
46
  end
79
47
 
80
48
  # This class represents an image button in a form. Use the x and y methods
@@ -82,6 +50,12 @@ module WWW
82
50
  class ImageButton < Button
83
51
  attr_accessor :x, :y
84
52
 
53
+ def initialize(name, value)
54
+ @x = nil
55
+ @y = nil
56
+ super(name, value)
57
+ end
58
+
85
59
  def add_to_query(query)
86
60
  if @name
87
61
  query << [@name, @value || '']
@@ -93,30 +67,18 @@ module WWW
93
67
 
94
68
  # This class represents a radio button found in a Form. To activate the
95
69
  # RadioButton in the Form, set the checked method to true.
96
- class RadioButton
97
- attr_accessor :name, :value, :checked
70
+ class RadioButton < Field
71
+ attr_accessor :checked
98
72
 
99
73
  def initialize(name, value, checked)
100
- @name, @value, @checked = name, value, checked
101
- end
102
-
103
- def inspect
104
- "#{name} = #{@value}\n"
74
+ @checked = checked
75
+ super(name, value)
105
76
  end
106
77
  end
107
78
 
108
79
  # This class represents a check box found in a Form. To activate the
109
80
  # CheckBox in the Form, set the checked method to true.
110
- class CheckBox
111
- attr_accessor :name, :value, :checked
112
-
113
- def initialize(name, value, checked)
114
- @name, @value, @checked = name, value, checked
115
- end
116
-
117
- def inspect
118
- "#{name} = #{@value}\n"
119
- end
81
+ class CheckBox < RadioButton
120
82
  end
121
83
 
122
84
  # This class represents a select list or drop down box in a Form. Set the
@@ -124,13 +86,11 @@ module WWW
124
86
  # list of Option that were found. After finding the correct option, set
125
87
  # the select lists value to the option value:
126
88
  # selectlist.value = selectlist.options.first.value
127
- class SelectList
128
- attr_accessor :name, :options
129
- attr_reader :value
89
+ class SelectList < Field
90
+ attr_accessor :options
130
91
 
131
92
  def initialize(name, node)
132
- @name = name
133
- @value = nil
93
+ value = nil
134
94
  @options = WWW::Mechanize::List.new
135
95
 
136
96
  # parse
@@ -138,19 +98,18 @@ module WWW
138
98
  if n.name.downcase == 'option'
139
99
  option = Option.new(n)
140
100
  @options << option
141
- @value = option.value if option.selected
101
+ value = option.value if option.selected
142
102
  end
143
103
  }
144
- @value = @options.first.value if (@value == nil && @options.first)
104
+ value = @options.first.value if (value == nil && @options.first)
105
+ super(name, value)
145
106
  end
146
107
 
108
+ alias :old_value= :value=
109
+
147
110
  def value=(value)
148
111
  @value = value.to_s
149
112
  end
150
-
151
- def inspect
152
- "#{name} = #{@value}\n"
153
- end
154
113
  end
155
114
 
156
115
  # This class contains an option found within SelectList. A
@@ -166,4 +125,5 @@ module WWW
166
125
  @selected = node.attributes['selected'] ? true : false
167
126
  end
168
127
  end
128
+ end
169
129
  end
@@ -1,10 +1,44 @@
1
1
  module WWW
2
2
  class Mechanize
3
+ # = Synopsis
4
+ # This class provides syntax sugar to help find things within Mechanize.
5
+ # Most calls in Mechanize that return arrays, like the 'links' method
6
+ # on WWW::Mechanize::Page, return a Mechanize::List. This class lets you
7
+ # find things with a particular attribute on the found class.
8
+ #
9
+ # If you have an array with objects that respond to the method "name",
10
+ # and you want to find all objects where name equals 'foo', your code
11
+ # would look like this:
12
+ #
13
+ # list.name('foo') # => Mechanize::List
14
+ #
15
+ # == A bit more information
16
+ # Mechanize::List will iterate through all of the objects it contains,
17
+ # testing to see if the object will respond to the "name" method. If it
18
+ # does, it will test to see if calling the name method returns a value
19
+ # equal to the value passed in.
20
+ #
21
+ # Finding the list will return another list, so it is possible to chain
22
+ # calls with Mechanize::List. For example:
23
+ #
24
+ # list.name('foo').href('bar.html')
25
+ #
26
+ # This code will find all elements with name 'foo' and href 'bar.html'.
3
27
  class List < Array
28
+ # This method provides syntax sugar so that you can write expressions
29
+ # like this:
30
+ # form.fields.with.name('foo').and.href('bar.html')
31
+ #
4
32
  def with
5
33
  self
6
34
  end
7
35
 
36
+ # This method will allow you to set the value of the first element
37
+ # in the list. For example, finding an input field with name 'foo'
38
+ # and setting the value to 'bar'.
39
+ #
40
+ # form.fields.name('foo').value = 'bar'
41
+ #
8
42
  def value=(arg)
9
43
  first().value=(arg)
10
44
  end
@@ -1,5 +1,7 @@
1
1
  # DO NOT EDIT
2
2
  # This file is auto-generated by build scripts
3
3
  module WWW
4
- MechVersion = '0.4.7'
4
+ class Mechanize
5
+ Version = '0.5.0'
6
+ end
5
7
  end
@@ -1,4 +1,4 @@
1
- class Module
1
+ class Module # :nodoc:
2
2
  def attr_finder(*syms)
3
3
  syms.each do |sym|
4
4
  class_eval %{ def #{sym.to_s}(hash = nil)
@@ -1,192 +1,174 @@
1
- module WWW
2
- # = Synopsis
3
- # This class encapsulates a page.
4
- #
5
- # == Example
6
- # require 'rubygems'
7
- # require 'mechanize'
8
- # require 'logger'
9
- #
10
- # class Body
11
- # def initialize(node)
12
- # puts node.attributes['bgcolor']
13
- # end
14
- # end
15
- #
16
- # agent = WWW::Mechanize.new { |a| a.log = Logger.new("mech.log") }
17
- # agent.user_agent_alias = 'Mac Safari'
18
- # page = agent.get("http://www.google.com/")
19
- # page.watch_for_set = { 'body' => Body }
20
- #
21
- # body = page.watches
22
- class Page
23
- attr_accessor :uri, :cookies, :response, :body, :code, :watch_for_set
24
- attr_finder :frames, :iframes, :links, :forms, :meta, :watches
25
- attr_reader :body_filter
1
+ require 'fileutils'
26
2
 
27
- alias :content :body
3
+ module WWW
4
+ class Mechanize
5
+ # = Synopsis
6
+ # This class encapsulates an HTML page. If Mechanize finds a content
7
+ # type of 'text/html', this class will be instantiated and returned.
8
+ #
9
+ # == Example
10
+ # require 'rubygems'
11
+ # require 'mechanize'
12
+ #
13
+ # agent = WWW::Mechanize.new
14
+ # agent.get('http://google.com/').class #=> WWW::Mechanize::Page
15
+ #
16
+ class Page < File
17
+ attr_accessor :watch_for_set
18
+ attr_finder :frames, :iframes, :links, :forms, :meta, :watches
28
19
 
29
- # Alias our finders so that we can lazily parse the html
30
- alias :find_frames :frames
31
- alias :find_iframes :iframes
32
- alias :find_links :links
33
- alias :find_forms :forms
34
- alias :find_meta :meta
35
- alias :find_watches :watches
36
-
37
- def initialize(uri=nil, cookies=[], response=nil, body=nil, code=nil)
38
- @uri, @cookies, @response, @body, @code = uri, cookies, response, body, code
39
- @frames = nil
40
- @iframes = nil
41
- @links = nil
42
- @forms = nil
43
- @meta = nil
44
- @watches = nil
45
- @root = nil
46
- @body_filter = lambda { |body| body }
47
- end
48
-
49
- # Set the body filter for the page. The body should be a Proc object that
50
- # returns what the body should be set to. For example, replace all
51
- # occurrences of 'foo' with 'bar':
52
- # page.body_filter = lambda { |body| body.gsub(/foo/, bar) }
53
- def body_filter=(filter)
54
- @body_filter = filter
55
- parse_html()
56
- end
20
+ # Alias our finders so that we can lazily parse the html
21
+ alias :find_frames :frames
22
+ alias :find_iframes :iframes
23
+ alias :find_links :links
24
+ alias :find_forms :forms
25
+ alias :find_meta :meta
26
+ alias :find_watches :watches
27
+
28
+ def initialize(uri=nil, response=nil, body=nil, code=nil)
29
+ super(uri, response, body, code)
30
+ @frames = nil
31
+ @iframes = nil
32
+ @links = nil
33
+ @forms = nil
34
+ @meta = nil
35
+ @watches = nil
36
+ @root = nil
37
+ @title = nil
38
+ end
39
+
40
+ # Get the response header
41
+ def header
42
+ @response
43
+ end
44
+
45
+ # Get the content type
46
+ def content_type
47
+ @response['Content-Type']
48
+ end
49
+
50
+ # Get a list of Form associated with this page.
51
+ def forms(*args)
52
+ parse_html() unless @forms
53
+ find_forms(*args)
54
+ end
55
+
56
+ # Get a list of Link associated with this page.
57
+ def links(*args)
58
+ parse_html() unless @links
59
+ find_links(*args)
60
+ end
61
+
62
+ # Get the root XML parse tree for this page.
63
+ def root
64
+ parse_html() unless @root
65
+ @root
66
+ end
67
+
68
+ # This method watches out for a particular tag, and will call back to the
69
+ # class specified for the tag in the watch_for_set method. See the example
70
+ # in this class.
71
+ def watches(*args)
72
+ parse_html() unless @watches
73
+ find_watches(*args)
74
+ end
75
+
76
+ # Get a list of Meta links, usually used for refreshing the page.
77
+ def meta(*args)
78
+ parse_html() unless @meta
79
+ find_meta(*args)
80
+ end
57
81
 
58
- # Get the response header
59
- def header
60
- @response.header
61
- end
62
-
63
- # Get the content type
64
- def content_type
65
- @response['Content-Type']
66
- end
67
-
68
- # Get a list of Form associated with this page.
69
- def forms(*args)
70
- parse_html() unless @forms
71
- find_forms(*args)
72
- end
73
-
74
- # Get a list of Link associated with this page.
75
- def links(*args)
76
- parse_html() unless @links
77
- find_links(*args)
78
- end
79
-
80
- # Get the root XML parse tree for this page.
81
- def root
82
- parse_html() unless @root
83
- @root
84
- end
85
-
86
- # This method watches out for a particular tag, and will call back to the
87
- # class specified for the tag in the watch_for_set method. See the example
88
- # in this class.
89
- def watches(*args)
90
- parse_html() unless @watches
91
- find_watches(*args)
92
- end
93
-
94
- # Get a list of Meta links, usually used for refreshing the page.
95
- def meta(*args)
96
- parse_html() unless @meta
97
- find_meta(*args)
98
- end
82
+ # Get a list of Frame from the page
83
+ def frames(*args)
84
+ parse_html() unless @frames
85
+ find_frames(*args)
86
+ end
99
87
 
100
- # Get a list of Frame from the page
101
- def frames(*args)
102
- parse_html() unless @frames
103
- find_frames(*args)
104
- end
88
+ # Get a list of IFrame from the page
89
+ def iframes(*args)
90
+ parse_html() unless @iframes
91
+ find_iframes(*args)
92
+ end
105
93
 
106
- # Get a list of IFrame from the page
107
- def iframes(*args)
108
- parse_html() unless @iframes
109
- find_iframes(*args)
110
- end
111
-
112
- def inspect
113
- string = "[meta]\n"
114
- meta.each { |l| string << l.inspect }
115
- string << "[frames]\n"
116
- frames.each { |l| string << l.inspect }
117
- string << "[iframes]\n"
118
- iframes.each { |l| string << l.inspect }
119
- string << "[links]\n"
120
- links.each { |l| string << l.inspect }
121
- string << "[forms]\n"
122
- forms.each { |l| string << l.inspect }
123
- string
124
- end
94
+ # Fetch the title of the page
95
+ def title
96
+ parse_html() unless @title
97
+ @title
98
+ end
99
+
100
+ def inspect
101
+ "Page: [#{title} '#{uri.to_s}']"
102
+ end
125
103
 
126
- private
127
-
128
- def parse_html
129
- raise Mechanize::ContentTypeError.new(content_type()) unless
130
- content_type() =~ /^text\/html/
131
-
132
- # construct parser and feed with HTML
133
- parser = HTMLTree::XMLParser.new
134
- begin
135
- parser.feed(body_filter.call(@body))
136
- rescue => ex
137
- if ex.message =~ /attempted adding second root element to document/ and
138
- # Put the whole document inside a single root element, which I simply name
139
- # <root>, just to make the parser happy. It's no longer valid HTML, but
140
- # without a single root element, it's not valid HTML as well.
141
-
142
- # TODO: leave a possible doctype definition outside this element.
143
- parser = HTMLTree::XMLParser.new
144
- parser.feed("<root>" + @body + "</root>")
145
- else
146
- raise
104
+ private
105
+
106
+ def parse_html
107
+ raise Mechanize::ContentTypeError.new(content_type()) unless
108
+ content_type() =~ /^text\/html/
109
+
110
+ # construct parser and feed with HTML
111
+ parser = HTMLTree::XMLParser.new
112
+ begin
113
+ parser.feed(@body)
114
+ rescue => ex
115
+ if ex.message =~ /attempted adding second root element to document/ and
116
+ # Put the whole document inside a single root element, which I
117
+ # simply name <root>, just to make the parser happy. It's no
118
+ # longer valid HTML, but without a single root element, it's not
119
+ # valid HTML as well.
120
+
121
+ # TODO: leave a possible doctype definition outside this element.
122
+ parser = HTMLTree::XMLParser.new
123
+ parser.feed("<root>" + @body + "</root>")
124
+ else
125
+ raise
126
+ end
147
127
  end
148
- end
149
-
150
- @root = parser.document
151
-
152
- @forms = WWW::Mechanize::List.new
153
- @links = WWW::Mechanize::List.new
154
- @meta = WWW::Mechanize::List.new
155
- @frames = WWW::Mechanize::List.new
156
- @iframes = WWW::Mechanize::List.new
157
- @watches = {}
158
-
159
- @root.each_recursive {|node|
160
- name = node.name.downcase
161
-
162
- case name
163
- when 'form'
164
- form = Form.new(node)
165
- form.action ||= @uri
166
- @forms << form
167
- when 'a'
168
- @links << Link.new(node)
169
- when 'meta'
170
- equiv = node.attributes['http-equiv']
171
- content = node.attributes['content']
172
- if equiv != nil && equiv.downcase == 'refresh'
173
- if content != nil && content =~ /^\d+\s*;\s*url\s*=\s*(\S+)/i
174
- node.attributes['href'] = $1
175
- @meta << Meta.new(node)
128
+
129
+ @root = parser.document
130
+
131
+ @forms = WWW::Mechanize::List.new
132
+ @links = WWW::Mechanize::List.new
133
+ @meta = WWW::Mechanize::List.new
134
+ @frames = WWW::Mechanize::List.new
135
+ @iframes = WWW::Mechanize::List.new
136
+ @watches = {}
137
+
138
+ @root.each_recursive {|node|
139
+ name = node.name.downcase
140
+
141
+ case name
142
+ when 'form'
143
+ form = Form.new(node)
144
+ form.action ||= @uri
145
+ @forms << form
146
+ when 'title'
147
+ @title = node.text
148
+ when 'a'
149
+ @links << Link.new(node)
150
+ when 'meta'
151
+ equiv = node.attributes['http-equiv']
152
+ content = node.attributes['content']
153
+ if equiv != nil && equiv.downcase == 'refresh'
154
+ if content != nil && content =~ /^\d+\s*;\s*url\s*=\s*(\S+)/i
155
+ node.attributes['href'] = $1
156
+ @meta << Meta.new(node)
157
+ end
158
+ end
159
+ when 'frame'
160
+ @frames << Frame.new(node)
161
+ when 'iframe'
162
+ @iframes << Frame.new(node)
163
+ else
164
+ if @watch_for_set and @watch_for_set.keys.include?( name )
165
+ @watches[name] = [] unless @watches[name]
166
+ klass = @watch_for_set[name]
167
+ @watches[name] << (klass ? klass.new(node) : node)
176
168
  end
177
169
  end
178
- when 'frame'
179
- @frames << Frame.new(node)
180
- when 'iframe'
181
- @iframes << Frame.new(node)
182
- else
183
- if @watch_for_set and @watch_for_set.keys.include?( name )
184
- @watches[name] = [] unless @watches[name]
185
- klass = @watch_for_set[name]
186
- @watches[name] << (klass ? klass.new(node) : node)
187
- end
188
- end
189
- }
170
+ }
171
+ end
190
172
  end
191
173
  end
192
174
  end