mechanize 0.6.11 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (91) hide show
  1. data/CHANGELOG.txt +8 -0
  2. data/Manifest.txt +31 -22
  3. data/lib/mechanize.rb +2 -652
  4. data/lib/www/mechanize.rb +635 -0
  5. data/lib/www/mechanize/content_type_error.rb +16 -0
  6. data/lib/www/mechanize/cookie.rb +64 -0
  7. data/lib/{mechanize/cookie.rb → www/mechanize/cookie_jar.rb} +0 -60
  8. data/lib/www/mechanize/file.rb +73 -0
  9. data/lib/www/mechanize/file_saver.rb +39 -0
  10. data/lib/{mechanize → www/mechanize}/form.rb +119 -137
  11. data/lib/www/mechanize/form/button.rb +8 -0
  12. data/lib/www/mechanize/form/check_box.rb +13 -0
  13. data/lib/www/mechanize/form/field.rb +28 -0
  14. data/lib/www/mechanize/form/file_upload.rb +24 -0
  15. data/lib/www/mechanize/form/image_button.rb +23 -0
  16. data/lib/www/mechanize/form/multi_select_list.rb +69 -0
  17. data/lib/www/mechanize/form/option.rb +51 -0
  18. data/lib/www/mechanize/form/radio_button.rb +38 -0
  19. data/lib/www/mechanize/form/select_list.rb +41 -0
  20. data/lib/www/mechanize/headers.rb +12 -0
  21. data/lib/{mechanize → www/mechanize}/history.rb +0 -0
  22. data/lib/{mechanize → www/mechanize}/inspect.rb +21 -28
  23. data/lib/{mechanize → www/mechanize}/list.rb +0 -0
  24. data/lib/{mechanize → www/mechanize}/monkey_patch.rb +19 -0
  25. data/lib/www/mechanize/page.rb +121 -0
  26. data/lib/www/mechanize/page/base.rb +10 -0
  27. data/lib/www/mechanize/page/frame.rb +22 -0
  28. data/lib/www/mechanize/page/link.rb +50 -0
  29. data/lib/www/mechanize/page/meta.rb +10 -0
  30. data/lib/www/mechanize/pluggable_parsers.rb +93 -0
  31. data/lib/{mechanize/errors.rb → www/mechanize/response_code_error.rb} +1 -13
  32. data/test/{test_includes.rb → helper.rb} +4 -18
  33. data/test/{test_servlets.rb → servlets.rb} +0 -0
  34. data/test/tc_authenticate.rb +1 -8
  35. data/test/tc_bad_links.rb +3 -10
  36. data/test/tc_blank_form.rb +1 -8
  37. data/test/tc_checkboxes.rb +1 -8
  38. data/test/tc_cookie_class.rb +1 -6
  39. data/test/tc_cookie_jar.rb +1 -7
  40. data/test/tc_cookies.rb +10 -17
  41. data/test/tc_encoded_links.rb +5 -12
  42. data/test/tc_errors.rb +4 -11
  43. data/test/tc_follow_meta.rb +1 -8
  44. data/test/tc_form_action.rb +6 -14
  45. data/test/tc_form_as_hash.rb +1 -9
  46. data/test/tc_form_button.rb +5 -8
  47. data/test/tc_form_no_inputname.rb +1 -8
  48. data/test/tc_forms.rb +16 -24
  49. data/test/tc_frames.rb +3 -10
  50. data/test/tc_gzipping.rb +2 -9
  51. data/test/tc_history.rb +5 -12
  52. data/test/tc_html_unscape_forms.rb +8 -15
  53. data/test/tc_if_modified_since.rb +1 -6
  54. data/test/tc_keep_alive.rb +1 -8
  55. data/test/tc_links.rb +12 -19
  56. data/test/tc_mech.rb +26 -34
  57. data/test/{test_mechanize_file.rb → tc_mechanize_file.rb} +1 -6
  58. data/test/tc_multi_select.rb +10 -17
  59. data/test/tc_no_attributes.rb +1 -8
  60. data/test/tc_page.rb +3 -10
  61. data/test/tc_pluggable_parser.rb +8 -15
  62. data/test/tc_post_form.rb +3 -10
  63. data/test/tc_pretty_print.rb +3 -10
  64. data/test/tc_radiobutton.rb +2 -9
  65. data/test/tc_referer.rb +13 -20
  66. data/test/tc_relative_links.rb +1 -8
  67. data/test/tc_response_code.rb +14 -21
  68. data/test/tc_save_file.rb +1 -9
  69. data/test/tc_select.rb +3 -10
  70. data/test/tc_select_all.rb +2 -10
  71. data/test/tc_select_none.rb +2 -10
  72. data/test/tc_select_noopts.rb +2 -9
  73. data/test/tc_set_fields.rb +2 -9
  74. data/test/tc_ssl_server.rb +5 -12
  75. data/test/tc_subclass.rb +2 -9
  76. data/test/tc_textarea.rb +2 -9
  77. data/test/tc_upload.rb +2 -9
  78. data/test/test_all.rb +4 -43
  79. metadata +96 -80
  80. data/lib/mechanize/form_elements.rb +0 -254
  81. data/lib/mechanize/net-overrides/net/http.rb +0 -2107
  82. data/lib/mechanize/net-overrides/net/https.rb +0 -172
  83. data/lib/mechanize/net-overrides/net/protocol.rb +0 -380
  84. data/lib/mechanize/page.rb +0 -138
  85. data/lib/mechanize/page_elements.rb +0 -77
  86. data/lib/mechanize/parsers/rexml_page.rb +0 -35
  87. data/lib/mechanize/pluggable_parsers.rb +0 -204
  88. data/lib/mechanize/rexml.rb +0 -236
  89. data/setup.rb +0 -1585
  90. data/test/tc_proxy.rb +0 -25
  91. data/test/tc_watches.rb +0 -32
@@ -1,138 +0,0 @@
1
- require 'fileutils'
2
- require 'hpricot'
3
- require 'forwardable'
4
-
5
- module WWW
6
- class Mechanize
7
- # = Synopsis
8
- # This class encapsulates an HTML page. If Mechanize finds a content
9
- # type of 'text/html', this class will be instantiated and returned.
10
- #
11
- # == Example
12
- # require 'rubygems'
13
- # require 'mechanize'
14
- #
15
- # agent = WWW::Mechanize.new
16
- # agent.get('http://google.com/').class #=> WWW::Mechanize::Page
17
- #
18
- class Page < File
19
- extend Forwardable
20
-
21
- attr_reader :parser, :title, :watch_for_set
22
- attr_reader :frames, :iframes, :links, :forms, :meta, :watches, :bases
23
- attr_accessor :mech
24
-
25
- alias :root :parser
26
-
27
- def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
28
- super(uri, response, body, code)
29
- @watch_for_set ||= {}
30
- @mech ||= mech
31
-
32
- raise Mechanize::ContentTypeError.new(response['content-type']) unless
33
- content_type() =~ /^text\/html/
34
-
35
- # construct parser and feed with HTML
36
- if body && response
37
- @parser ||= Hpricot.parse(body)
38
- parse_html
39
- end
40
- end
41
-
42
- # Get the content type
43
- def content_type
44
- @response['content-type']
45
- end
46
-
47
- # Search through the page like Hpricot
48
- def_delegator :@parser, :search, :search
49
- def_delegator :@parser, :/, :/
50
- def_delegator :@parser, :at, :at
51
-
52
- def watch_for_set=(obj)
53
- @watch_for_set = obj
54
- parse_html if @body && @watch_for_set
55
- end
56
-
57
- # Find a form with +name+. Form will be yielded if a block is given.
58
- def form(name)
59
- f = forms.name(name).first
60
- yield f if block_given?
61
- f
62
- end
63
-
64
- private
65
-
66
- def parse_html
67
- @forms = WWW::Mechanize::List.new
68
- @links = WWW::Mechanize::List.new
69
- @meta = WWW::Mechanize::List.new
70
- @frames = WWW::Mechanize::List.new
71
- @iframes = WWW::Mechanize::List.new
72
- @bases = WWW::Mechanize::List.new
73
- @watches = {}
74
-
75
- # Set the title
76
- @title = if (@parser/'title').text.length > 0
77
- (@parser/'title').text
78
- end
79
-
80
- # Find all 'base' tags
81
- (@parser/'base').each do |node|
82
- @bases << Base.new(node, @mech, self)
83
- end
84
-
85
- # Find all the form tags
86
- (@parser/'form').each do |html_form|
87
- form = Form.new(html_form, @mech, self)
88
- form.action ||= @uri
89
- @forms << form
90
- end
91
-
92
- # Find all the 'a' tags
93
- (@parser/'a').each do |node|
94
- @links << Link.new(node, @mech, self)
95
- end
96
-
97
- # Find all the 'area' tags
98
- (@parser/'area').each do |node|
99
- @links << Link.new(node, @mech, self)
100
- end
101
-
102
- # Find all 'meta' tags
103
- (@parser/'meta').each do |node|
104
- next unless node['http-equiv']
105
- next unless node['content']
106
- equiv = node['http-equiv']
107
- content = node['content']
108
- if equiv != nil && equiv.downcase == 'refresh'
109
- if content != nil && content =~ /^\d+\s*;\s*url\s*=\s*'?([^\s']+)/i
110
- node['href'] = $1
111
- @meta << Meta.new(node, @mech, self)
112
- end
113
- end
114
- end
115
-
116
- # Find all 'frame' tags
117
- (@parser/'frame').each do |node|
118
- @frames << Frame.new(node, @mech, self)
119
- end
120
-
121
- # Find all 'iframe' tags
122
- (@parser/'iframe').each do |node|
123
- @iframes << Frame.new(node, @mech, self)
124
- end
125
-
126
- # Find all watch tags
127
- unless @watch_for_set.nil?
128
- @watch_for_set.each do |key, klass|
129
- (@parser/key).each do |node|
130
- @watches[key] ||= []
131
- @watches[key] << (klass ? klass.new(node) : node)
132
- end
133
- end
134
- end
135
- end
136
- end
137
- end
138
- end
@@ -1,77 +0,0 @@
1
- module WWW
2
- class Mechanize
3
- # This class encapsulates links. It contains the text and the URI for
4
- # 'a' tags parsed out of an HTML page. If the link contains an image,
5
- # the alt text will be used for that image.
6
- #
7
- # For example, the text for the following links will both be 'Hello World':
8
- #
9
- # <a href="http://rubyforge.org">Hello World</a>
10
- # <a href="http://rubyforge.org"><img src="test.jpg" alt="Hello World"></a>
11
- class Link
12
- attr_reader :node
13
- attr_reader :href
14
- attr_reader :text
15
- attr_reader :attributes
16
- attr_reader :page
17
- alias :to_s :text
18
- alias :referer :page
19
-
20
- def initialize(node, mech, page)
21
- @node = node
22
- @href = node['href']
23
- @text = node.inner_text
24
- @page = page
25
- @mech = mech
26
- @attributes = node
27
-
28
- # If there is no text, try to find an image and use its alt text
29
- if (@text.nil? || @text.length == 0) && (node/'img').length > 0
30
- @text = ''
31
- (node/'img').each do |e|
32
- @text << ( e['alt'] || '')
33
- end
34
- end
35
-
36
- end
37
-
38
- def uri
39
- URI.parse(@href)
40
- end
41
-
42
- # Click on this link
43
- def click
44
- @mech.click self
45
- end
46
- end
47
-
48
- # This class encapsulates a Meta tag. Mechanize treats meta tags just
49
- # like 'a' tags. Meta objects will contain links, but most likely will
50
- # have no text.
51
- class Meta < Link
52
- end
53
-
54
- # This class encapsulates a 'frame' tag. Frame objects can be treated
55
- # just like Link objects. They contain src, the link they refer to,
56
- # name, the name of the frame. 'src' and 'name' are aliased to 'href'
57
- # and 'text' respectively so that a Frame object can be treated just
58
- # like a Link.
59
- class Frame < Link
60
- alias :src :href
61
- alias :name :text
62
-
63
- def initialize(node, mech, referer)
64
- super(node, mech, referer)
65
- @node = node
66
- @text = node['name']
67
- @href = node['src']
68
- end
69
- end
70
-
71
- # This class encapsulates a Base tag. Mechanize treats base tags just like
72
- # 'a' tags. Base objects will contain links, but most likely will have
73
- # no text.
74
- class Base < Link
75
- end
76
- end
77
- end
@@ -1,35 +0,0 @@
1
- require 'web/htmltools/xmltree'
2
- require 'mechanize/rexml'
3
-
4
- class WWW::Mechanize::REXMLPage < WWW::Mechanize::Page
5
- def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
6
- @body = body
7
- @watch_for_set = {}
8
- @mech = mech
9
-
10
- # construct parser and feed with HTML
11
- parser = HTMLTree::XMLParser.new
12
- begin
13
- parser.feed(@body)
14
- rescue => ex
15
- if ex.message =~ /attempted adding second root element to document/ and
16
- # Put the whole document inside a single root element, which I
17
- # simply name <root>, just to make the parser happy. It's no
18
- #longer valid HTML, but without a single root element, it's not
19
- # valid HTML either.
20
-
21
- # TODO: leave a possible doctype definition outside this element.
22
- parser = HTMLTree::XMLParser.new
23
- parser.feed("<root>" + @body + "</root>")
24
- else
25
- raise
26
- end
27
- end
28
-
29
- @root = parser.document
30
-
31
- yield self if block_given?
32
-
33
- super(uri, response, body, code)
34
- end
35
- end
@@ -1,204 +0,0 @@
1
- module WWW
2
- class Mechanize
3
- # = Synopsis
4
- # This is the default (and base) class for the Pluggable Parsers. If
5
- # Mechanize cannot find an appropriate class to use for the content type,
6
- # this class will be used. For example, if you download a JPG, Mechanize
7
- # will not know how to parse it, so this class will be instantiated.
8
- #
9
- # This is a good class to use as the base class for building your own
10
- # pluggable parsers.
11
- #
12
- # == Example
13
- # require 'rubygems'
14
- # require 'mechanize'
15
- #
16
- # agent = WWW::Mechanize.new
17
- # agent.get('http://example.com/foo.jpg').class #=> WWW::Mechanize::File
18
- #
19
- class File
20
- attr_accessor :uri, :response, :body, :code, :filename
21
- alias :header :response
22
-
23
- alias :content :body
24
-
25
- def initialize(uri=nil, response=nil, body=nil, code=nil)
26
- @uri, @body, @code = uri, body, code
27
- @response = Headers.new
28
-
29
- # Copy the headers in to a hash to prevent memory leaks
30
- if response
31
- response.each { |k,v|
32
- @response[k] = v
33
- }
34
- end
35
-
36
- @filename = 'index.html'
37
-
38
- # Set the filename
39
- if disposition = @response['content-disposition']
40
- disposition.split(/;\s*/).each do |pair|
41
- k,v = pair.split(/=/, 2)
42
- @filename = v if k.downcase == 'filename'
43
- end
44
- else
45
- if @uri
46
- @filename = @uri.path.split(/\//).last || 'index.html'
47
- @filename << ".html" unless @filename =~ /\./
48
- end
49
- end
50
-
51
- yield self if block_given?
52
- end
53
-
54
- # Use this method to save the content of this object to filename
55
- def save_as(filename = nil)
56
- if filename.nil?
57
- filename = @filename
58
- number = 1
59
- while(::File.exists?(filename))
60
- filename = "#{@filename}.#{number}"
61
- number += 1
62
- end
63
- end
64
-
65
- ::File::open(filename, "wb") { |f|
66
- f.write body
67
- }
68
- end
69
-
70
- alias :save :save_as
71
- end
72
-
73
- # = Synopsis
74
- # This is a pluggable parser that automatically saves every file
75
- # it encounters. It saves the files as a tree, reflecting the
76
- # host and file path.
77
- #
78
- # == Example to save all PDFs
79
- # require 'rubygems'
80
- # require 'mechanize'
81
- #
82
- # agent = WWW::Mechanize.new
83
- # agent.pluggable_parser.pdf = WWW::Mechanize::FileSaver
84
- # agent.get('http://example.com/foo.pdf')
85
- #
86
- class FileSaver < File
87
- attr_reader :filename
88
-
89
- def initialize(uri=nil, response=nil, body=nil, code=nil)
90
- super(uri, response, body, code)
91
- path = uri.path.empty? ? 'index.html' : uri.path.gsub(/^[\/]*/, '')
92
- path += 'index.html' if path =~ /\/$/
93
-
94
- split_path = path.split(/\//)
95
- filename = split_path.length > 0 ? split_path.pop : 'index.html'
96
- joined_path = split_path.join(::File::SEPARATOR)
97
- path = if joined_path.empty?
98
- uri.host
99
- else
100
- "#{uri.host}#{::File::SEPARATOR}#{joined_path}"
101
- end
102
-
103
- @filename = "#{path}#{::File::SEPARATOR}#{filename}"
104
- FileUtils.mkdir_p(path)
105
- save_as(@filename)
106
- end
107
- end
108
-
109
- # = Synopsis
110
- # This class is used to register and maintain pluggable parsers for
111
- # Mechanize to use.
112
- #
113
- # A Pluggable Parser is a parser that Mechanize uses for any particular
114
- # content type. Mechanize will ask PluggableParser for the class it
115
- # should initialize given any content type. This class allows users to
116
- # register their own pluggable parsers, or modify existing pluggable
117
- # parsers.
118
- #
119
- # PluggableParser returns a WWW::Mechanize::File object for content types
120
- # that it does not know how to handle. WWW::Mechanize::File provides
121
- # basic functionality for any content type, so it is a good class to
122
- # extend when building your own parsers.
123
- # == Example
124
- # To create your own parser, just create a class that takes four
125
- # parameters in the constructor. Here is an example of registering
126
- # a pluggable parser that handles CSV files:
127
- # class CSVParser < WWW::Mechanize::File
128
- # attr_reader :csv
129
- # def initialize(uri=nil, response=nil, body=nil, code=nil)
130
- # super(uri, response, body, code)
131
- # @csv = CSV.parse(body)
132
- # end
133
- # end
134
- # agent = WWW::Mechanize.new
135
- # agent.pluggable_parser.csv = CSVParser
136
- # agent.get('http://example.com/test.csv') # => CSVParser
137
- # Now any page that returns the content type of 'text/csv' will initialize
138
- # a CSVParser and return that object to the caller.
139
- #
140
- # To register a pluggable parser for a content type that pluggable parser
141
- # does not know about, just use the hash syntax:
142
- # agent.pluggable_parser['text/something'] = SomeClass
143
- #
144
- # To set the default parser, just use the 'default' method:
145
- # agent.pluggable_parser.default = SomeClass
146
- # Now all unknown content types will be instances of SomeClass.
147
- class PluggableParser
148
- CONTENT_TYPES = {
149
- :html => 'text/html',
150
- :pdf => 'application/pdf',
151
- :csv => 'text/csv',
152
- :xml => 'text/xml',
153
- }
154
-
155
- attr_accessor :default
156
-
157
- def initialize
158
- @parsers = { CONTENT_TYPES[:html] => Page }
159
- @default = File
160
- end
161
-
162
- def parser(content_type)
163
- content_type.nil? ? default : @parsers[content_type] || default
164
- end
165
-
166
- def register_parser(content_type, klass)
167
- @parsers[content_type] = klass
168
- end
169
-
170
- def html=(klass)
171
- register_parser(CONTENT_TYPES[:html], klass)
172
- end
173
-
174
- def pdf=(klass)
175
- register_parser(CONTENT_TYPES[:pdf], klass)
176
- end
177
-
178
- def csv=(klass)
179
- register_parser(CONTENT_TYPES[:csv], klass)
180
- end
181
-
182
- def xml=(klass)
183
- register_parser(CONTENT_TYPES[:xml], klass)
184
- end
185
-
186
- def [](content_type)
187
- @parsers[content_type]
188
- end
189
-
190
- def []=(content_type, klass)
191
- @parsers[content_type] = klass
192
- end
193
- end
194
-
195
- class Headers < Hash
196
- def [](key)
197
- super(key.downcase)
198
- end
199
- def []=(key, value)
200
- super(key.downcase, value)
201
- end
202
- end
203
- end
204
- end