mechanize 0.6.11 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (91) hide show
  1. data/CHANGELOG.txt +8 -0
  2. data/Manifest.txt +31 -22
  3. data/lib/mechanize.rb +2 -652
  4. data/lib/www/mechanize.rb +635 -0
  5. data/lib/www/mechanize/content_type_error.rb +16 -0
  6. data/lib/www/mechanize/cookie.rb +64 -0
  7. data/lib/{mechanize/cookie.rb → www/mechanize/cookie_jar.rb} +0 -60
  8. data/lib/www/mechanize/file.rb +73 -0
  9. data/lib/www/mechanize/file_saver.rb +39 -0
  10. data/lib/{mechanize → www/mechanize}/form.rb +119 -137
  11. data/lib/www/mechanize/form/button.rb +8 -0
  12. data/lib/www/mechanize/form/check_box.rb +13 -0
  13. data/lib/www/mechanize/form/field.rb +28 -0
  14. data/lib/www/mechanize/form/file_upload.rb +24 -0
  15. data/lib/www/mechanize/form/image_button.rb +23 -0
  16. data/lib/www/mechanize/form/multi_select_list.rb +69 -0
  17. data/lib/www/mechanize/form/option.rb +51 -0
  18. data/lib/www/mechanize/form/radio_button.rb +38 -0
  19. data/lib/www/mechanize/form/select_list.rb +41 -0
  20. data/lib/www/mechanize/headers.rb +12 -0
  21. data/lib/{mechanize → www/mechanize}/history.rb +0 -0
  22. data/lib/{mechanize → www/mechanize}/inspect.rb +21 -28
  23. data/lib/{mechanize → www/mechanize}/list.rb +0 -0
  24. data/lib/{mechanize → www/mechanize}/monkey_patch.rb +19 -0
  25. data/lib/www/mechanize/page.rb +121 -0
  26. data/lib/www/mechanize/page/base.rb +10 -0
  27. data/lib/www/mechanize/page/frame.rb +22 -0
  28. data/lib/www/mechanize/page/link.rb +50 -0
  29. data/lib/www/mechanize/page/meta.rb +10 -0
  30. data/lib/www/mechanize/pluggable_parsers.rb +93 -0
  31. data/lib/{mechanize/errors.rb → www/mechanize/response_code_error.rb} +1 -13
  32. data/test/{test_includes.rb → helper.rb} +4 -18
  33. data/test/{test_servlets.rb → servlets.rb} +0 -0
  34. data/test/tc_authenticate.rb +1 -8
  35. data/test/tc_bad_links.rb +3 -10
  36. data/test/tc_blank_form.rb +1 -8
  37. data/test/tc_checkboxes.rb +1 -8
  38. data/test/tc_cookie_class.rb +1 -6
  39. data/test/tc_cookie_jar.rb +1 -7
  40. data/test/tc_cookies.rb +10 -17
  41. data/test/tc_encoded_links.rb +5 -12
  42. data/test/tc_errors.rb +4 -11
  43. data/test/tc_follow_meta.rb +1 -8
  44. data/test/tc_form_action.rb +6 -14
  45. data/test/tc_form_as_hash.rb +1 -9
  46. data/test/tc_form_button.rb +5 -8
  47. data/test/tc_form_no_inputname.rb +1 -8
  48. data/test/tc_forms.rb +16 -24
  49. data/test/tc_frames.rb +3 -10
  50. data/test/tc_gzipping.rb +2 -9
  51. data/test/tc_history.rb +5 -12
  52. data/test/tc_html_unscape_forms.rb +8 -15
  53. data/test/tc_if_modified_since.rb +1 -6
  54. data/test/tc_keep_alive.rb +1 -8
  55. data/test/tc_links.rb +12 -19
  56. data/test/tc_mech.rb +26 -34
  57. data/test/{test_mechanize_file.rb → tc_mechanize_file.rb} +1 -6
  58. data/test/tc_multi_select.rb +10 -17
  59. data/test/tc_no_attributes.rb +1 -8
  60. data/test/tc_page.rb +3 -10
  61. data/test/tc_pluggable_parser.rb +8 -15
  62. data/test/tc_post_form.rb +3 -10
  63. data/test/tc_pretty_print.rb +3 -10
  64. data/test/tc_radiobutton.rb +2 -9
  65. data/test/tc_referer.rb +13 -20
  66. data/test/tc_relative_links.rb +1 -8
  67. data/test/tc_response_code.rb +14 -21
  68. data/test/tc_save_file.rb +1 -9
  69. data/test/tc_select.rb +3 -10
  70. data/test/tc_select_all.rb +2 -10
  71. data/test/tc_select_none.rb +2 -10
  72. data/test/tc_select_noopts.rb +2 -9
  73. data/test/tc_set_fields.rb +2 -9
  74. data/test/tc_ssl_server.rb +5 -12
  75. data/test/tc_subclass.rb +2 -9
  76. data/test/tc_textarea.rb +2 -9
  77. data/test/tc_upload.rb +2 -9
  78. data/test/test_all.rb +4 -43
  79. metadata +96 -80
  80. data/lib/mechanize/form_elements.rb +0 -254
  81. data/lib/mechanize/net-overrides/net/http.rb +0 -2107
  82. data/lib/mechanize/net-overrides/net/https.rb +0 -172
  83. data/lib/mechanize/net-overrides/net/protocol.rb +0 -380
  84. data/lib/mechanize/page.rb +0 -138
  85. data/lib/mechanize/page_elements.rb +0 -77
  86. data/lib/mechanize/parsers/rexml_page.rb +0 -35
  87. data/lib/mechanize/pluggable_parsers.rb +0 -204
  88. data/lib/mechanize/rexml.rb +0 -236
  89. data/setup.rb +0 -1585
  90. data/test/tc_proxy.rb +0 -25
  91. data/test/tc_watches.rb +0 -32
@@ -0,0 +1,121 @@
1
+ require 'fileutils'
2
+ require 'hpricot'
3
+ require 'forwardable'
4
+
5
+ require 'www/mechanize/page/link'
6
+ require 'www/mechanize/page/meta'
7
+ require 'www/mechanize/page/base'
8
+ require 'www/mechanize/page/frame'
9
+ require 'www/mechanize/headers'
10
+
11
+ module WWW
12
+ class Mechanize
13
+ # = Synopsis
14
+ # This class encapsulates an HTML page. If Mechanize finds a content
15
+ # type of 'text/html', this class will be instantiated and returned.
16
+ #
17
+ # == Example
18
+ # require 'rubygems'
19
+ # require 'mechanize'
20
+ #
21
+ # agent = WWW::Mechanize.new
22
+ # agent.get('http://google.com/').class #=> WWW::Mechanize::Page
23
+ #
24
+ class Page < WWW::Mechanize::File
25
+ extend Forwardable
26
+
27
+ attr_accessor :mech
28
+
29
+ def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
30
+ super(uri, response, body, code)
31
+ @mech ||= mech
32
+
33
+ raise Mechanize::ContentTypeError.new(response['content-type']) unless
34
+ content_type() =~ /^text\/html/
35
+
36
+ @parser = @links = @forms = @meta = @bases = @frames = @iframes = nil
37
+ end
38
+
39
+ def title
40
+ @title ||= if parser && search('title').text.length > 0
41
+ search('title').text
42
+ end
43
+ end
44
+
45
+ def parser
46
+ @parser ||= body && response ? Hpricot.parse(body) : nil
47
+ end
48
+ alias :root :parser
49
+
50
+ # Get the content type
51
+ def content_type
52
+ response['content-type']
53
+ end
54
+
55
+ # Search through the page like Hpricot
56
+ def_delegator :parser, :search, :search
57
+ def_delegator :parser, :/, :/
58
+ def_delegator :parser, :at, :at
59
+
60
+ # Find a form with +name+. Form will be yielded if a block is given.
61
+ def form(name)
62
+ f = forms.name(name).first
63
+ yield f if block_given?
64
+ f
65
+ end
66
+
67
+ def links
68
+ @links ||= WWW::Mechanize::List.new(
69
+ %w{ a area }.map do |tag|
70
+ search(tag).map do |node|
71
+ Link.new(node, @mech, self)
72
+ end
73
+ end.flatten
74
+ )
75
+ end
76
+
77
+ def forms
78
+ @forms ||= WWW::Mechanize::List.new(
79
+ search('form').map do |html_form|
80
+ form = Form.new(html_form, @mech, self)
81
+ form.action ||= @uri
82
+ form
83
+ end
84
+ )
85
+ end
86
+
87
+ def meta
88
+ @meta ||= WWW::Mechanize::List.new(
89
+ search('meta').map do |node|
90
+ next unless node['http-equiv'] && node['content']
91
+ (equiv, content) = node['http-equiv'], node['content']
92
+ if equiv && equiv.downcase == 'refresh'
93
+ if content && content =~ /^\d+\s*;\s*url\s*=\s*'?([^\s']+)/i
94
+ node['href'] = $1
95
+ Meta.new(node, @mech, self)
96
+ end
97
+ end
98
+ end.compact
99
+ )
100
+ end
101
+
102
+ def bases
103
+ @bases ||= WWW::Mechanize::List.new(
104
+ search('base').map { |node| Base.new(node, @mech, self) }
105
+ )
106
+ end
107
+
108
+ def frames
109
+ @frames ||= WWW::Mechanize::List.new(
110
+ search('frame').map { |node| Frame.new(node, @mech, self) }
111
+ )
112
+ end
113
+
114
+ def iframes
115
+ @iframes ||= WWW::Mechanize::List.new(
116
+ search('iframe').map { |node| Frame.new(node, @mech, self) }
117
+ )
118
+ end
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,10 @@
1
+ module WWW
2
+ class Mechanize
3
+ class Page < WWW::Mechanize::File
4
+ # This class encapsulates a Base tag. Mechanize treats base tags just
5
+ # like 'a' tags. Base objects will contain links, but most likely will
6
+ # have no text.
7
+ class Base < Link; end
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,22 @@
1
+ module WWW
2
+ class Mechanize
3
+ class Page < WWW::Mechanize::File
4
+ # This class encapsulates a 'frame' tag. Frame objects can be treated
5
+ # just like Link objects. They contain src, the link they refer to,
6
+ # and name, the name of the frame. 'src' and 'name' are aliased to 'href'
7
+ # and 'text' respectively so that a Frame object can be treated just
8
+ # like a Link.
9
+ class Frame < Link
10
+ alias :src :href
11
+ alias :name :text
12
+
13
+ def initialize(node, mech, referer)
14
+ super(node, mech, referer)
15
+ @node = node
16
+ @text = node['name']
17
+ @href = node['src']
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,50 @@
1
+ module WWW
2
+ class Mechanize
3
+ class Page < WWW::Mechanize::File
4
+ # This class encapsulates links. It contains the text and the URI for
5
+ # 'a' tags parsed out of an HTML page. If the link contains an image,
6
+ # the alt text will be used for that image.
7
+ #
8
+ # For example, the text for the following links with both be 'Hello World':
9
+ #
10
+ # <a href="http://rubyforge.org">Hello World</a>
11
+ # <a href="http://rubyforge.org"><img src="test.jpg" alt="Hello World"></a>
12
+ class Link
13
+ attr_reader :node
14
+ attr_reader :href
15
+ attr_reader :text
16
+ attr_reader :attributes
17
+ attr_reader :page
18
+ alias :to_s :text
19
+ alias :referer :page
20
+
21
+ def initialize(node, mech, page)
22
+ @node = node
23
+ @href = node['href']
24
+ @text = node.inner_text
25
+ @page = page
26
+ @mech = mech
27
+ @attributes = node
28
+
29
+ # If there is no text, try to find an image and use it's alt text
30
+ if (@text.nil? || @text.length == 0) && (node/'img').length > 0
31
+ @text = ''
32
+ (node/'img').each do |e|
33
+ @text << ( e['alt'] || '')
34
+ end
35
+ end
36
+
37
+ end
38
+
39
+ def uri
40
+ URI.parse(@href)
41
+ end
42
+
43
+ # Click on this link
44
+ def click
45
+ @mech.click self
46
+ end
47
+ end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,10 @@
1
+ module WWW
2
+ class Mechanize
3
+ class Page < WWW::Mechanize::File
4
+ # This class encapsulates a Meta tag. Mechanize treats meta tags just
5
+ # like 'a' tags. Meta objects will contain links, but most likely will
6
+ # have no text.
7
+ class Meta < Link; end
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,93 @@
1
+ require 'www/mechanize/file'
2
+ require 'www/mechanize/file_saver'
3
+ require 'www/mechanize/page'
4
+
5
+ module WWW
6
+ class Mechanize
7
+ # = Synopsis
8
+ # This class is used to register and maintain pluggable parsers for
9
+ # Mechanize to use.
10
+ #
11
+ # A Pluggable Parser is a parser that Mechanize uses for any particular
12
+ # content type. Mechanize will ask PluggableParser for the class it
13
+ # should initialize given any content type. This class allows users to
14
+ # register their own pluggable parsers, or modify existing pluggable
15
+ # parsers.
16
+ #
17
+ # PluggableParser returns a WWW::Mechanize::File object for content types
18
+ # that it does not know how to handle. WWW::Mechanize::File provides
19
+ # basic functionality for any content type, so it is a good class to
20
+ # extend when building your own parsers.
21
+ # == Example
22
+ # To create your own parser, just create a class that takes four
23
+ # parameters in the constructor. Here is an example of registering
24
+ # a pluggable parser that handles CSV files:
25
+ # class CSVParser < WWW::Mechanize::File
26
+ # attr_reader :csv
27
+ # def initialize(uri=nil, response=nil, body=nil, code=nil)
28
+ # super(uri, response, body, code)
29
+ # @csv = CSV.parse(body)
30
+ # end
31
+ # end
32
+ # agent = WWW::Mechanize.new
33
+ # agent.pluggable_parser.csv = CSVParser
34
+ # agent.get('http://example.com/test.csv') # => CSVParser
35
+ # Now any page that returns the content type of 'text/csv' will initialize
36
+ # a CSVParser and return that object to the caller.
37
+ #
38
+ # To register a pluggable parser for a content type that pluggable parser
39
+ # does not know about, just use the hash syntax:
40
+ # agent.pluggable_parser['text/something'] = SomeClass
41
+ #
42
+ # To set the default parser, just use the 'defaut' method:
43
+ # agent.pluggable_parser.default = SomeClass
44
+ # Now all unknown content types will be instances of SomeClass.
45
+ class PluggableParser
46
+ CONTENT_TYPES = {
47
+ :html => 'text/html',
48
+ :pdf => 'application/pdf',
49
+ :csv => 'text/csv',
50
+ :xml => 'text/xml',
51
+ }
52
+
53
+ attr_accessor :default
54
+
55
+ def initialize
56
+ @parsers = { CONTENT_TYPES[:html] => Page }
57
+ @default = File
58
+ end
59
+
60
+ def parser(content_type)
61
+ content_type.nil? ? default : @parsers[content_type] || default
62
+ end
63
+
64
+ def register_parser(content_type, klass)
65
+ @parsers[content_type] = klass
66
+ end
67
+
68
+ def html=(klass)
69
+ register_parser(CONTENT_TYPES[:html], klass)
70
+ end
71
+
72
+ def pdf=(klass)
73
+ register_parser(CONTENT_TYPES[:pdf], klass)
74
+ end
75
+
76
+ def csv=(klass)
77
+ register_parser(CONTENT_TYPES[:csv], klass)
78
+ end
79
+
80
+ def xml=(klass)
81
+ register_parser(CONTENT_TYPES[:xml], klass)
82
+ end
83
+
84
+ def [](content_type)
85
+ @parsers[content_type]
86
+ end
87
+
88
+ def []=(content_type, klass)
89
+ @parsers[content_type] = klass
90
+ end
91
+ end
92
+ end
93
+ end
@@ -1,18 +1,5 @@
1
1
  module WWW
2
2
  class Mechanize
3
- # =Synopsis
4
- # This class contains an error for when a pluggable parser tries to
5
- # parse a content type that it does not know how to handle. For example
6
- # if WWW::Mechanize::Page were to try to parse a PDF, a ContentTypeError
7
- # would be thrown.
8
- class ContentTypeError < RuntimeError
9
- attr_reader :content_type
10
-
11
- def initialize(content_type)
12
- @content_type = content_type
13
- end
14
- end
15
-
16
3
  # =Synopsis
17
4
  # This error is thrown when Mechanize encounters a response code it does
18
5
  # not know how to handle. Currently, this exception will be thrown
@@ -35,3 +22,4 @@ module WWW
35
22
  end
36
23
  end
37
24
  end
25
+
@@ -1,15 +1,12 @@
1
- require 'net/http'
2
- require 'test_servlets'
1
+ require 'test/unit'
2
+ require 'rubygems'
3
+ require 'mechanize'
3
4
  require 'webrick/httputils'
5
+ require 'servlets'
4
6
 
5
7
  BASE_DIR = File.dirname(__FILE__)
6
8
 
7
9
  class Net::HTTP
8
- #def self.new(*args)
9
- # obj = allocate
10
- # return obj
11
- #end
12
-
13
10
  alias :old_do_start :do_start
14
11
 
15
12
  def do_start
@@ -108,14 +105,3 @@ class Response
108
105
  yield body
109
106
  end
110
107
  end
111
-
112
-
113
- module TestMethods
114
- PORT = 2000
115
- PROXYPORT = 2001
116
- SSLPORT = 2002
117
-
118
- def html_response
119
- { 'content-type' => 'text/html' }
120
- end
121
- end
File without changes
@@ -1,13 +1,6 @@
1
- $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
-
3
- require 'test/unit'
4
- require 'rubygems'
5
- require 'mechanize'
6
- require 'test_includes'
1
+ require File.dirname(__FILE__) + "/helper"
7
2
 
8
3
  class BasicAuthTest < Test::Unit::TestCase
9
- include TestMethods
10
-
11
4
  def setup
12
5
  @agent = WWW::Mechanize.new
13
6
  end
data/test/tc_bad_links.rb CHANGED
@@ -1,16 +1,9 @@
1
- $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
-
3
- require 'test/unit'
4
- require 'rubygems'
5
- require 'mechanize'
6
- require 'test_includes'
1
+ require File.dirname(__FILE__) + "/helper"
7
2
 
8
3
  class TestBadLinks < Test::Unit::TestCase
9
- include TestMethods
10
-
11
4
  def setup
12
5
  @agent = WWW::Mechanize.new
13
- @page = @agent.get("http://localhost:#{PORT}/tc_bad_links.html")
6
+ @page = @agent.get("http://localhost/tc_bad_links.html")
14
7
  end
15
8
 
16
9
  def test_space_in_link
@@ -24,7 +17,7 @@ class TestBadLinks < Test::Unit::TestCase
24
17
  def test_space_in_url
25
18
  page = nil
26
19
  assert_nothing_raised do
27
- page = @agent.get("http://localhost:#{PORT}/tc_bad_links.html ")
20
+ page = @agent.get("http://localhost/tc_bad_links.html ")
28
21
  end
29
22
  assert_match(/tc_bad_links.html$/, @agent.history.last.uri.to_s)
30
23
  assert_equal(2, @agent.history.length)
@@ -1,13 +1,6 @@
1
- $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
-
3
- require 'test/unit'
4
- require 'rubygems'
5
- require 'mechanize'
6
- require 'test_includes'
1
+ require File.dirname(__FILE__) + "/helper"
7
2
 
8
3
  class BlankFormTest < Test::Unit::TestCase
9
- include TestMethods
10
-
11
4
  def setup
12
5
  @agent = WWW::Mechanize.new
13
6
  end
@@ -1,13 +1,6 @@
1
- $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
-
3
- require 'test/unit'
4
- require 'rubygems'
5
- require 'mechanize'
6
- require 'test_includes'
1
+ require File.dirname(__FILE__) + "/helper"
7
2
 
8
3
  class TestCheckBoxes < Test::Unit::TestCase
9
- include TestMethods
10
-
11
4
  def setup
12
5
  @agent = WWW::Mechanize.new
13
6
  @page = @agent.get('http://localhost/tc_checkboxes.html')