mechanize 0.6.11 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- data/CHANGELOG.txt +8 -0
- data/Manifest.txt +31 -22
- data/lib/mechanize.rb +2 -652
- data/lib/www/mechanize.rb +635 -0
- data/lib/www/mechanize/content_type_error.rb +16 -0
- data/lib/www/mechanize/cookie.rb +64 -0
- data/lib/{mechanize/cookie.rb → www/mechanize/cookie_jar.rb} +0 -60
- data/lib/www/mechanize/file.rb +73 -0
- data/lib/www/mechanize/file_saver.rb +39 -0
- data/lib/{mechanize → www/mechanize}/form.rb +119 -137
- data/lib/www/mechanize/form/button.rb +8 -0
- data/lib/www/mechanize/form/check_box.rb +13 -0
- data/lib/www/mechanize/form/field.rb +28 -0
- data/lib/www/mechanize/form/file_upload.rb +24 -0
- data/lib/www/mechanize/form/image_button.rb +23 -0
- data/lib/www/mechanize/form/multi_select_list.rb +69 -0
- data/lib/www/mechanize/form/option.rb +51 -0
- data/lib/www/mechanize/form/radio_button.rb +38 -0
- data/lib/www/mechanize/form/select_list.rb +41 -0
- data/lib/www/mechanize/headers.rb +12 -0
- data/lib/{mechanize → www/mechanize}/history.rb +0 -0
- data/lib/{mechanize → www/mechanize}/inspect.rb +21 -28
- data/lib/{mechanize → www/mechanize}/list.rb +0 -0
- data/lib/{mechanize → www/mechanize}/monkey_patch.rb +19 -0
- data/lib/www/mechanize/page.rb +121 -0
- data/lib/www/mechanize/page/base.rb +10 -0
- data/lib/www/mechanize/page/frame.rb +22 -0
- data/lib/www/mechanize/page/link.rb +50 -0
- data/lib/www/mechanize/page/meta.rb +10 -0
- data/lib/www/mechanize/pluggable_parsers.rb +93 -0
- data/lib/{mechanize/errors.rb → www/mechanize/response_code_error.rb} +1 -13
- data/test/{test_includes.rb → helper.rb} +4 -18
- data/test/{test_servlets.rb → servlets.rb} +0 -0
- data/test/tc_authenticate.rb +1 -8
- data/test/tc_bad_links.rb +3 -10
- data/test/tc_blank_form.rb +1 -8
- data/test/tc_checkboxes.rb +1 -8
- data/test/tc_cookie_class.rb +1 -6
- data/test/tc_cookie_jar.rb +1 -7
- data/test/tc_cookies.rb +10 -17
- data/test/tc_encoded_links.rb +5 -12
- data/test/tc_errors.rb +4 -11
- data/test/tc_follow_meta.rb +1 -8
- data/test/tc_form_action.rb +6 -14
- data/test/tc_form_as_hash.rb +1 -9
- data/test/tc_form_button.rb +5 -8
- data/test/tc_form_no_inputname.rb +1 -8
- data/test/tc_forms.rb +16 -24
- data/test/tc_frames.rb +3 -10
- data/test/tc_gzipping.rb +2 -9
- data/test/tc_history.rb +5 -12
- data/test/tc_html_unscape_forms.rb +8 -15
- data/test/tc_if_modified_since.rb +1 -6
- data/test/tc_keep_alive.rb +1 -8
- data/test/tc_links.rb +12 -19
- data/test/tc_mech.rb +26 -34
- data/test/{test_mechanize_file.rb → tc_mechanize_file.rb} +1 -6
- data/test/tc_multi_select.rb +10 -17
- data/test/tc_no_attributes.rb +1 -8
- data/test/tc_page.rb +3 -10
- data/test/tc_pluggable_parser.rb +8 -15
- data/test/tc_post_form.rb +3 -10
- data/test/tc_pretty_print.rb +3 -10
- data/test/tc_radiobutton.rb +2 -9
- data/test/tc_referer.rb +13 -20
- data/test/tc_relative_links.rb +1 -8
- data/test/tc_response_code.rb +14 -21
- data/test/tc_save_file.rb +1 -9
- data/test/tc_select.rb +3 -10
- data/test/tc_select_all.rb +2 -10
- data/test/tc_select_none.rb +2 -10
- data/test/tc_select_noopts.rb +2 -9
- data/test/tc_set_fields.rb +2 -9
- data/test/tc_ssl_server.rb +5 -12
- data/test/tc_subclass.rb +2 -9
- data/test/tc_textarea.rb +2 -9
- data/test/tc_upload.rb +2 -9
- data/test/test_all.rb +4 -43
- metadata +96 -80
- data/lib/mechanize/form_elements.rb +0 -254
- data/lib/mechanize/net-overrides/net/http.rb +0 -2107
- data/lib/mechanize/net-overrides/net/https.rb +0 -172
- data/lib/mechanize/net-overrides/net/protocol.rb +0 -380
- data/lib/mechanize/page.rb +0 -138
- data/lib/mechanize/page_elements.rb +0 -77
- data/lib/mechanize/parsers/rexml_page.rb +0 -35
- data/lib/mechanize/pluggable_parsers.rb +0 -204
- data/lib/mechanize/rexml.rb +0 -236
- data/setup.rb +0 -1585
- data/test/tc_proxy.rb +0 -25
- data/test/tc_watches.rb +0 -32
@@ -0,0 +1,121 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'hpricot'
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
require 'www/mechanize/page/link'
|
6
|
+
require 'www/mechanize/page/meta'
|
7
|
+
require 'www/mechanize/page/base'
|
8
|
+
require 'www/mechanize/page/frame'
|
9
|
+
require 'www/mechanize/headers'
|
10
|
+
|
11
|
+
module WWW
  class Mechanize
    # = Synopsis
    # This class encapsulates an HTML page.  If Mechanize finds a content
    # type of 'text/html', this class will be instantiated and returned.
    #
    # == Example
    #  require 'rubygems'
    #  require 'mechanize'
    #
    #  agent = WWW::Mechanize.new
    #  agent.get('http://google.com/').class  #=> WWW::Mechanize::Page
    #
    class Page < WWW::Mechanize::File
      extend Forwardable

      # The agent handed to every Link, Form, Meta, Base and Frame built
      # from this page.
      attr_accessor :mech

      # Build a page.  The first four arguments are passed straight to the
      # superclass constructor; +mech+ is stored unless @mech is already set.
      #
      # Raises Mechanize::ContentTypeError unless the content type starts
      # with 'text/html'.
      def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
        super(uri, response, body, code)
        @mech ||= mech

        # Report the same value that is being tested.  The +response+
        # argument defaults to nil, so indexing it here would raise
        # NoMethodError and mask the ContentTypeError.
        type = content_type
        raise Mechanize::ContentTypeError.new(type) unless type =~ /^text\/html/

        @parser = @links = @forms = @meta = @bases = @frames = @iframes = nil
      end

      # Text of the page's 'title' element(s), or nil when there is no parser
      # or the title text is empty.  Only a non-nil result is cached.
      def title
        @title ||= begin
          text = parser && search('title').text
          text if text && text.length > 0
        end
      end

      # The Hpricot document for this page, parsed on first use.  Nothing is
      # parsed (and nil is returned) unless both body and response are set.
      def parser
        @parser ||= body && response ? Hpricot.parse(body) : nil
      end
      alias :root :parser

      # Get the content type
      def content_type
        response['content-type']
      end

      # Search through the page like HPricot
      def_delegator :parser, :search, :search
      def_delegator :parser, :/, :/
      def_delegator :parser, :at, :at

      # Find a form with +name+.  Form will be yielded if a block is given.
      def form(name)
        f = forms.name(name).first
        yield f if block_given?
        f
      end

      # List of Link objects, one for each 'a' and 'area' tag (all 'a' tags
      # first, then all 'area' tags).  Built once and cached.
      def links
        @links ||= WWW::Mechanize::List.new(
          %w{ a area }.map do |tag|
            search(tag).map do |node|
              Link.new(node, @mech, self)
            end
          end.flatten
        )
      end

      # List of Form objects, one for each 'form' tag.  A form without an
      # action is given this page's URI.  Built once and cached.
      def forms
        @forms ||= WWW::Mechanize::List.new(
          search('form').map do |html_form|
            form = Form.new(html_form, @mech, self)
            form.action ||= @uri
            form
          end
        )
      end

      # List of Meta objects, one for each 'meta' tag whose http-equiv is
      # 'refresh' (any case) and whose content looks like "N; url=TARGET".
      # The target is written to the node's 'href' attribute so that the
      # Meta object behaves like a link.  Built once and cached.
      def meta
        @meta ||= WWW::Mechanize::List.new(
          search('meta').map do |node|
            equiv, content = node['http-equiv'], node['content']
            next unless equiv && content
            next unless equiv.downcase == 'refresh'
            # The target may be bare or wrapped in single or double quotes;
            # the quotes are never part of the captured URL.
            next unless content =~ /^\d+\s*;\s*url\s*=\s*['"]?([^\s'"]+)/i
            node['href'] = $1
            Meta.new(node, @mech, self)
          end.compact
        )
      end

      # List of Base objects, one for each 'base' tag.  Built once and cached.
      def bases
        @bases ||= WWW::Mechanize::List.new(
          search('base').map { |node| Base.new(node, @mech, self) }
        )
      end

      # List of Frame objects, one for each 'frame' tag.  Built once and
      # cached.
      def frames
        @frames ||= WWW::Mechanize::List.new(
          search('frame').map { |node| Frame.new(node, @mech, self) }
        )
      end

      # List of Frame objects, one for each 'iframe' tag.  Built once and
      # cached.
      def iframes
        @iframes ||= WWW::Mechanize::List.new(
          search('iframe').map { |node| Frame.new(node, @mech, self) }
        )
      end
    end
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Page < WWW::Mechanize::File
      # Wrapper for a 'base' tag.  A Base is handled exactly like the Link
      # built from an 'a' tag: it carries the tag's link, though usually no
      # text.
      class Base < Link
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Page < WWW::Mechanize::File
      # Wrapper for a 'frame' tag that can stand in for a Link.  The tag's
      # src is stored as the href and its name as the text; +src+ and +name+
      # are provided as aliases of +href+ and +text+.
      class Frame < Link
        alias_method :src,  :href
        alias_method :name, :text

        # Initialise as a Link, then replace the link's href and text with
        # the frame's 'src' and 'name' attributes.
        def initialize(node, mech, referer)
          super
          @node = node
          @href = node['src']
          @text = node['name']
        end
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module WWW
|
2
|
+
class Mechanize
|
3
|
+
class Page < WWW::Mechanize::File
|
4
|
+
# This class encapsulates links. It contains the text and the URI for
|
5
|
+
# 'a' tags parsed out of an HTML page. If the link contains an image,
|
6
|
+
# the alt text will be used for that image.
|
7
|
+
#
|
8
|
+
# For example, the text for the following links will both be 'Hello World':
|
9
|
+
#
|
10
|
+
# <a href="http://rubyforge.org">Hello World</a>
|
11
|
+
# <a href="http://rubyforge.org"><img src="test.jpg" alt="Hello World"></a>
|
12
|
+
class Link
|
13
|
+
attr_reader :node
|
14
|
+
attr_reader :href
|
15
|
+
attr_reader :text
|
16
|
+
attr_reader :attributes
|
17
|
+
attr_reader :page
|
18
|
+
alias :to_s :text
|
19
|
+
alias :referer :page
|
20
|
+
|
21
|
+
# Build a link from a parsed tag.
#
# node:: the parsed element; read for its 'href' attribute, its inner text
#        and any 'img' children
# mech:: the agent that #click is forwarded to
# page:: the page the tag was found on (also available as #referer)
def initialize(node, mech, page)
  @node       = node
  @attributes = node
  @mech       = mech
  @page       = page
  @href       = node['href']
  @text       = node.inner_text

  # If there is no text, try to find images and use their alt text
  return unless @text.nil? || @text.length == 0

  images = node / 'img'
  return unless images.length > 0

  @text = ''
  images.each { |img| @text << (img['alt'] || '') }
end
|
38
|
+
|
39
|
+
# The link target parsed into a URI object.
#
# Returns nil when the tag had no href attribute, rather than passing nil to
# URI.parse, which raises.
def uri
  @href && URI.parse(@href)
end
|
42
|
+
|
43
|
+
# Click on this link
|
44
|
+
# Follow this link by passing it to the agent's +click+ method; whatever
# the agent returns is returned.
def click
  @mech.click(self)
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Page < WWW::Mechanize::File
      # Wrapper for a 'meta' tag.  A Meta is handled exactly like the Link
      # built from an 'a' tag: it carries the tag's link, though usually no
      # text.
      class Meta < Link
      end
    end
  end
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'www/mechanize/file'
|
2
|
+
require 'www/mechanize/file_saver'
|
3
|
+
require 'www/mechanize/page'
|
4
|
+
|
5
|
+
module WWW
  class Mechanize
    # = Synopsis
    # Registry of the pluggable parsers that Mechanize uses.
    #
    # A pluggable parser is the class Mechanize instantiates for a particular
    # content type.  Mechanize asks PluggableParser which class to use for a
    # given content type, and users may register their own parsers or
    # replace the ones already registered.
    #
    # For content types it has no parser for, PluggableParser answers with
    # the default, which starts out as WWW::Mechanize::File.
    # WWW::Mechanize::File provides basic functionality for any content
    # type, so it is a good class to extend when building your own parsers.
    #
    # == Example
    # To create your own parser, just create a class that takes four
    # parameters in the constructor.  Here is an example of registering
    # a pluggable parser that handles CSV files:
    #  class CSVParser < WWW::Mechanize::File
    #    attr_reader :csv
    #    def initialize(uri=nil, response=nil, body=nil, code=nil)
    #      super(uri, response, body, code)
    #      @csv = CSV.parse(body)
    #    end
    #  end
    #  agent = WWW::Mechanize.new
    #  agent.pluggable_parser.csv = CSVParser
    #  agent.get('http://example.com/test.csv')  # => CSVParser
    # Now any page that returns the content type of 'text/csv' will initialize
    # a CSVParser and return that object to the caller.
    #
    # To register a pluggable parser for a content type that pluggable parser
    # does not know about, just use the hash syntax:
    #  agent.pluggable_parser['text/something'] = SomeClass
    #
    # To set the default parser, just use the 'default' method:
    #  agent.pluggable_parser.default = SomeClass
    # Now all unknown content types will be instances of SomeClass.
    class PluggableParser
      # Content types that have a named setter (html=, pdf=, csv=, xml=).
      CONTENT_TYPES = {
        :html => 'text/html',
        :pdf  => 'application/pdf',
        :csv  => 'text/csv',
        :xml  => 'text/xml',
      }

      # Class returned by #parser when no parser is registered for a type.
      attr_accessor :default

      # Start with Page registered for 'text/html' and File as the default.
      def initialize
        @default = File
        @parsers = {}
        @parsers[CONTENT_TYPES[:html]] = Page
      end

      # The class to instantiate for +content_type+: the registered parser
      # if there is one, otherwise the default.  A nil content type always
      # gets the default.
      def parser(content_type)
        return default if content_type.nil?

        @parsers[content_type] || default
      end

      # Register +klass+ as the parser for +content_type+.
      def register_parser(content_type, klass)
        @parsers[content_type] = klass
      end

      # Register +klass+ as the parser for 'text/html'.
      def html=(klass)
        register_parser(CONTENT_TYPES[:html], klass)
      end

      # Register +klass+ as the parser for 'application/pdf'.
      def pdf=(klass)
        register_parser(CONTENT_TYPES[:pdf], klass)
      end

      # Register +klass+ as the parser for 'text/csv'.
      def csv=(klass)
        register_parser(CONTENT_TYPES[:csv], klass)
      end

      # Register +klass+ as the parser for 'text/xml'.
      def xml=(klass)
        register_parser(CONTENT_TYPES[:xml], klass)
      end

      # The parser registered for +content_type+, or nil if there is none
      # (the default is not consulted here).
      def [](content_type)
        @parsers[content_type]
      end

      # Register +klass+ as the parser for +content_type+ (hash syntax).
      def []=(content_type, klass)
        @parsers[content_type] = klass
      end
    end
  end
end
|
@@ -1,18 +1,5 @@
|
|
1
1
|
module WWW
|
2
2
|
class Mechanize
|
3
|
-
# =Synopsis
|
4
|
-
# This class contains an error for when a pluggable parser tries to
|
5
|
-
# parse a content type that it does not know how to handle. For example
|
6
|
-
# if WWW::Mechanize::Page were to try to parse a PDF, a ContentTypeError
|
7
|
-
# would be thrown.
|
8
|
-
class ContentTypeError < RuntimeError
|
9
|
-
attr_reader :content_type
|
10
|
-
|
11
|
-
def initialize(content_type)
|
12
|
-
@content_type = content_type
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
3
|
# =Synopsis
|
17
4
|
# This error is thrown when Mechanize encounters a response code it does
|
18
5
|
# not know how to handle. Currently, this exception will be thrown
|
@@ -35,3 +22,4 @@ module WWW
|
|
35
22
|
end
|
36
23
|
end
|
37
24
|
end
|
25
|
+
|
@@ -1,15 +1,12 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
1
|
+
require 'test/unit'
|
2
|
+
require 'rubygems'
|
3
|
+
require 'mechanize'
|
3
4
|
require 'webrick/httputils'
|
5
|
+
require 'servlets'
|
4
6
|
|
5
7
|
BASE_DIR = File.dirname(__FILE__)
|
6
8
|
|
7
9
|
class Net::HTTP
|
8
|
-
#def self.new(*args)
|
9
|
-
# obj = allocate
|
10
|
-
# return obj
|
11
|
-
#end
|
12
|
-
|
13
10
|
alias :old_do_start :do_start
|
14
11
|
|
15
12
|
def do_start
|
@@ -108,14 +105,3 @@ class Response
|
|
108
105
|
yield body
|
109
106
|
end
|
110
107
|
end
|
111
|
-
|
112
|
-
|
113
|
-
module TestMethods
|
114
|
-
PORT = 2000
|
115
|
-
PROXYPORT = 2001
|
116
|
-
SSLPORT = 2002
|
117
|
-
|
118
|
-
def html_response
|
119
|
-
{ 'content-type' => 'text/html' }
|
120
|
-
end
|
121
|
-
end
|
File without changes
|
data/test/tc_authenticate.rb
CHANGED
@@ -1,13 +1,6 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'test/unit'
|
4
|
-
require 'rubygems'
|
5
|
-
require 'mechanize'
|
6
|
-
require 'test_includes'
|
1
|
+
require File.dirname(__FILE__) + "/helper"
|
7
2
|
|
8
3
|
class BasicAuthTest < Test::Unit::TestCase
|
9
|
-
include TestMethods
|
10
|
-
|
11
4
|
def setup
|
12
5
|
@agent = WWW::Mechanize.new
|
13
6
|
end
|
data/test/tc_bad_links.rb
CHANGED
@@ -1,16 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'test/unit'
|
4
|
-
require 'rubygems'
|
5
|
-
require 'mechanize'
|
6
|
-
require 'test_includes'
|
1
|
+
require File.dirname(__FILE__) + "/helper"
|
7
2
|
|
8
3
|
class TestBadLinks < Test::Unit::TestCase
|
9
|
-
include TestMethods
|
10
|
-
|
11
4
|
def setup
|
12
5
|
@agent = WWW::Mechanize.new
|
13
|
-
@page = @agent.get("http://localhost
|
6
|
+
@page = @agent.get("http://localhost/tc_bad_links.html")
|
14
7
|
end
|
15
8
|
|
16
9
|
def test_space_in_link
|
@@ -24,7 +17,7 @@ class TestBadLinks < Test::Unit::TestCase
|
|
24
17
|
def test_space_in_url
|
25
18
|
page = nil
|
26
19
|
assert_nothing_raised do
|
27
|
-
page = @agent.get("http://localhost
|
20
|
+
page = @agent.get("http://localhost/tc_bad_links.html ")
|
28
21
|
end
|
29
22
|
assert_match(/tc_bad_links.html$/, @agent.history.last.uri.to_s)
|
30
23
|
assert_equal(2, @agent.history.length)
|
data/test/tc_blank_form.rb
CHANGED
@@ -1,13 +1,6 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'test/unit'
|
4
|
-
require 'rubygems'
|
5
|
-
require 'mechanize'
|
6
|
-
require 'test_includes'
|
1
|
+
require File.dirname(__FILE__) + "/helper"
|
7
2
|
|
8
3
|
class BlankFormTest < Test::Unit::TestCase
|
9
|
-
include TestMethods
|
10
|
-
|
11
4
|
def setup
|
12
5
|
@agent = WWW::Mechanize.new
|
13
6
|
end
|
data/test/tc_checkboxes.rb
CHANGED
@@ -1,13 +1,6 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'test/unit'
|
4
|
-
require 'rubygems'
|
5
|
-
require 'mechanize'
|
6
|
-
require 'test_includes'
|
1
|
+
require File.dirname(__FILE__) + "/helper"
|
7
2
|
|
8
3
|
class TestCheckBoxes < Test::Unit::TestCase
|
9
|
-
include TestMethods
|
10
|
-
|
11
4
|
def setup
|
12
5
|
@agent = WWW::Mechanize.new
|
13
6
|
@page = @agent.get('http://localhost/tc_checkboxes.html')
|