mechanize 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (243) hide show
  1. data/EXAMPLES +55 -0
  2. data/README +3 -12
  3. data/lib/mechanize/cookie.rb +1 -2
  4. data/lib/mechanize/form.rb +182 -0
  5. data/lib/mechanize/form_elements.rb +105 -0
  6. data/lib/mechanize/page.rb +130 -0
  7. data/lib/mechanize/page_elements.rb +28 -0
  8. data/lib/mechanize.rb +27 -369
  9. data/test/htdocs/file_upload.html +13 -0
  10. data/test/htdocs/frame_test.html +23 -0
  11. data/test/server.rb +1 -0
  12. data/test/servlets.rb +6 -0
  13. data/test/tc_cookies.rb +2 -3
  14. data/test/tc_forms.rb +2 -3
  15. data/test/tc_frames.rb +22 -0
  16. data/test/tc_links.rb +2 -3
  17. data/test/tc_mech.rb +2 -3
  18. data/test/tc_response_code.rb +2 -3
  19. data/test/tc_upload.rb +34 -0
  20. data/test/test_includes.rb +5 -0
  21. data/test/ts_mech.rb +2 -0
  22. metadata +59 -307
  23. data/doc/classes/Net/HTTP/Copy.html +0 -134
  24. data/doc/classes/Net/HTTP/Delete.html +0 -134
  25. data/doc/classes/Net/HTTP/Get.html +0 -134
  26. data/doc/classes/Net/HTTP/Head.html +0 -134
  27. data/doc/classes/Net/HTTP/Lock.html +0 -134
  28. data/doc/classes/Net/HTTP/Mkcol.html +0 -134
  29. data/doc/classes/Net/HTTP/Move.html +0 -134
  30. data/doc/classes/Net/HTTP/Options.html +0 -134
  31. data/doc/classes/Net/HTTP/Post.html +0 -134
  32. data/doc/classes/Net/HTTP/Propfind.html +0 -134
  33. data/doc/classes/Net/HTTP/Proppatch.html +0 -134
  34. data/doc/classes/Net/HTTP/Put.html +0 -134
  35. data/doc/classes/Net/HTTP/Trace.html +0 -134
  36. data/doc/classes/Net/HTTP/Unlock.html +0 -134
  37. data/doc/classes/Net/HTTP.html +0 -1716
  38. data/doc/classes/Net/HTTP.src/M000103.html +0 -18
  39. data/doc/classes/Net/HTTP.src/M000104.html +0 -18
  40. data/doc/classes/Net/HTTP.src/M000105.html +0 -18
  41. data/doc/classes/Net/HTTP.src/M000106.html +0 -18
  42. data/doc/classes/Net/HTTP.src/M000107.html +0 -29
  43. data/doc/classes/Net/HTTP.src/M000108.html +0 -18
  44. data/doc/classes/Net/HTTP.src/M000109.html +0 -22
  45. data/doc/classes/Net/HTTP.src/M000110.html +0 -18
  46. data/doc/classes/Net/HTTP.src/M000111.html +0 -18
  47. data/doc/classes/Net/HTTP.src/M000112.html +0 -18
  48. data/doc/classes/Net/HTTP.src/M000113.html +0 -18
  49. data/doc/classes/Net/HTTP.src/M000114.html +0 -22
  50. data/doc/classes/Net/HTTP.src/M000115.html +0 -29
  51. data/doc/classes/Net/HTTP.src/M000116.html +0 -18
  52. data/doc/classes/Net/HTTP.src/M000117.html +0 -19
  53. data/doc/classes/Net/HTTP.src/M000118.html +0 -19
  54. data/doc/classes/Net/HTTP.src/M000119.html +0 -18
  55. data/doc/classes/Net/HTTP.src/M000121.html +0 -18
  56. data/doc/classes/Net/HTTP.src/M000122.html +0 -28
  57. data/doc/classes/Net/HTTP.src/M000123.html +0 -19
  58. data/doc/classes/Net/HTTP.src/M000124.html +0 -30
  59. data/doc/classes/Net/HTTP.src/M000125.html +0 -18
  60. data/doc/classes/Net/HTTP.src/M000126.html +0 -18
  61. data/doc/classes/Net/HTTP.src/M000127.html +0 -18
  62. data/doc/classes/Net/HTTP.src/M000128.html +0 -18
  63. data/doc/classes/Net/HTTP.src/M000129.html +0 -18
  64. data/doc/classes/Net/HTTP.src/M000130.html +0 -18
  65. data/doc/classes/Net/HTTP.src/M000133.html +0 -28
  66. data/doc/classes/Net/HTTP.src/M000134.html +0 -20
  67. data/doc/classes/Net/HTTP.src/M000135.html +0 -28
  68. data/doc/classes/Net/HTTP.src/M000136.html +0 -18
  69. data/doc/classes/Net/HTTP.src/M000137.html +0 -18
  70. data/doc/classes/Net/HTTP.src/M000138.html +0 -18
  71. data/doc/classes/Net/HTTP.src/M000139.html +0 -18
  72. data/doc/classes/Net/HTTP.src/M000140.html +0 -18
  73. data/doc/classes/Net/HTTP.src/M000141.html +0 -18
  74. data/doc/classes/Net/HTTP.src/M000142.html +0 -18
  75. data/doc/classes/Net/HTTP.src/M000143.html +0 -18
  76. data/doc/classes/Net/HTTP.src/M000144.html +0 -18
  77. data/doc/classes/Net/HTTP.src/M000145.html +0 -18
  78. data/doc/classes/Net/HTTP.src/M000146.html +0 -18
  79. data/doc/classes/Net/HTTP.src/M000147.html +0 -18
  80. data/doc/classes/Net/HTTP.src/M000148.html +0 -18
  81. data/doc/classes/Net/HTTP.src/M000152.html +0 -19
  82. data/doc/classes/Net/HTTP.src/M000153.html +0 -39
  83. data/doc/classes/Net/HTTP.src/M000154.html +0 -18
  84. data/doc/classes/Net/HTTP.src/M000156.html +0 -24
  85. data/doc/classes/Net/HTTP.src/M000157.html +0 -18
  86. data/doc/classes/Net/HTTP.src/M000158.html +0 -19
  87. data/doc/classes/Net/HTTP.src/M000159.html +0 -21
  88. data/doc/classes/Net/HTTP.src/M000161.html +0 -19
  89. data/doc/classes/Net/HTTPError.html +0 -120
  90. data/doc/classes/Net/HTTPExceptions.html +0 -137
  91. data/doc/classes/Net/HTTPFatalError.html +0 -120
  92. data/doc/classes/Net/HTTPGenericRequest.html +0 -274
  93. data/doc/classes/Net/HTTPGenericRequest.src/M000088.html +0 -26
  94. data/doc/classes/Net/HTTPGenericRequest.src/M000089.html +0 -18
  95. data/doc/classes/Net/HTTPGenericRequest.src/M000090.html +0 -18
  96. data/doc/classes/Net/HTTPGenericRequest.src/M000091.html +0 -18
  97. data/doc/classes/Net/HTTPGenericRequest.src/M000092.html +0 -19
  98. data/doc/classes/Net/HTTPGenericRequest.src/M000093.html +0 -20
  99. data/doc/classes/Net/HTTPGenericRequest.src/M000094.html +0 -20
  100. data/doc/classes/Net/HTTPHeader.html +0 -734
  101. data/doc/classes/Net/HTTPHeader.src/M000055.html +0 -23
  102. data/doc/classes/Net/HTTPHeader.src/M000056.html +0 -19
  103. data/doc/classes/Net/HTTPHeader.src/M000057.html +0 -22
  104. data/doc/classes/Net/HTTPHeader.src/M000058.html +0 -22
  105. data/doc/classes/Net/HTTPHeader.src/M000059.html +0 -19
  106. data/doc/classes/Net/HTTPHeader.src/M000060.html +0 -19
  107. data/doc/classes/Net/HTTPHeader.src/M000061.html +0 -20
  108. data/doc/classes/Net/HTTPHeader.src/M000063.html +0 -18
  109. data/doc/classes/Net/HTTPHeader.src/M000065.html +0 -20
  110. data/doc/classes/Net/HTTPHeader.src/M000066.html +0 -20
  111. data/doc/classes/Net/HTTPHeader.src/M000067.html +0 -18
  112. data/doc/classes/Net/HTTPHeader.src/M000068.html +0 -18
  113. data/doc/classes/Net/HTTPHeader.src/M000069.html +0 -18
  114. data/doc/classes/Net/HTTPHeader.src/M000070.html +0 -20
  115. data/doc/classes/Net/HTTPHeader.src/M000072.html +0 -30
  116. data/doc/classes/Net/HTTPHeader.src/M000073.html +0 -43
  117. data/doc/classes/Net/HTTPHeader.src/M000075.html +0 -21
  118. data/doc/classes/Net/HTTPHeader.src/M000076.html +0 -22
  119. data/doc/classes/Net/HTTPHeader.src/M000077.html +0 -20
  120. data/doc/classes/Net/HTTPHeader.src/M000078.html +0 -21
  121. data/doc/classes/Net/HTTPHeader.src/M000079.html +0 -19
  122. data/doc/classes/Net/HTTPHeader.src/M000080.html +0 -18
  123. data/doc/classes/Net/HTTPHeader.src/M000081.html +0 -19
  124. data/doc/classes/Net/HTTPHeader.src/M000082.html +0 -19
  125. data/doc/classes/Net/HTTPHeader.src/M000083.html +0 -23
  126. data/doc/classes/Net/HTTPHeader.src/M000084.html +0 -18
  127. data/doc/classes/Net/HTTPHeader.src/M000086.html +0 -18
  128. data/doc/classes/Net/HTTPHeader.src/M000087.html +0 -18
  129. data/doc/classes/Net/HTTPRequest.html +0 -150
  130. data/doc/classes/Net/HTTPRequest.src/M000102.html +0 -21
  131. data/doc/classes/Net/HTTPResponse.html +0 -425
  132. data/doc/classes/Net/HTTPResponse.src/M000162.html +0 -18
  133. data/doc/classes/Net/HTTPResponse.src/M000163.html +0 -18
  134. data/doc/classes/Net/HTTPResponse.src/M000164.html +0 -23
  135. data/doc/classes/Net/HTTPResponse.src/M000165.html +0 -18
  136. data/doc/classes/Net/HTTPResponse.src/M000166.html +0 -32
  137. data/doc/classes/Net/HTTPResponse.src/M000167.html +0 -18
  138. data/doc/classes/Net/HTTPRetriableError.html +0 -120
  139. data/doc/classes/Net/HTTPServerException.html +0 -120
  140. data/doc/classes/Net/ProtoAuthError.html +0 -113
  141. data/doc/classes/Net/ProtoCommandError.html +0 -113
  142. data/doc/classes/Net/ProtoFatalError.html +0 -113
  143. data/doc/classes/Net/ProtoRetriableError.html +0 -113
  144. data/doc/classes/Net/ProtoServerError.html +0 -113
  145. data/doc/classes/Net/ProtoSyntaxError.html +0 -113
  146. data/doc/classes/Net/ProtoUnknownError.html +0 -113
  147. data/doc/classes/Net/ProtocolError.html +0 -111
  148. data/doc/classes/Net/WriteAdapter.html +0 -235
  149. data/doc/classes/Net/WriteAdapter.src/M000095.html +0 -19
  150. data/doc/classes/Net/WriteAdapter.src/M000096.html +0 -18
  151. data/doc/classes/Net/WriteAdapter.src/M000097.html +0 -18
  152. data/doc/classes/Net/WriteAdapter.src/M000099.html +0 -19
  153. data/doc/classes/Net/WriteAdapter.src/M000100.html +0 -18
  154. data/doc/classes/Net/WriteAdapter.src/M000101.html +0 -18
  155. data/doc/classes/REXML/Comment.html +0 -137
  156. data/doc/classes/REXML/Comment.src/M000053.html +0 -18
  157. data/doc/classes/REXML/Node.html +0 -240
  158. data/doc/classes/REXML/Node.src/M000047.html +0 -21
  159. data/doc/classes/REXML/Node.src/M000048.html +0 -21
  160. data/doc/classes/REXML/Node.src/M000049.html +0 -22
  161. data/doc/classes/REXML/Node.src/M000050.html +0 -18
  162. data/doc/classes/REXML/Node.src/M000051.html +0 -18
  163. data/doc/classes/REXML/Node.src/M000052.html +0 -18
  164. data/doc/classes/REXML/Text.html +0 -137
  165. data/doc/classes/REXML/Text.src/M000054.html +0 -18
  166. data/doc/classes/REXML.html +0 -109
  167. data/doc/classes/WWW/Button.html +0 -190
  168. data/doc/classes/WWW/Button.src/M000028.html +0 -18
  169. data/doc/classes/WWW/Button.src/M000029.html +0 -18
  170. data/doc/classes/WWW/Button.src/M000030.html +0 -25
  171. data/doc/classes/WWW/CheckBox.html +0 -160
  172. data/doc/classes/WWW/CheckBox.src/M000024.html +0 -18
  173. data/doc/classes/WWW/Cookie.html +0 -207
  174. data/doc/classes/WWW/Cookie.src/M000032.html +0 -59
  175. data/doc/classes/WWW/Cookie.src/M000033.html +0 -21
  176. data/doc/classes/WWW/Cookie.src/M000034.html +0 -18
  177. data/doc/classes/WWW/CookieJar.html +0 -197
  178. data/doc/classes/WWW/CookieJar.src/M000008.html +0 -18
  179. data/doc/classes/WWW/CookieJar.src/M000009.html +0 -22
  180. data/doc/classes/WWW/CookieJar.src/M000010.html +0 -33
  181. data/doc/classes/WWW/CookieJar.src/M000011.html +0 -18
  182. data/doc/classes/WWW/Field.html +0 -174
  183. data/doc/classes/WWW/Field.src/M000045.html +0 -18
  184. data/doc/classes/WWW/Field.src/M000046.html +0 -26
  185. data/doc/classes/WWW/FileUpload.html +0 -163
  186. data/doc/classes/WWW/FileUpload.src/M000031.html +0 -19
  187. data/doc/classes/WWW/Form.html +0 -152
  188. data/doc/classes/WWW/Form.src/M000044.html +0 -19
  189. data/doc/classes/WWW/GlobalForm.html +0 -271
  190. data/doc/classes/WWW/GlobalForm.src/M000004.html +0 -24
  191. data/doc/classes/WWW/GlobalForm.src/M000005.html +0 -26
  192. data/doc/classes/WWW/GlobalForm.src/M000006.html +0 -46
  193. data/doc/classes/WWW/GlobalForm.src/M000007.html +0 -46
  194. data/doc/classes/WWW/ImageButton.html +0 -157
  195. data/doc/classes/WWW/ImageButton.src/M000027.html +0 -22
  196. data/doc/classes/WWW/Link.html +0 -160
  197. data/doc/classes/WWW/Link.src/M000025.html +0 -20
  198. data/doc/classes/WWW/Mechanize.html +0 -379
  199. data/doc/classes/WWW/Mechanize.src/M000013.html +0 -21
  200. data/doc/classes/WWW/Mechanize.src/M000014.html +0 -18
  201. data/doc/classes/WWW/Mechanize.src/M000015.html +0 -24
  202. data/doc/classes/WWW/Mechanize.src/M000016.html +0 -19
  203. data/doc/classes/WWW/Mechanize.src/M000017.html +0 -23
  204. data/doc/classes/WWW/Mechanize.src/M000018.html +0 -32
  205. data/doc/classes/WWW/Mechanize.src/M000019.html +0 -19
  206. data/doc/classes/WWW/Mechanize.src/M000020.html +0 -18
  207. data/doc/classes/WWW/Mechanize.src/M000021.html +0 -33
  208. data/doc/classes/WWW/Mechanize.src/M000022.html +0 -18
  209. data/doc/classes/WWW/Meta.html +0 -113
  210. data/doc/classes/WWW/Page.html +0 -313
  211. data/doc/classes/WWW/Page.src/M000036.html +0 -18
  212. data/doc/classes/WWW/Page.src/M000037.html +0 -18
  213. data/doc/classes/WWW/Page.src/M000038.html +0 -18
  214. data/doc/classes/WWW/Page.src/M000039.html +0 -19
  215. data/doc/classes/WWW/Page.src/M000040.html +0 -19
  216. data/doc/classes/WWW/Page.src/M000041.html +0 -19
  217. data/doc/classes/WWW/Page.src/M000042.html +0 -19
  218. data/doc/classes/WWW/Page.src/M000043.html +0 -19
  219. data/doc/classes/WWW/RadioButton.html +0 -160
  220. data/doc/classes/WWW/RadioButton.src/M000012.html +0 -18
  221. data/doc/classes/WWW/ResponseCodeError.html +0 -150
  222. data/doc/classes/WWW/ResponseCodeError.src/M000035.html +0 -18
  223. data/doc/classes/WWW/SelectList.html +0 -160
  224. data/doc/classes/WWW/SelectList.src/M000026.html +0 -28
  225. data/doc/classes/WWW.html +0 -130
  226. data/doc/created.rid +0 -1
  227. data/doc/files/CHANGELOG.html +0 -136
  228. data/doc/files/LICENSE.html +0 -531
  229. data/doc/files/README.html +0 -161
  230. data/doc/files/lib/mechanize/cookie_rb.html +0 -101
  231. data/doc/files/lib/mechanize/net-overrides/net/http_rb.html +0 -139
  232. data/doc/files/lib/mechanize/net-overrides/net/https_rb.html +0 -109
  233. data/doc/files/lib/mechanize/net-overrides/net/protocol_rb.html +0 -117
  234. data/doc/files/lib/mechanize/parsing_rb.html +0 -267
  235. data/doc/files/lib/mechanize/parsing_rb.src/M000001.html +0 -22
  236. data/doc/files/lib/mechanize/parsing_rb.src/M000002.html +0 -44
  237. data/doc/files/lib/mechanize/parsing_rb.src/M000003.html +0 -34
  238. data/doc/files/lib/mechanize_rb.html +0 -152
  239. data/doc/fr_class_index.html +0 -80
  240. data/doc/fr_file_index.html +0 -35
  241. data/doc/fr_method_index.html +0 -194
  242. data/doc/index.html +0 -24
  243. data/doc/rdoc-style.css +0 -208
data/EXAMPLES ADDED
@@ -0,0 +1,55 @@
1
+ = WWW::Mechanize examples
2
+
3
+ == Google
4
+ require 'rubygems'
5
+ require 'mechanize'
6
+ require 'logger'
7
+
8
+ agent = WWW::Mechanize.new { |a| a.log = Logger.new("mech.log") }
9
+ agent.user_agent_alias = 'Mac Safari'
10
+ page = agent.get("http://www.google.com/")
11
+ search_form = page.forms.find { |f| f.name == "f" }
12
+ search_form.fields.find { |f| f.name == "q" }.value = "Hello"
13
+ search_results = agent.submit(search_form)
14
+ puts search_results.body
15
+
16
+ == Rubyforge
17
+ require 'mechanize'
18
+
19
+ agent = WWW::Mechanize.new {|a| a.log = Logger.new(STDERR) }
20
+ page = agent.get('http://rubyforge.org/')
21
+ link = page.links.find {|l| l.node.text =~ /Log In/ }
22
+ page = agent.click(link)
23
+ form = page.forms[1]
24
+ form.fields.find {|f| f.name == 'form_loginname'}.value = ARGV[0]
25
+ form.fields.find {|f| f.name == 'form_pw'}.value = ARGV[1]
26
+ page = agent.submit(form, form.buttons.first)
27
+
28
+ puts page.body
29
+
30
+ == File Upload
31
+ This example uploads one image as two different images to flickr.
32
+
33
+ require 'rubygems'
34
+ require 'mechanize'
35
+
36
+ agent = WWW::Mechanize.new
37
+ page = agent.get('http://flickr.com/signin/flickr/')
38
+ form = page.forms.first
39
+ form.fields.find { |f| f.name == 'email' }.value = ARGV[0]
40
+ form.fields.find { |f| f.name == 'password' }.value = ARGV[1]
41
+ page = agent.submit(form)
42
+ page = agent.click(page.links.find { |l| l.text == 'Upload' })
43
+ form = page.forms.first
44
+ img1 = form.file_uploads.find { |f| f.name == 'file1' }
45
+ img2 = form.file_uploads.find { |f| f.name == 'file2' }
46
+
47
+ img1.file_name = img2.file_name = ARGV[2]
48
+ File.open(ARGV[2], "r") { |f|
49
+ img1.file_data = img2.file_data = f.read
50
+ }
51
+
52
+ img1.mime_type = img2.mime_type = 'image/jpeg'
53
+
54
+ agent.submit(form)
55
+
data/README CHANGED
@@ -20,18 +20,9 @@ Note that the files in the net-overrides/ directory are taken from Ruby 1.9.0.
20
20
 
21
21
  Note that only the ruby-htmltools package bundled with narf works.
22
22
 
23
- == Example
24
- require 'rubygems'
25
- require 'mechanize'
26
- require 'logger'
27
-
28
- agent = WWW::Mechanize.new { |a| a.log = Logger.new("mech.log") }
29
- agent.user_agent_alias = 'Mac Safari'
30
- page = agent.get("http://www.google.com/")
31
- search_form = page.forms.find { |f| f.name == "f" }
32
- search_form.fields.find { |f| f.name == "q" }.value = "Hello"
33
- search_results = agent.submit(search_form)
34
- puts search_results.body
23
+ == Examples
24
+
25
+ See the EXAMPLES[link://files/EXAMPLES.html] file
35
26
 
36
27
  == Authors
37
28
 
@@ -15,8 +15,7 @@ module WWW
15
15
 
16
16
  expires_key = cookie.keys.find { |k| k.downcase == "expires" }
17
17
  if expires_key
18
- @expires = DateTime.parse(cookie.delete(expires_key),
19
- "%A %d-%b-%y %H:%M:%S %Z")
18
+ @expires = DateTime.parse(cookie.delete(expires_key))
20
19
  end
21
20
 
22
21
  secure_key = cookie.keys.find { |k| k.downcase == "secure" }
@@ -0,0 +1,182 @@
1
+ module WWW
2
+ # Class Form does not work in the case there is some invalid (unbalanced) html
3
+ # involved, such as:
4
+ #
5
+ # <td>
6
+ # <form>
7
+ # </td>
8
+ # <td>
9
+ # <input .../>
10
+ # </form>
11
+ # </td>
12
+ #
13
+ # GlobalForm takes two nodes, the node where the form tag is located
14
+ # (form_node), and another node, from which to start looking for form elements
15
+ # (elements_node) like buttons and the like. For class Form both fall together
16
+ # into one and the same node.
17
+ class GlobalForm
18
+ attr_reader :form_node, :elements_node
19
+ attr_accessor :method, :action, :name
20
+
21
+ attr_reader :fields, :buttons, :file_uploads, :radiobuttons, :checkboxes
22
+ attr_reader :enctype
23
+
24
+ def initialize(form_node, elements_node)
25
+ @form_node, @elements_node = form_node, elements_node
26
+
27
+ @method = (@form_node.attributes['method'] || 'GET').upcase
28
+ @action = @form_node.attributes['action']
29
+ @name = @form_node.attributes['name']
30
+ @enctype = @form_node.attributes['enctype'] || 'application/x-www-form-urlencoded'
31
+
32
+ parse
33
+ end
34
+
35
+ # In the case of malformed HTML, fields of multiple forms might occur in this form's
36
+ # field array. If the fields have the same name, posterior fields overwrite former fields.
37
+ # To avoid this, this method rejects all posterior duplicate fields.
38
+
39
+ def uniq_fields!
40
+ names_in = {}
41
+ fields.reject! {|f|
42
+ if names_in.include?(f.name)
43
+ true
44
+ else
45
+ names_in[f.name] = true
46
+ false
47
+ end
48
+ }
49
+ end
50
+
51
+ def build_query
52
+ query = {}
53
+
54
+ fields().each do |f|
55
+ query[f.name] = f.value || ""
56
+ end
57
+
58
+ checkboxes().each do |f|
59
+ query[f.name] = f.value || "on" if f.checked
60
+ end
61
+
62
+ radio_groups = {}
63
+ radiobuttons().each do |f|
64
+ radio_groups[f.name] ||= []
65
+ radio_groups[f.name] << f
66
+ end
67
+
68
+ # take one radio button from each group
69
+ radio_groups.each_value do |g|
70
+ checked = g.select {|f| f.checked}
71
+
72
+ if checked.size == 1
73
+ f = checked.first
74
+ query[f.name] = f.value || ""
75
+ elsif checked.size > 1
76
+ raise "multiple radiobuttons are checked in the same group!"
77
+ end
78
+ end
79
+
80
+ query
81
+ end
82
+
83
+ def request_data
84
+ query_params = build_query()
85
+ query = nil
86
+ case @enctype.downcase
87
+ when 'multipart/form-data'
88
+ boundary = rand_string(20)
89
+ @enctype << ", boundary=#{boundary}"
90
+ params = []
91
+ query_params.each { |k,v| params << param_to_multipart(k, v) }
92
+ @file_uploads.each { |f| params << file_to_multipart(f) }
93
+ query = params.collect { |p| "--#{boundary}\r\n#{p}" }.join('') +
94
+ "--#{boundary}--\r\n"
95
+ else
96
+ query = build_query_string(query_params)
97
+ end
98
+
99
+ query
100
+ end
101
+
102
+ def parse
103
+ @fields = []
104
+ @buttons = []
105
+ @file_uploads = []
106
+ @radiobuttons = []
107
+ @checkboxes = []
108
+
109
+ @elements_node.each_recursive {|node|
110
+ case node.name.downcase
111
+ when 'input'
112
+ case (node.attributes['type'] || 'text').downcase
113
+ when 'text', 'password', 'hidden', 'int'
114
+ @fields << Field.new(node.attributes['name'], node.attributes['value'])
115
+ when 'radio'
116
+ @radiobuttons << RadioButton.new(node.attributes['name'], node.attributes['value'], node.attributes.has_key?('checked'))
117
+ when 'checkbox'
118
+ @checkboxes << CheckBox.new(node.attributes['name'], node.attributes['value'], node.attributes.has_key?('checked'))
119
+ when 'file'
120
+ @file_uploads << FileUpload.new(node.attributes['name'], node.attributes['value'])
121
+ when 'submit'
122
+ @buttons << Button.new(node.attributes['name'], node.attributes['value'])
123
+ when 'image'
124
+ @buttons << ImageButton.new(node.attributes['name'], node.attributes['value'])
125
+ end
126
+ when 'textarea'
127
+ @fields << Field.new(node.attributes['name'], node.all_text)
128
+ when 'select'
129
+ @fields << SelectList.new(node.attributes['name'], node)
130
+ end
131
+ }
132
+ end
133
+
134
+ private
135
+ def rand_string(len = 10)
136
+ chars = ("a".."z").to_a + ("A".."Z").to_a
137
+ string = ""
138
+ 1.upto(len) { |i| string << chars[rand(chars.size-1)] }
139
+ string
140
+ end
141
+
142
+ def param_to_multipart(name, value)
143
+ return "Content-Disposition: form-data; name=\"" +
144
+ "#{WEBrick::HTTPUtils.escape_form(name)}\"\r\n" +
145
+ "\r\n#{value}\r\n"
146
+ end
147
+
148
+ def file_to_multipart(file)
149
+ body = "Content-Disposition: form-data; name=\"" +
150
+ "#{WEBrick::HTTPUtils.escape_form(file.name)}\"; " +
151
+ "filename=\"#{file.file_name}\"\r\n" +
152
+ "Content-Transfer-Encoding: binary\r\n"
153
+ if file.mime_type != nil
154
+ body << "Content-Type: #{file.mime_type}\r\n"
155
+ end
156
+
157
+ body << "\r\n#{file.file_data}\r\n"
158
+
159
+ body
160
+ end
161
+
162
+ def build_query_string(hash)
163
+ vals = []
164
+ hash.each_pair do |k,v|
165
+ vals << [
166
+ WEBrick::HTTPUtils.escape_form(k),
167
+ WEBrick::HTTPUtils.escape_form(v)
168
+ ].join("=")
169
+ end
170
+ vals.join("&")
171
+ end
172
+ end
173
+
174
+ class Form < GlobalForm
175
+ attr_reader :node
176
+
177
+ def initialize(node)
178
+ @node = node
179
+ super(@node, @node)
180
+ end
181
+ end
182
+ end
@@ -0,0 +1,105 @@
1
+ module WWW
2
+ class Field
3
+ attr_accessor :name, :value
4
+
5
+ def initialize(name, value)
6
+ @name, @value = name, value
7
+ end
8
+
9
+ # Returns an array of Field objects
10
+ # TODO: is this correct?
11
+ def self.extract_all_from(root_node)
12
+ fields = []
13
+ root_node.each_recursive {|node|
14
+ if (node.name.downcase == 'input' and
15
+ %w(text password hidden checkbox radio int).include?(node.attributes['type'].downcase)) or
16
+ %w(textarea option).include?(node.name.downcase)
17
+ fields << Field.new(node.attributes['name'], node.attributes['value'])
18
+ end
19
+ }
20
+ return fields
21
+ end
22
+ end
23
+
24
+ class FileUpload
25
+ # value is the file-name, not the file-content
26
+ attr_accessor :name
27
+
28
+ attr_accessor :file_name, :file_data, :mime_type
29
+
30
+ def initialize(name, file_name)
31
+ @name, @file_name = name, file_name
32
+ @file_data = nil
33
+ end
34
+ end
35
+
36
+ class Button
37
+ attr_accessor :name, :value
38
+
39
+ def initialize(name, value)
40
+ @name, @value = name, value
41
+ end
42
+
43
+ def add_to_query(query)
44
+ query[@name] = @value || "" if @name
45
+ end
46
+
47
+ # Returns an array of Button objects
48
+ def self.extract_all_from(root_node)
49
+ buttons = []
50
+ root_node.each_recursive {|node|
51
+ if node.name.downcase == 'input' and
52
+ ['submit'].include?(node.attributes['type'].downcase)
53
+ buttons << Button.new(node.attributes['name'], node.attributes['value'])
54
+ end
55
+ }
56
+ return buttons
57
+ end
58
+ end
59
+
60
+ class ImageButton < Button
61
+ attr_accessor :x, :y
62
+
63
+ def add_to_query(query)
64
+ if @name
65
+ query[@name] = @value || ""
66
+ query[@name+".x"] = (@x || "0").to_s
67
+ query[@name+".y"] = (@y || "0").to_s
68
+ end
69
+ end
70
+ end
71
+
72
+ class RadioButton
73
+ attr_accessor :name, :value, :checked
74
+
75
+ def initialize(name, value, checked)
76
+ @name, @value, @checked = name, value, checked
77
+ end
78
+ end
79
+
80
+ class CheckBox
81
+ attr_accessor :name, :value, :checked
82
+
83
+ def initialize(name, value, checked)
84
+ @name, @value, @checked = name, value, checked
85
+ end
86
+ end
87
+
88
+ class SelectList
89
+ attr_accessor :name, :value, :options
90
+
91
+ def initialize(name, node)
92
+ @name = name
93
+ @options = []
94
+
95
+ # parse
96
+ node.each_recursive {|n|
97
+ if n.name.downcase == 'option'
98
+ value = n.attributes['value']
99
+ @options << value
100
+ @value = value if n.attributes['selected']
101
+ end
102
+ }
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,130 @@
1
+ module WWW
2
+ # = Synopsis
3
+ # This class encapsulates a page.
4
+ #
5
+ # == Example
6
+ # require 'rubygems'
7
+ # require 'mechanize'
8
+ # require 'logger'
9
+ #
10
+ # class Body
11
+ # def initialize(node)
12
+ # puts node.attributes['bgcolor']
13
+ # end
14
+ # end
15
+ #
16
+ # agent = WWW::Mechanize.new { |a| a.log = Logger.new("mech.log") }
17
+ # agent.user_agent_alias = 'Mac Safari'
18
+ # page = agent.get("http://www.google.com/")
19
+ # page.watch_for_set = { 'body' => Body }
20
+ #
21
+ # body = page.watches
22
+ class Page
23
+ attr_accessor :uri, :cookies, :response, :body, :code, :watch_for_set
24
+
25
+ def initialize(uri=nil, cookies=[], response=nil, body=nil, code=nil)
26
+ @uri, @cookies, @response, @body, @code = uri, cookies, response, body, code
27
+ end
28
+
29
+ def header
30
+ @response.header
31
+ end
32
+
33
+ def content_type
34
+ header['Content-Type']
35
+ end
36
+
37
+ def forms
38
+ parse_html() unless @forms
39
+ @forms
40
+ end
41
+
42
+ def links
43
+ parse_html() unless @links
44
+ @links
45
+ end
46
+
47
+ def root
48
+ parse_html() unless @root
49
+ @root
50
+ end
51
+
52
+ # This method watches out for a particular tag, and will call back to the
53
+ # class specified for the tag in the watch_for_set method. See the example
54
+ # in this class.
55
+ def watches
56
+ parse_html() unless @watches
57
+ @watches
58
+ end
59
+
60
+ def meta
61
+ parse_html() unless @meta
62
+ @meta
63
+ end
64
+
65
+ def frames
66
+ parse_html() unless @frames
67
+ @frames
68
+ end
69
+
70
+ private
71
+
72
+ def parse_html
73
+ raise "no html" unless content_type() =~ /^text\/html/
74
+
75
+ # construct parser and feed with HTML
76
+ parser = HTMLTree::XMLParser.new
77
+ begin
78
+ parser.feed(@body)
79
+ rescue => ex
80
+ if ex.message =~ /attempted adding second root element to document/ and
81
+ # Put the whole document inside a single root element, which I simply name
82
+ # <root>, just to make the parser happy. It's no longer valid HTML, but
83
+ # without a single root element, it's not valid HTML either.
84
+
85
+ # TODO: leave a possible doctype definition outside this element.
86
+ parser = HTMLTree::XMLParser.new
87
+ parser.feed("<root>" + @body + "</root>")
88
+ else
89
+ raise
90
+ end
91
+ end
92
+
93
+ @root = parser.document
94
+
95
+ @forms = []
96
+ @links = []
97
+ @meta = []
98
+ @frames = []
99
+ @watches = {}
100
+
101
+ @root.each_recursive {|node|
102
+ name = node.name.downcase
103
+
104
+ case name
105
+ when 'form'
106
+ @forms << Form.new(node)
107
+ when 'a'
108
+ @links << Link.new(node)
109
+ when 'meta'
110
+ equiv = node.attributes['http-equiv']
111
+ content = node.attributes['content']
112
+ if equiv != nil && equiv.downcase == 'refresh'
113
+ if content != nil && content =~ /^\d+\s*;\s*url\s*=\s*(\S+)/i
114
+ node.attributes['href'] = $1
115
+ @meta << Meta.new(node)
116
+ end
117
+ end
118
+ when 'frame'
119
+ @frames << Frame.new(node)
120
+ else
121
+ if @watch_for_set and @watch_for_set.keys.include?( name )
122
+ @watches[name] = [] unless @watches[name]
123
+ klass = @watch_for_set[name]
124
+ @watches[name] << (klass ? klass.new(node) : node)
125
+ end
126
+ end
127
+ }
128
+ end
129
+ end
130
+ end
@@ -0,0 +1,28 @@
1
+ module WWW
2
+ class Link
3
+ attr_reader :node
4
+ attr_reader :href
5
+ attr_reader :text
6
+
7
+ def initialize(node)
8
+ @node = node
9
+ @href = node.attributes['href']
10
+ @text = node.all_text
11
+ end
12
+ end
13
+
14
+ class Meta < Link
15
+ end
16
+
17
+ class Frame
18
+ attr_reader :node
19
+ attr_reader :name
20
+ attr_reader :src
21
+
22
+ def initialize(node)
23
+ @node = node
24
+ @name = node.attributes['name']
25
+ @src = node.attributes['src']
26
+ end
27
+ end
28
+ end