pstuteville-scrubyt 0.4.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (520)
  1. data/CHANGELOG +355 -0
  2. data/COPYING +340 -0
  3. data/README.rdoc +121 -0
  4. data/Rakefile +120 -0
  5. data/VERSION +1 -0
  6. data/examples/README.txt +1 -0
  7. data/examples/events/delta/input.html +682 -0
  8. data/examples/events/delta/test.rb +16 -0
  9. data/examples/misc/auto_regex/input.html +22 -0
  10. data/examples/misc/auto_regex/test.rb +14 -0
  11. data/examples/misc/compound_example/advanced/test.rb +11 -0
  12. data/examples/misc/compound_example/advanced/tricky_compound.html +9 -0
  13. data/examples/misc/compound_example/regexp/regexp_compound.html +17 -0
  14. data/examples/misc/compound_example/regexp/test.rb +11 -0
  15. data/examples/misc/compound_example/simple/compound.html +5 -0
  16. data/examples/misc/compound_example/simple/test.rb +11 -0
  17. data/examples/misc/detail_page/detailpage.html +6 -0
  18. data/examples/misc/detail_page/index.html +9 -0
  19. data/examples/misc/detail_page/test.rb +17 -0
  20. data/examples/misc/google/test.rb +39 -0
  21. data/examples/misc/identical_examples/data_extractor_export.rb +12 -0
  22. data/examples/misc/identical_examples/input.html +16 -0
  23. data/examples/misc/identical_examples/test.rb +15 -0
  24. data/examples/misc/immediate_attribute_extraction/data_extractor_export.rb +10 -0
  25. data/examples/misc/immediate_attribute_extraction/input.html +16 -0
  26. data/examples/misc/immediate_attribute_extraction/test.rb +14 -0
  27. data/examples/misc/multiple_examples/input.html +7 -0
  28. data/examples/misc/multiple_examples/test.rb +22 -0
  29. data/examples/misc/on_click_next/next_page_link.rb +42 -0
  30. data/examples/misc/on_click_next/page_1.html +10 -0
  31. data/examples/misc/on_click_next/page_2.html +10 -0
  32. data/examples/misc/on_click_next/page_3.html +7 -0
  33. data/examples/misc/rubycorner/test.rb +29 -0
  34. data/examples/misc/rubyforge_login/test.rb +30 -0
  35. data/examples/misc/tables/ambigous_records/input.html +17 -0
  36. data/examples/misc/tables/ambigous_records/test.rb +37 -0
  37. data/examples/misc/tables/another_plain_table/input.html +15 -0
  38. data/examples/misc/tables/another_plain_table/test.rb +25 -0
  39. data/examples/misc/tables/complex_table/input.html +45 -0
  40. data/examples/misc/tables/complex_table/test.rb +30 -0
  41. data/examples/misc/tables/grab_rows/input.html +20 -0
  42. data/examples/misc/tables/grab_rows/test.rb +30 -0
  43. data/examples/misc/tables/plain_table/input.html +39 -0
  44. data/examples/misc/tables/plain_table/test.rb +35 -0
  45. data/examples/misc/tables/plain_table_morepages/2.html +38 -0
  46. data/examples/misc/tables/plain_table_morepages/3.html +33 -0
  47. data/examples/misc/tables/plain_table_morepages/input.html +40 -0
  48. data/examples/misc/tables/plain_table_morepages/test.rb +32 -0
  49. data/examples/misc/tables/plain_table_morepages_with_image/2.html +40 -0
  50. data/examples/misc/tables/plain_table_morepages_with_image/3.html +33 -0
  51. data/examples/misc/tables/plain_table_morepages_with_image/images/right_arrow.png +0 -0
  52. data/examples/misc/tables/plain_table_morepages_with_image/input.html +42 -0
  53. data/examples/misc/tables/plain_table_morepages_with_image/test.rb +32 -0
  54. data/examples/misc/tables/test_select_indices/input.html +46 -0
  55. data/examples/misc/tables/test_select_indices/test.rb +55 -0
  56. data/examples/misc/xpath_example_type/input.html +15 -0
  57. data/examples/misc/xpath_example_type/test.rb +18 -0
  58. data/examples/misc/yahoo_finance/test.rb +26 -0
  59. data/examples/social/blog_comment/test.rb +27 -0
  60. data/examples/social/del.icio.us/test.rb +22 -0
  61. data/examples/social/digg/test.rb +37 -0
  62. data/examples/social/dzone/test.rb +28 -0
  63. data/examples/social/linkedin/linkedin.rb +23 -0
  64. data/examples/social/reddit/test.rb +23 -0
  65. data/examples/tones_extractor_export.rb +23 -0
  66. data/examples/webshops/amazon/002-8212888-3924065.html +5311 -0
  67. data/examples/webshops/amazon/002-8212888-3924065_files/0130796034.jpg +0 -0
  68. data/examples/webshops/amazon/002-8212888-3924065_files/020161622X.jpg +0 -0
  69. data/examples/webshops/amazon/002-8212888-3924065_files/0321223675.jpg +0 -0
  70. data/examples/webshops/amazon/002-8212888-3924065_files/0465067107.jpg +0 -0
  71. data/examples/webshops/amazon/002-8212888-3924065_files/0470069155.jpg +0 -0
  72. data/examples/webshops/amazon/002-8212888-3924065_files/0470081201.jpg +0 -0
  73. data/examples/webshops/amazon/002-8212888-3924065_files/0596005253.jpg +0 -0
  74. data/examples/webshops/amazon/002-8212888-3924065_files/0596101325.jpg +0 -0
  75. data/examples/webshops/amazon/002-8212888-3924065_files/0596523696.jpg +0 -0
  76. data/examples/webshops/amazon/002-8212888-3924065_files/0672328844.jpg +0 -0
  77. data/examples/webshops/amazon/002-8212888-3924065_files/0764596861.jpg +0 -0
  78. data/examples/webshops/amazon/002-8212888-3924065_files/0974514055.jpg +0 -0
  79. data/examples/webshops/amazon/002-8212888-3924065_files/0976694069.jpg +0 -0
  80. data/examples/webshops/amazon/002-8212888-3924065_files/0977616606.jpg +0 -0
  81. data/examples/webshops/amazon/002-8212888-3924065_files/0977616614.jpg +0 -0
  82. data/examples/webshops/amazon/002-8212888-3924065_files/0977616630.jpg +0 -0
  83. data/examples/webshops/amazon/002-8212888-3924065_files/1590597362.jpg +0 -0
  84. data/examples/webshops/amazon/002-8212888-3924065_files/1594480060.jpg +0 -0
  85. data/examples/webshops/amazon/002-8212888-3924065_files/1932394699.jpg +0 -0
  86. data/examples/webshops/amazon/002-8212888-3924065_files/2841772101.jpg +0 -0
  87. data/examples/webshops/amazon/002-8212888-3924065_files/amzn-logo-118w.gif +0 -0
  88. data/examples/webshops/amazon/002-8212888-3924065_files/askville-adwidget-bullet.gif +0 -0
  89. data/examples/webshops/amazon/002-8212888-3924065_files/askville-logo-sm-adwidget-white-bg.gif +0 -0
  90. data/examples/webshops/amazon/002-8212888-3924065_files/book_display_on_website-icon.gif +0 -0
  91. data/examples/webshops/amazon/002-8212888-3924065_files/btn-inactive-no-ns.gif +0 -0
  92. data/examples/webshops/amazon/002-8212888-3924065_files/btn-inactive-no.gif +0 -0
  93. data/examples/webshops/amazon/002-8212888-3924065_files/btn-inactive-yes-ns.gif +0 -0
  94. data/examples/webshops/amazon/002-8212888-3924065_files/btn-inactive-yes.gif +0 -0
  95. data/examples/webshops/amazon/002-8212888-3924065_files/btn-no-tiny.gif +0 -0
  96. data/examples/webshops/amazon/002-8212888-3924065_files/btn-yes-tiny.gif +0 -0
  97. data/examples/webshops/amazon/002-8212888-3924065_files/buybox-button-find-gifts-a.gif +0 -0
  98. data/examples/webshops/amazon/002-8212888-3924065_files/c7y_badge_rn_1.gif +0 -0
  99. data/examples/webshops/amazon/002-8212888-3924065_files/cap-a9-3.gif +0 -0
  100. data/examples/webshops/amazon/002-8212888-3924065_files/drop-down-icon-small-arrow.gif +0 -0
  101. data/examples/webshops/amazon/002-8212888-3924065_files/endcap-a9-go-2.gif +0 -0
  102. data/examples/webshops/amazon/002-8212888-3924065_files/gb-open-new.gif +0 -0
  103. data/examples/webshops/amazon/002-8212888-3924065_files/gc-logo-popover-a.gif +0 -0
  104. data/examples/webshops/amazon/002-8212888-3924065_files/gift-cert-roto-pop-a.gif +0 -0
  105. data/examples/webshops/amazon/002-8212888-3924065_files/go-button-books.gif +0 -0
  106. data/examples/webshops/amazon/002-8212888-3924065_files/go-button.gif +0 -0
  107. data/examples/webshops/amazon/002-8212888-3924065_files/go-orange-trans.gif +0 -0
  108. data/examples/webshops/amazon/002-8212888-3924065_files/go_button_photo.gif +0 -0
  109. data/examples/webshops/amazon/002-8212888-3924065_files/logo-off.gif +0 -0
  110. data/examples/webshops/amazon/002-8212888-3924065_files/n2CoreLibs-events-18134.js +1407 -0
  111. data/examples/webshops/amazon/002-8212888-3924065_files/n2CoreLibs-n2v1-57871.css +364 -0
  112. data/examples/webshops/amazon/002-8212888-3924065_files/n2CoreLibs-simplePopover-41153.js +749 -0
  113. data/examples/webshops/amazon/002-8212888-3924065_files/n2CoreLibs-utilities-25439.js +1608 -0
  114. data/examples/webshops/amazon/002-8212888-3924065_files/orange-arrow.gif +0 -0
  115. data/examples/webshops/amazon/002-8212888-3924065_files/orange-arrow_002.gif +0 -0
  116. data/examples/webshops/amazon/002-8212888-3924065_files/popover-blurb.gif +0 -0
  117. data/examples/webshops/amazon/002-8212888-3924065_files/powered-by-a9.gif +0 -0
  118. data/examples/webshops/amazon/002-8212888-3924065_files/stars-3-5.gif +0 -0
  119. data/examples/webshops/amazon/002-8212888-3924065_files/stars-4-0.gif +0 -0
  120. data/examples/webshops/amazon/002-8212888-3924065_files/stars-4-5.gif +0 -0
  121. data/examples/webshops/amazon/002-8212888-3924065_files/stars-5-0.gif +0 -0
  122. data/examples/webshops/amazon/002-8212888-3924065_files/tagline-adwidget.gif +0 -0
  123. data/examples/webshops/amazon/002-8212888-3924065_files/topnav-cart.gif +0 -0
  124. data/examples/webshops/amazon/002-8212888-3924065_files/transparent-pixel.gif +0 -0
  125. data/examples/webshops/amazon/002-8212888-3924065_files/transparent-pixel_002.gif +0 -0
  126. data/examples/webshops/amazon/test.rb +27 -0
  127. data/examples/webshops/amazon-online/test.rb +34 -0
  128. data/examples/webshops/barnes_and_noble/test.rb +32 -0
  129. data/examples/webshops/barnes_offline/barnes_and_noble.html +115 -0
  130. data/examples/webshops/barnes_offline/barnes_and_noble_files/10964080.gif +0 -0
  131. data/examples/webshops/barnes_offline/barnes_and_noble_files/10999676.gif +0 -0
  132. data/examples/webshops/barnes_offline/barnes_and_noble_files/11018492.gif +0 -0
  133. data/examples/webshops/barnes_offline/barnes_and_noble_files/11656534.gif +0 -0
  134. data/examples/webshops/barnes_offline/barnes_and_noble_files/11985045.gif +0 -0
  135. data/examples/webshops/barnes_offline/barnes_and_noble_files/12052378.gif +0 -0
  136. data/examples/webshops/barnes_offline/barnes_and_noble_files/12138286.gif +0 -0
  137. data/examples/webshops/barnes_offline/barnes_and_noble_files/12533212.gif +0 -0
  138. data/examples/webshops/barnes_offline/barnes_and_noble_files/12533268.gif +0 -0
  139. data/examples/webshops/barnes_offline/barnes_and_noble_files/9583392.gif +0 -0
  140. data/examples/webshops/barnes_offline/barnes_and_noble_files/SearchProduct.css +626 -0
  141. data/examples/webshops/barnes_offline/barnes_and_noble_files/admin3_gtpointup.gif +0 -0
  142. data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_aboutshipping.gif +0 -0
  143. data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_account.gif +0 -0
  144. data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_colon.gif +0 -0
  145. data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_giftreminder.gif +0 -0
  146. data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_help.gif +0 -0
  147. data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_orderstatus.gif +0 -0
  148. data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_wishlist.gif +0 -0
  149. data/examples/webshops/barnes_offline/barnes_and_noble_files/bg.gif +0 -0
  150. data/examples/webshops/barnes_offline/barnes_and_noble_files/btnGoGrn.gif +0 -0
  151. data/examples/webshops/barnes_offline/barnes_and_noble_files/cleardot.gif +0 -0
  152. data/examples/webshops/barnes_offline/barnes_and_noble_files/cleardot_002.gif +0 -0
  153. data/examples/webshops/barnes_offline/barnes_and_noble_files/dot4.gif +0 -0
  154. data/examples/webshops/barnes_offline/barnes_and_noble_files/dotGold20.gif +0 -0
  155. data/examples/webshops/barnes_offline/barnes_and_noble_files/hdCantFind.gif +0 -0
  156. data/examples/webshops/barnes_offline/barnes_and_noble_files/hdSearchResults.gif +0 -0
  157. data/examples/webshops/barnes_offline/barnes_and_noble_files/hgg_tab_home_cold.gif +0 -0
  158. data/examples/webshops/barnes_offline/barnes_and_noble_files/hgg_tab_toy_cold.gif +0 -0
  159. data/examples/webshops/barnes_offline/barnes_and_noble_files/iframeKMP.js +172 -0
  160. data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2.html +25 -0
  161. data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/070226_mc_lnav_search.gif +0 -0
  162. data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/XmlUtil.js +199 -0
  163. data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/XslStyleSheet.js +1 -0
  164. data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/kmp_gen.css +81 -0
  165. data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/product-preview-core.js +337 -0
  166. data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/product-preview.css +36 -0
  167. data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/productpreview.js +11 -0
  168. data/examples/webshops/barnes_offline/barnes_and_noble_files/linePagination.gif +0 -0
  169. data/examples/webshops/barnes_offline/barnes_and_noble_files/logo_bn05.gif +0 -0
  170. data/examples/webshops/barnes_offline/barnes_and_noble_files/navbar.js +34 -0
  171. data/examples/webshops/barnes_offline/barnes_and_noble_files/navbar_06.css +136 -0
  172. data/examples/webshops/barnes_offline/barnes_and_noble_files/popup_open.js +116 -0
  173. data/examples/webshops/barnes_offline/barnes_and_noble_files/qsearch3_vline_dots.gif +0 -0
  174. data/examples/webshops/barnes_offline/barnes_and_noble_files/qsearch4_search.gif +0 -0
  175. data/examples/webshops/barnes_offline/barnes_and_noble_files/qsearch_AdvSearch.jpg +0 -0
  176. data/examples/webshops/barnes_offline/barnes_and_noble_files/subnav_colon.gif +0 -0
  177. data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_Bookclubs_cold.gif +0 -0
  178. data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_bnjr_cold.gif +0 -0
  179. data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_books_hot.gif +0 -0
  180. data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_dvd_cold.gif +0 -0
  181. data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_giftcards_cold.gif +0 -0
  182. data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_home_cold.gif +0 -0
  183. data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_member_cc_cold.gif +0 -0
  184. data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_music_cold.gif +0 -0
  185. data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_pipe.gif +0 -0
  186. data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_textbooksonly_cold.gif +0 -0
  187. data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_usedoop_cold.gif +0 -0
  188. data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_videogames_cold.gif +0 -0
  189. data/examples/webshops/barnes_offline/barnes_and_noble_files/toppromo3_rule.gif +0 -0
  190. data/examples/webshops/barnes_offline/barnes_and_noble_files/toppromo_fastfree05.gif +0 -0
  191. data/examples/webshops/barnes_offline/barnes_and_noble_files/vcart_btn_checkout.gif +0 -0
  192. data/examples/webshops/barnes_offline/barnes_and_noble_files/vcart_icon_cart.gif +0 -0
  193. data/examples/webshops/barnes_offline/barnes_and_noble_files/vcart_topbot_rule.gif +0 -0
  194. data/examples/webshops/barnes_offline/barnes_and_noble_files/visualcart_prodid.js +401 -0
  195. data/examples/webshops/barnes_offline/test.rb +30 -0
  196. data/examples/webshops/buydig/2_files/03AA1BB9089A4A6A92CF23F280D664EB.jpg +0 -0
  197. data/examples/webshops/buydig/2_files/1008.gif +0 -0
  198. data/examples/webshops/buydig/2_files/1013.gif +0 -0
  199. data/examples/webshops/buydig/2_files/1020.gif +0 -0
  200. data/examples/webshops/buydig/2_files/106CF2FB84B446518397517C3E6D5AD8.jpg +0 -0
  201. data/examples/webshops/buydig/2_files/13-www.gif +0 -0
  202. data/examples/webshops/buydig/2_files/1E9BB2E56AB145FC9D6EF952703AF476.jpg +0 -0
  203. data/examples/webshops/buydig/2_files/1FCDFBE85CDB4D429EC2C8CB24D20457.jpg +0 -0
  204. data/examples/webshops/buydig/2_files/1pix.gif +0 -0
  205. data/examples/webshops/buydig/2_files/2014.gif +0 -0
  206. data/examples/webshops/buydig/2_files/2089.gif +0 -0
  207. data/examples/webshops/buydig/2_files/24992_medal.gif +0 -0
  208. data/examples/webshops/buydig/2_files/24BBCBA1397F4DDCBBBBE8456D6D6E5B.jpg +0 -0
  209. data/examples/webshops/buydig/2_files/281F8A6019B140F38DFD45EB7B69B0FB.jpg +0 -0
  210. data/examples/webshops/buydig/2_files/2975F866CB2149F7ACBC559C8E24E304.jpg +0 -0
  211. data/examples/webshops/buydig/2_files/316FC9256DC9460ABC3C5ECAF6C60286.jpg +0 -0
  212. data/examples/webshops/buydig/2_files/50569327D8B94252B95E449AE470E505.jpg +0 -0
  213. data/examples/webshops/buydig/2_files/519CDAB404FA4543B76B5F281468ACBF.jpg +0 -0
  214. data/examples/webshops/buydig/2_files/57D6146419A647BA89C96AF0B5CAB03C.jpg +0 -0
  215. data/examples/webshops/buydig/2_files/58E3F988E184448B8C0A59874AE123A8.jpg +0 -0
  216. data/examples/webshops/buydig/2_files/5E5B10197A4E4C9B9ECCD6309DBE4C54.jpg +0 -0
  217. data/examples/webshops/buydig/2_files/609A249177D04065B37B9161CB0BC92D.jpg +0 -0
  218. data/examples/webshops/buydig/2_files/676CEE8E53C2445982E991871B4DF613.jpg +0 -0
  219. data/examples/webshops/buydig/2_files/712BA08FAB524A31A76ABB9E2009FF8E.jpg +0 -0
  220. data/examples/webshops/buydig/2_files/734BD08D7A5049339393166491D09D21.jpg +0 -0
  221. data/examples/webshops/buydig/2_files/751E72B7003343248497FE6905F80787.jpg +0 -0
  222. data/examples/webshops/buydig/2_files/76493D4F02F14EF7B5886510604C7BB4.jpg +0 -0
  223. data/examples/webshops/buydig/2_files/79521E251278486DB29529C60C9D012A.jpg +0 -0
  224. data/examples/webshops/buydig/2_files/9C9AF82AC3B54BDC8C705278B50FDFD6.jpg +0 -0
  225. data/examples/webshops/buydig/2_files/BC3FD8307B9948FDB7EEF156D8629C37.jpg +0 -0
  226. data/examples/webshops/buydig/2_files/C0DD4574765047D1836F505E69DC8AE5.jpg +0 -0
  227. data/examples/webshops/buydig/2_files/C143F48515274A44B04F4B3E46306BD2.jpg +0 -0
  228. data/examples/webshops/buydig/2_files/C6B02E88F729464699DB275D140F4563.jpg +0 -0
  229. data/examples/webshops/buydig/2_files/CE334D6206DB4FA9AFDF339AEF0AF50F.jpg +0 -0
  230. data/examples/webshops/buydig/2_files/D66AE0DC865A4021AB300ED3A0C4CD11.jpg +0 -0
  231. data/examples/webshops/buydig/2_files/DEA2EC2093DC474D96B651068576DAE5.jpg +0 -0
  232. data/examples/webshops/buydig/2_files/F547677D83844042BF13A4BE6523BB50.jpg +0 -0
  233. data/examples/webshops/buydig/2_files/Rbbbonlineseal.gif +0 -0
  234. data/examples/webshops/buydig/2_files/TopSellers_bottom.gif +0 -0
  235. data/examples/webshops/buydig/2_files/TopSellers_ttl.gif +0 -0
  236. data/examples/webshops/buydig/2_files/addToFavorites_ttl.gif +0 -0
  237. data/examples/webshops/buydig/2_files/banner_CorporateSales.gif +0 -0
  238. data/examples/webshops/buydig/2_files/banner_Shipping.gif +0 -0
  239. data/examples/webshops/buydig/2_files/bizratehonoree.gif +0 -0
  240. data/examples/webshops/buydig/2_files/btn_submit.gif +0 -0
  241. data/examples/webshops/buydig/2_files/checkFlash.js +110 -0
  242. data/examples/webshops/buydig/2_files/checkFlash2.js +109 -0
  243. data/examples/webshops/buydig/2_files/cnetcertified.gif +0 -0
  244. data/examples/webshops/buydig/2_files/credPriceGrabber.gif +0 -0
  245. data/examples/webshops/buydig/2_files/credShopping.gif +0 -0
  246. data/examples/webshops/buydig/2_files/credential_paypal.gif +0 -0
  247. data/examples/webshops/buydig/2_files/credentials.gif +0 -0
  248. data/examples/webshops/buydig/2_files/dealtime.gif +0 -0
  249. data/examples/webshops/buydig/2_files/dvxstyle.css +754 -0
  250. data/examples/webshops/buydig/2_files/footer_021306_1_v1.gif +0 -0
  251. data/examples/webshops/buydig/2_files/func.js +132 -0
  252. data/examples/webshops/buydig/2_files/getseal +1 -0
  253. data/examples/webshops/buydig/2_files/help.gif +0 -0
  254. data/examples/webshops/buydig/2_files/home.gif +0 -0
  255. data/examples/webshops/buydig/2_files/java.js +155 -0
  256. data/examples/webshops/buydig/2_files/leftnv_help.gif +0 -0
  257. data/examples/webshops/buydig/2_files/logo.gif +0 -0
  258. data/examples/webshops/buydig/2_files/logo2.gif +0 -0
  259. data/examples/webshops/buydig/2_files/logo3.gif +0 -0
  260. data/examples/webshops/buydig/2_files/main.js +227 -0
  261. data/examples/webshops/buydig/2_files/mastercard_secured.gif +0 -0
  262. data/examples/webshops/buydig/2_files/newsBox_bkg.jpg +0 -0
  263. data/examples/webshops/buydig/2_files/newsBox_bottom.jpg +0 -0
  264. data/examples/webshops/buydig/2_files/newsBox_text.gif +0 -0
  265. data/examples/webshops/buydig/2_files/newsBox_ttl.jpg +0 -0
  266. data/examples/webshops/buydig/2_files/noimage75.gif +0 -0
  267. data/examples/webshops/buydig/2_files/orangeleftfrmbtm.gif +0 -0
  268. data/examples/webshops/buydig/2_files/pixel153.gif +0 -0
  269. data/examples/webshops/buydig/2_files/rightnv_bottom.gif +0 -0
  270. data/examples/webshops/buydig/2_files/search_btn_off.gif +0 -0
  271. data/examples/webshops/buydig/2_files/search_c1.gif +0 -0
  272. data/examples/webshops/buydig/2_files/search_c2.gif +0 -0
  273. data/examples/webshops/buydig/2_files/search_c3.gif +0 -0
  274. data/examples/webshops/buydig/2_files/search_c4.gif +0 -0
  275. data/examples/webshops/buydig/2_files/search_down.gif +0 -0
  276. data/examples/webshops/buydig/2_files/search_left.gif +0 -0
  277. data/examples/webshops/buydig/2_files/search_right.gif +0 -0
  278. data/examples/webshops/buydig/2_files/search_top.gif +0 -0
  279. data/examples/webshops/buydig/2_files/siteLinks_bottom.gif +0 -0
  280. data/examples/webshops/buydig/2_files/siteLinks_bullet.gif +0 -0
  281. data/examples/webshops/buydig/2_files/siteLinks_ttl.gif +0 -0
  282. data/examples/webshops/buydig/2_files/spacer.gif +0 -0
  283. data/examples/webshops/buydig/2_files/style.js +45 -0
  284. data/examples/webshops/buydig/2_files/styles.html +33 -0
  285. data/examples/webshops/buydig/2_files/track_orders.jpg +0 -0
  286. data/examples/webshops/buydig/2_files/urchin +534 -0
  287. data/examples/webshops/buydig/2_files/verified_by_visa.gif +0 -0
  288. data/examples/webshops/buydig/2_files/welcome.gif +0 -0
  289. data/examples/webshops/buydig/2_files/welcome_ttl.gif +0 -0
  290. data/examples/webshops/buydig/2_files/yahoologo.gif +0 -0
  291. data/examples/webshops/buydig/input.html +1194 -0
  292. data/examples/webshops/buydig/test.rb +31 -0
  293. data/examples/webshops/ebay/test.rb +32 -0
  294. data/examples/webshops/finewines_offline/_finewines.html +1739 -0
  295. data/examples/webshops/finewines_offline/_finewines_cut.html +371 -0
  296. data/examples/webshops/finewines_offline/_finewines_files/011064.jpg +0 -0
  297. data/examples/webshops/finewines_offline/_finewines_files/012674.jpg +0 -0
  298. data/examples/webshops/finewines_offline/_finewines_files/013268.jpg +0 -0
  299. data/examples/webshops/finewines_offline/_finewines_files/013300.jpg +0 -0
  300. data/examples/webshops/finewines_offline/_finewines_files/013409.jpg +0 -0
  301. data/examples/webshops/finewines_offline/_finewines_files/014340.jpg +0 -0
  302. data/examples/webshops/finewines_offline/_finewines_files/015073.jpg +0 -0
  303. data/examples/webshops/finewines_offline/_finewines_files/015255.jpg +0 -0
  304. data/examples/webshops/finewines_offline/_finewines_files/015479.jpg +0 -0
  305. data/examples/webshops/finewines_offline/_finewines_files/015487.jpg +0 -0
  306. data/examples/webshops/finewines_offline/_finewines_files/017038.jpg +0 -0
  307. data/examples/webshops/finewines_offline/_finewines_files/017129.jpg +0 -0
  308. data/examples/webshops/finewines_offline/_finewines_files/017145.jpg +0 -0
  309. data/examples/webshops/finewines_offline/_finewines_files/017152.jpg +0 -0
  310. data/examples/webshops/finewines_offline/_finewines_files/017285.jpg +0 -0
  311. data/examples/webshops/finewines_offline/_finewines_files/017392.jpg +0 -0
  312. data/examples/webshops/finewines_offline/_finewines_files/017400.jpg +0 -0
  313. data/examples/webshops/finewines_offline/_finewines_files/019778.jpg +0 -0
  314. data/examples/webshops/finewines_offline/_finewines_files/019786.jpg +0 -0
  315. data/examples/webshops/finewines_offline/_finewines_files/020503.jpg +0 -0
  316. data/examples/webshops/finewines_offline/_finewines_files/021253.jpg +0 -0
  317. data/examples/webshops/finewines_offline/_finewines_files/021279.jpg +0 -0
  318. data/examples/webshops/finewines_offline/_finewines_files/021337.jpg +0 -0
  319. data/examples/webshops/finewines_offline/_finewines_files/021352.jpg +0 -0
  320. data/examples/webshops/finewines_offline/_finewines_files/023002.jpg +0 -0
  321. data/examples/webshops/finewines_offline/_finewines_files/023135.jpg +0 -0
  322. data/examples/webshops/finewines_offline/_finewines_files/023143.jpg +0 -0
  323. data/examples/webshops/finewines_offline/_finewines_files/023788.jpg +0 -0
  324. data/examples/webshops/finewines_offline/_finewines_files/024166.jpg +0 -0
  325. data/examples/webshops/finewines_offline/_finewines_files/024182.jpg +0 -0
  326. data/examples/webshops/finewines_offline/_finewines_files/024216.jpg +0 -0
  327. data/examples/webshops/finewines_offline/_finewines_files/027268.jpg +0 -0
  328. data/examples/webshops/finewines_offline/_finewines_files/027516.jpg +0 -0
  329. data/examples/webshops/finewines_offline/_finewines_files/027862.jpg +0 -0
  330. data/examples/webshops/finewines_offline/_finewines_files/028118.jpg +0 -0
  331. data/examples/webshops/finewines_offline/_finewines_files/028936.jpg +0 -0
  332. data/examples/webshops/finewines_offline/_finewines_files/033894.jpg +0 -0
  333. data/examples/webshops/finewines_offline/_finewines_files/033902.jpg +0 -0
  334. data/examples/webshops/finewines_offline/_finewines_files/033910.jpg +0 -0
  335. data/examples/webshops/finewines_offline/_finewines_files/033936.jpg +0 -0
  336. data/examples/webshops/finewines_offline/_finewines_files/033944.jpg +0 -0
  337. data/examples/webshops/finewines_offline/_finewines_files/033951.jpg +0 -0
  338. data/examples/webshops/finewines_offline/_finewines_files/034553.jpg +0 -0
  339. data/examples/webshops/finewines_offline/_finewines_files/034561.jpg +0 -0
  340. data/examples/webshops/finewines_offline/_finewines_files/232439.jpg +0 -0
  341. data/examples/webshops/finewines_offline/_finewines_files/237834.jpg +0 -0
  342. data/examples/webshops/finewines_offline/_finewines_files/268359.jpg +0 -0
  343. data/examples/webshops/finewines_offline/_finewines_files/289082.jpg +0 -0
  344. data/examples/webshops/finewines_offline/_finewines_files/331603.jpg +0 -0
  345. data/examples/webshops/finewines_offline/_finewines_files/369686.jpg +0 -0
  346. data/examples/webshops/finewines_offline/_finewines_files/420257.jpg +0 -0
  347. data/examples/webshops/finewines_offline/_finewines_files/422014.jpg +0 -0
  348. data/examples/webshops/finewines_offline/_finewines_files/460410.jpg +0 -0
  349. data/examples/webshops/finewines_offline/_finewines_files/480533.jpg +0 -0
  350. data/examples/webshops/finewines_offline/_finewines_files/556795.jpg +0 -0
  351. data/examples/webshops/finewines_offline/_finewines_files/597054.jpg +0 -0
  352. data/examples/webshops/finewines_offline/_finewines_files/650606.jpg +0 -0
  353. data/examples/webshops/finewines_offline/_finewines_files/652628.jpg +0 -0
  354. data/examples/webshops/finewines_offline/_finewines_files/653790.jpg +0 -0
  355. data/examples/webshops/finewines_offline/_finewines_files/658450.jpg +0 -0
  356. data/examples/webshops/finewines_offline/_finewines_files/660027.jpg +0 -0
  357. data/examples/webshops/finewines_offline/_finewines_files/660951.jpg +0 -0
  358. data/examples/webshops/finewines_offline/_finewines_files/684514.jpg +0 -0
  359. data/examples/webshops/finewines_offline/_finewines_files/685131.jpg +0 -0
  360. data/examples/webshops/finewines_offline/_finewines_files/686865.jpg +0 -0
  361. data/examples/webshops/finewines_offline/_finewines_files/699330.jpg +0 -0
  362. data/examples/webshops/finewines_offline/_finewines_files/703017.jpg +0 -0
  363. data/examples/webshops/finewines_offline/_finewines_files/703140.jpg +0 -0
  364. data/examples/webshops/finewines_offline/_finewines_files/703850.jpg +0 -0
  365. data/examples/webshops/finewines_offline/_finewines_files/717306.jpg +0 -0
  366. data/examples/webshops/finewines_offline/_finewines_files/900274.jpg +0 -0
  367. data/examples/webshops/finewines_offline/_finewines_files/938225.jpg +0 -0
  368. data/examples/webshops/finewines_offline/_finewines_files/947440.jpg +0 -0
  369. data/examples/webshops/finewines_offline/_finewines_files/951319.jpg +0 -0
  370. data/examples/webshops/finewines_offline/_finewines_files/967893.jpg +0 -0
  371. data/examples/webshops/finewines_offline/_finewines_files/981407.jpg +0 -0
  372. data/examples/webshops/finewines_offline/_finewines_files/981613.jpg +0 -0
  373. data/examples/webshops/finewines_offline/_finewines_files/982421.jpg +0 -0
  374. data/examples/webshops/finewines_offline/_finewines_files/985598.jpg +0 -0
  375. data/examples/webshops/finewines_offline/_finewines_files/986737.jpg +0 -0
  376. data/examples/webshops/finewines_offline/_finewines_files/987503.jpg +0 -0
  377. data/examples/webshops/finewines_offline/_finewines_files/992800.jpg +0 -0
  378. data/examples/webshops/finewines_offline/_finewines_files/VintageslogoEN.gif +0 -0
  379. data/examples/webshops/finewines_offline/_finewines_files/blanc-up.gif +0 -0
  380. data/examples/webshops/finewines_offline/_finewines_files/btn_vintages_latest.gif +0 -0
  381. data/examples/webshops/finewines_offline/_finewines_files/cc_en.gif +0 -0
  382. data/examples/webshops/finewines_offline/_finewines_files/common.js +860 -0
  383. data/examples/webshops/finewines_offline/_finewines_files/drink.gif +0 -0
  384. data/examples/webshops/finewines_offline/_finewines_files/drinkhold.gif +0 -0
  385. data/examples/webshops/finewines_offline/_finewines_files/ec_en.gif +0 -0
  386. data/examples/webshops/finewines_offline/_finewines_files/ev_en.gif +0 -0
  387. data/examples/webshops/finewines_offline/_finewines_files/hold.gif +0 -0
  388. data/examples/webshops/finewines_offline/_finewines_files/index-wines-features.jpg +0 -0
  389. data/examples/webshops/finewines_offline/_finewines_files/indexSTYLE.css +398 -0
  390. data/examples/webshops/finewines_offline/_finewines_files/keyword_search.gif +0 -0
  391. data/examples/webshops/finewines_offline/_finewines_files/mm_menu.js +1 -0
  392. data/examples/webshops/finewines_offline/_finewines_files/nr_en.gif +0 -0
  393. data/examples/webshops/finewines_offline/_finewines_files/ontario_en.gif +0 -0
  394. data/examples/webshops/finewines_offline/_finewines_files/popup.js +81 -0
  395. data/examples/webshops/finewines_offline/_finewines_files/releases_nav.js +229 -0
  396. data/examples/webshops/finewines_offline/_finewines_files/so_en.gif +0 -0
  397. data/examples/webshops/finewines_offline/_finewines_files/spacer.gif +0 -0
  398. data/examples/webshops/finewines_offline/_finewines_files/top.gif +0 -0
  399. data/examples/webshops/finewines_offline/_finewines_files/urchin.js +576 -0
  400. data/examples/webshops/finewines_offline/_finewines_files/wom_en.gif +0 -0
  401. data/examples/webshops/finewines_offline/test.rb +30 -0
  402. data/examples/webshops/us1camera/1_files/1pix.gif +0 -0
  403. data/examples/webshops/us1camera/1_files/1pix_002.gif +0 -0
  404. data/examples/webshops/us1camera/1_files/CnetCertified.gif +0 -0
  405. data/examples/webshops/us1camera/1_files/CyberSource.gif +0 -0
  406. data/examples/webshops/us1camera/1_files/Images50.gif +0 -0
  407. data/examples/webshops/us1camera/1_files/Images50_002.gif +0 -0
  408. data/examples/webshops/us1camera/1_files/Images50_003.gif +0 -0
  409. data/examples/webshops/us1camera/1_files/Images50_004.gif +0 -0
  410. data/examples/webshops/us1camera/1_files/Images50_005.gif +0 -0
  411. data/examples/webshops/us1camera/1_files/Images50_006.gif +0 -0
  412. data/examples/webshops/us1camera/1_files/PriceGrabber.gif +0 -0
  413. data/examples/webshops/us1camera/1_files/QSearch.gif +0 -0
  414. data/examples/webshops/us1camera/1_files/ban-m.jpg +0 -0
  415. data/examples/webshops/us1camera/1_files/banner1.bin +0 -0
  416. data/examples/webshops/us1camera/1_files/banner3.bin +0 -0
  417. data/examples/webshops/us1camera/1_files/block1.jpg +0 -0
  418. data/examples/webshops/us1camera/1_files/block2.jpg +0 -0
  419. data/examples/webshops/us1camera/1_files/block3.jpg +0 -0
  420. data/examples/webshops/us1camera/1_files/block4.jpg +0 -0
  421. data/examples/webshops/us1camera/1_files/block5.jpg +0 -0
  422. data/examples/webshops/us1camera/1_files/block6.jpg +0 -0
  423. data/examples/webshops/us1camera/1_files/bos.js +280 -0
  424. data/examples/webshops/us1camera/1_files/box1.jpg +0 -0
  425. data/examples/webshops/us1camera/1_files/box2.jpg +0 -0
  426. data/examples/webshops/us1camera/1_files/box3.jpg +0 -0
  427. data/examples/webshops/us1camera/1_files/box4.jpg +0 -0
  428. data/examples/webshops/us1camera/1_files/dot.jpg +0 -0
  429. data/examples/webshops/us1camera/1_files/eDevix.gif +0 -0
  430. data/examples/webshops/us1camera/1_files/electronics1.jpg +0 -0
  431. data/examples/webshops/us1camera/1_files/getseal +1 -0
  432. data/examples/webshops/us1camera/1_files/pride.jpg +0 -0
  433. data/examples/webshops/us1camera/1_files/search.jpg +0 -0
  434. data/examples/webshops/us1camera/1_files/sidebutton.jpg +0 -0
  435. data/examples/webshops/us1camera/1_files/sslroilogic.js +49 -0
  436. data/examples/webshops/us1camera/1_files/style.css +1 -0
  437. data/examples/webshops/us1camera/1_files/tl.html +2 -0
  438. data/examples/webshops/us1camera/input.html +548 -0
  439. data/examples/webshops/us1camera/test.rb +37 -0
  440. data/lib/scrubyt/core/navigation/agents/firewatir.rb +285 -0
  441. data/lib/scrubyt/core/navigation/agents/mechanize.rb +315 -0
  442. data/lib/scrubyt/core/navigation/fetch_action.rb +63 -0
  443. data/lib/scrubyt/core/navigation/navigation_actions.rb +107 -0
  444. data/lib/scrubyt/core/scraping/compound_example.rb +30 -0
  445. data/lib/scrubyt/core/scraping/constraint.rb +169 -0
  446. data/lib/scrubyt/core/scraping/constraint_adder.rb +49 -0
  447. data/lib/scrubyt/core/scraping/filters/attribute_filter.rb +14 -0
  448. data/lib/scrubyt/core/scraping/filters/base_filter.rb +112 -0
  449. data/lib/scrubyt/core/scraping/filters/constant_filter.rb +9 -0
  450. data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +37 -0
  451. data/lib/scrubyt/core/scraping/filters/download_filter.rb +64 -0
  452. data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +9 -0
  453. data/lib/scrubyt/core/scraping/filters/regexp_filter.rb +13 -0
  454. data/lib/scrubyt/core/scraping/filters/script_filter.rb +11 -0
  455. data/lib/scrubyt/core/scraping/filters/text_filter.rb +34 -0
  456. data/lib/scrubyt/core/scraping/filters/tree_filter.rb +138 -0
  457. data/lib/scrubyt/core/scraping/pattern.rb +359 -0
  458. data/lib/scrubyt/core/scraping/pre_filter_document.rb +14 -0
  459. data/lib/scrubyt/core/scraping/result_indexer.rb +90 -0
  460. data/lib/scrubyt/core/shared/extractor.rb +171 -0
  461. data/lib/scrubyt/logging.rb +154 -0
  462. data/lib/scrubyt/output/post_processor.rb +139 -0
  463. data/lib/scrubyt/output/result.rb +44 -0
  464. data/lib/scrubyt/output/result_dumper.rb +154 -0
  465. data/lib/scrubyt/output/result_node.rb +145 -0
  466. data/lib/scrubyt/output/scrubyt_result.rb +42 -0
  467. data/lib/scrubyt/utils/compound_example_lookup.rb +50 -0
  468. data/lib/scrubyt/utils/ruby_extensions.rb +85 -0
  469. data/lib/scrubyt/utils/shared_utils.rb +58 -0
  470. data/lib/scrubyt/utils/simple_example_lookup.rb +40 -0
  471. data/lib/scrubyt/utils/xpathutils.rb +202 -0
  472. data/lib/scrubyt.rb +53 -0
  473. data/pkg/scrubyt-0.4.31.gem +0 -0
  474. data/resources/allison/LICENSE +184 -0
  475. data/resources/allison/README +37 -0
  476. data/resources/allison/allison.css +301 -0
  477. data/resources/allison/allison.gif +0 -0
  478. data/resources/allison/allison.js +307 -0
  479. data/resources/allison/allison.rb +287 -0
  480. data/resources/allison/cache/BODY +588 -0
  481. data/resources/allison/cache/CLASS_INDEX +4 -0
  482. data/resources/allison/cache/CLASS_PAGE +1 -0
  483. data/resources/allison/cache/FILE_INDEX +4 -0
  484. data/resources/allison/cache/FILE_PAGE +1 -0
  485. data/resources/allison/cache/FONTS +1 -0
  486. data/resources/allison/cache/FR_INDEX_BODY +1 -0
  487. data/resources/allison/cache/IMGPATH +1 -0
  488. data/resources/allison/cache/INDEX +1 -0
  489. data/resources/allison/cache/JAVASCRIPT +307 -0
  490. data/resources/allison/cache/METHOD_INDEX +4 -0
  491. data/resources/allison/cache/METHOD_LIST +1 -0
  492. data/resources/allison/cache/SRC_PAGE +1 -0
  493. data/resources/allison/cache/STYLE +323 -0
  494. data/resources/allison/cache/URL +1 -0
  495. data/scrubyt.gemspec +609 -0
  496. data/test/blackbox_test.rb +60 -0
  497. data/test/blackbox_tests/basic/multi_root.expected.xml +8 -0
  498. data/test/blackbox_tests/basic/multi_root.rb +6 -0
  499. data/test/blackbox_tests/basic/simple.expected.xml +5 -0
  500. data/test/blackbox_tests/basic/simple.rb +5 -0
  501. data/test/blackbox_tests/basic/three_divs.html +12 -0
  502. data/test/blackbox_tests/detail_page/detail_page_1.html +7 -0
  503. data/test/blackbox_tests/detail_page/detail_page_2.html +7 -0
  504. data/test/blackbox_tests/detail_page/main_page_1.html +5 -0
  505. data/test/blackbox_tests/detail_page/main_page_2.html +6 -0
  506. data/test/blackbox_tests/detail_page/one_detail_page.expected.xml +7 -0
  507. data/test/blackbox_tests/detail_page/one_detail_page.rb +9 -0
  508. data/test/blackbox_tests/detail_page/two_detail_pages.expected.xml +12 -0
  509. data/test/blackbox_tests/detail_page/two_detail_pages.rb +9 -0
  510. data/test/blackbox_tests/next_page/next_page_link.expected.xml +11 -0
  511. data/test/blackbox_tests/next_page/next_page_link.rb +7 -0
  512. data/test/blackbox_tests/next_page/page_1.html +11 -0
  513. data/test/blackbox_tests/next_page/page_2.html +11 -0
  514. data/test/blackbox_tests/next_page/page_3.html +7 -0
  515. data/test/blackbox_tests/next_page/page_list_links.expected.xml +11 -0
  516. data/test/blackbox_tests/next_page/page_list_links.rb +7 -0
  517. data/test/blackbox_tests/next_page/page_list_links.tofix +7 -0
  518. data/todo/backlog.txt +73 -0
  519. data/todo/scenario_ideas.txt +19 -0
  520. metadata +637 -0
@@ -0,0 +1,285 @@
1
+ require 'firewatir'
2
+
3
module Scrubyt
  ##
  #=<tt>Fetching pages (and related functionality)</tt>
  #
  #A lot happens before and during the fetching of a document, so everything
  #that loads a document (following a URL, submitting a form, clicking a
  #link) and related things such as basic authentication is collected here.
  #
  #This variant drives a browser through a FireWatir::Firefox agent.
  module Navigation
    module Firewatir

      ##
      #Hook run when this module is mixed in. It initialises the shared
      #navigation state (class variables) and defines every navigation
      #action as a singleton method of _base_.
      def self.included(base)
        base.module_eval do
          # The browser is only started once, even if the module is included again.
          @@agent = FireWatir::Firefox.new unless defined?(@@agent)
          @@current_doc_url = nil
          @@current_doc_protocol = nil
          @@base_dir = nil
          @@host_name = nil
          @@history = []
          @@current_form = nil
          @@current_frame = nil

          ##
          #Action to fetch a document (either a file or a http address)
          #
          #*parameters*
          #
          #_doc_url_ - the url or file name to fetch
          #
          #_args_ - optional hash as first element, with the keys
          #<tt>:mechanize_doc</tt> (an already loaded document to use instead
          #of navigating), <tt>:resolve</tt> (<tt>:full</tt>, <tt>:host</tt>
          #or a custom prefix string; defaults to <tt>:full</tt>) and
          #<tt>:basic_auth</tt> (a "login@password" string)
          def self.fetch(doc_url, *args)
            options       = args.first || {}
            mechanize_doc = options[:mechanize_doc]
            # Default to :full so that an options hash without :resolve does
            # not end up concatenating nil with the url.
            resolve       = options[:resolve] || :full
            basic_auth    = options[:basic_auth]
            parse_and_set_basic_auth(basic_auth) if basic_auth

            @@current_doc_url = doc_url
            @@current_doc_protocol = determine_protocol
            if mechanize_doc.nil?
              handle_relative_path(doc_url) unless @@current_doc_protocol == 'xpath'
              handle_relative_url(doc_url, resolve)
              Scrubyt.log :ACTION, "fetching document: #{@@current_doc_url}"
              case @@current_doc_protocol
              when 'file' then @@agent.goto("file://" + @@current_doc_url)
              else @@agent.goto(@@current_doc_url)
              end
              @@mechanize_doc = "<html>#{@@agent.html}</html>"
            else
              @@mechanize_doc = mechanize_doc
            end
            @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc))
            store_host_name(@@agent.url)   # in case we're on a new host
          end

          ##
          #Re-read the HTML the browser currently shows and re-parse it.
          def self.use_current_page
            @@mechanize_doc = "<html>#{@@agent.html}</html>"
            @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc))
          end

          ##
          #Select the frame identified by _attribute_ / _value_. The first
          #call remembers the frame in @@current_frame; later calls look the
          #frame up inside the remembered one.
          def self.frame(attribute, value)
            if @@current_frame
              # NOTE(review): the nested frame is returned but not stored, so
              # @@current_frame keeps pointing at the outer frame - confirm
              # whether that is intended.
              @@current_frame.frame(attribute, value)
            else
              @@current_frame = @@agent.frame(attribute, value)
            end
          end

          ##
          #Submit the last form;
          #
          #The form is the one remembered in @@current_form by the fill/select
          #actions (the _current_form_ argument is not used), or the first
          #FORM element of the current frame if a frame was selected.
          #_sleep_time_, when given, is slept before waiting for the browser.
          def self.submit(current_form, sleep_time=nil, button=nil, type=nil)
            if @@current_frame
              # Workaround: locate the frame and pick its first FORM element by hand.
              @@current_frame.locate
              form = Document.new(@@current_frame).all.find { |t| t.tagName == "FORM" }
              form.submit
            else
              @@agent.element_by_xpath(@@current_form).submit
            end

            if sleep_time
              sleep sleep_time
              @@agent.wait
            end

            @@current_doc_url = @@agent.url
            use_current_page
          end

          ##
          #Click the link specified by the text
          #
          #_link_spec_ is either the link text or a compound example (Hash);
          #_index_ is passed to the compound example lookup; _wait_secs_ is
          #slept after the click.
          def self.click_link(link_spec,index = 0,wait_secs=0)
            Scrubyt.log :ACTION, "Clicking link specified by: %p" % link_spec
            if link_spec.is_a?(Hash)
              elem = XPathUtils.generate_XPath(CompoundExampleLookup.find_node_from_compund_example(@@hpricot_doc, link_spec, false, index), nil, true)
              @@agent.element_by_xpath(elem).click
            else
              # NOTE(review): an escaped *string* (not a Regexp) is passed
              # here - confirm this is what the agent's link lookup expects.
              @@agent.link(:innerHTML, Regexp.escape(link_spec)).click
            end
            sleep(wait_secs) if wait_secs > 0
            @@agent.wait

            # evaluate the results
            extractor.evaluate_extractor

            @@current_doc_url = @@agent.url
            use_current_page
            Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
          end

          ##
          #Like click_by_xpath, but only logs an :INFO message when the agent
          #raises Watir::Exception::UnknownObjectException for _xpath_.
          def self.click_by_xpath_if_exists(xpath, wait_secs=0)
            @@agent.element_by_xpath(xpath).click
            sleep(wait_secs) if wait_secs > 0
            @@agent.wait

            extractor.evaluate_extractor

            @@current_doc_url = @@agent.url
            use_current_page
            Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
          rescue Watir::Exception::UnknownObjectException
            Scrubyt.log :INFO, "XPath #{xpath} doesn't exist in this document"
          end

          ##
          #Click the element found by _xpath_, wait, evaluate the extractor
          #and reload the current document.
          def self.click_by_xpath(xpath, wait_secs=0)
            Scrubyt.log :ACTION, "Clicking by XPath : %p" % xpath
            @@agent.element_by_xpath(xpath).click
            Scrubyt.log :INFO, "sleeping #{wait_secs}..."
            sleep(wait_secs) if wait_secs > 0
            @@agent.wait

            # evaluate the results
            extractor.evaluate_extractor

            @@current_doc_url = @@agent.url
            use_current_page
            Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
          end

          ##
          #Remember the host part of _doc_url_ (everything up to the first
          #slash that follows a dot). The very first host seen is kept in
          #@@original_host_name.
          def self.store_host_name(doc_url)
            host_match = doc_url.match(/.*\..*?\//)
            @@host_name = host_match[0] if host_match
            @@original_host_name ||= @@host_name
          end #end of method store_host_name

          ##
          #Work out the protocol of @@current_doc_url: 'xpath', 'https',
          #'http' or 'file'. A url that looks like a file while the previous
          #document was fetched over http(s) keeps that previous protocol.
          def self.determine_protocol
            old_protocol = @@current_doc_protocol
            new_protocol = case @@current_doc_url
                           when /^\/\//  then 'xpath'
                           when /^https/ then 'https'
                           when /^http/  then 'http'
                           when /^www\./ then 'http'
                           else 'file'
                           end
            return old_protocol if new_protocol == 'file' && %w[http https].include?(old_protocol)
            new_protocol
          end

          ##
          #Parse a "login@password" string and hand it to the agent.
          #Only the first '@' separates the parts, so the password itself may
          #contain '@'.
          def self.parse_and_set_basic_auth(basic_auth)
            login, pass = basic_auth.split('@', 2)
            # NOTE(review): the password is written to the log in clear text.
            Scrubyt.log :ACTION, "Basic authentication: login=<#{login}>, pass=<#{pass}>"
            @@agent.basic_auth(login, pass)
          end

          ##
          #Remember the directory of the first (or an absolute) file path in
          #@@base_dir, and prefix later relative paths with it.
          def self.handle_relative_path(doc_url)
            if @@base_dir.nil? || doc_url[0..0] == "/"
              @@base_dir = doc_url.scan(/.+\//)[0] if @@current_doc_protocol == 'file'
            elsif !doc_url.include?(@@base_dir)
              # Only prefix when the base dir is not already part of the url;
              # otherwise @@current_doc_url is left as it is.
              @@current_doc_url = @@base_dir + doc_url
            end
          end

          ##
          #Turn a relative url into an absolute one and store it in
          #@@current_doc_url. Absolute http(s) and javascript urls are left
          #alone. _resolve_ is <tt>:full</tt> (prefix with the stored host
          #name), <tt>:host</tt> (prefix with scheme and host only) or a
          #custom prefix string.
          def self.handle_relative_url(doc_url, resolve)
            return if doc_url =~ /^(https?:|javascript:)/
            if doc_url !~ /^\//
              first_char = doc_url[0..0]
              doc_url = ( first_char == '?' ? '' : '/' ) + doc_url
              if first_char == '?'
                # Query-only url: append it to the url of the current document.
                # NOTE(review): @@mechanize_doc is assigned a String elsewhere
                # in this agent, and #uri / @@agent.history look like
                # leftovers from the Mechanize agent - verify this branch.
                current_uri = @@mechanize_doc.uri.to_s
                current_uri = @@agent.history.first.uri.to_s if current_uri =~ /\/popup\//
                if current_uri.include?('?')
                  current_uri = current_uri.scan(/.+\//)[0]
                else
                  current_uri += '/' unless current_uri[-1..-1] == '/'
                end
                @@current_doc_url = current_uri + doc_url
                return
              end
            end
            case resolve
            when :full
              if @@host_name && !doc_url.include?(@@host_name)
                @@current_doc_url = @@host_name + doc_url
              end
              # Drops repeated segments (this also collapses the double slash
              # produced by the concatenation above).
              @@current_doc_url = @@current_doc_url.split('/').uniq.join('/')
            when :host
              base_host_name = (@@host_name.count("/") == 2 ? @@host_name : @@host_name.scan(/(http.+?\/\/.+?)\//)[0][0])
              @@current_doc_url = base_host_name + doc_url
            else
              # custom resolving: _resolve_ is used as the prefix
              @@current_doc_url = resolve + doc_url
            end
          end

          ##
          #Fill the text field named _textfield_name_ with _query_string_
          #(by assigning its value when _useValue_ is set, by calling +set+
          #otherwise), remember its form and re-read the page.
          def self.fill_textfield(textfield_name, query_string, wait_secs, useValue)
            @@current_form = "//input[@name='#{textfield_name}']/ancestor::form"
            target = @@current_frame || @@agent
            if useValue
              target.text_field(:name,textfield_name).value = query_string
            else
              target.text_field(:name,textfield_name).set(query_string)
            end
            sleep(wait_secs) if wait_secs > 0
            use_current_page
          end

          ##
          #Action to fill a textarea with text
          def self.fill_textarea(textarea_name, text)
            # Match a textarea (or, as before, an input) with that name so
            # that submit can find the enclosing form.
            @@current_form = "//*[(self::textarea or self::input) and @name='#{textarea_name}']/ancestor::form"
            @@agent.text_field(:name,textarea_name).set(text)
          end

          ##
          #Action for selecting an option from a dropdown box
          def self.select_option(selectlist_name, option)
            @@current_form = "//select[@name='#{selectlist_name}']/ancestor::form"
            @@agent.select_list(:name,selectlist_name).select(option)
          end

          ##
          #Check the checkbox named _checkbox_name_ and remember its form.
          def self.check_checkbox(checkbox_name)
            @@current_form = "//input[@name='#{checkbox_name}']/ancestor::form"
            @@agent.checkbox(:name,checkbox_name).set(true)
          end

          ##
          #Set the _index_-th input named _checkbox_name_ and remember its form.
          def self.check_radiobutton(checkbox_name, index=0)
            @@current_form = "//input[@name='#{checkbox_name}']/ancestor::form"
            @@agent.elements_by_xpath("//input[@name='#{checkbox_name}']")[index].set
          end

          ##
          #Image maps are not supported by this agent.
          def self.click_image_map(index=0)
            raise 'NotImplemented'
          end

          ##
          #Sleep _time_ seconds, then wait for the browser.
          def self.wait(time=1)
            sleep(time)
            @@agent.wait
          end

          ##
          #Close the browser driven by the agent.
          def self.close_firefox
            @@agent.close
          end
        end
      end
    end
  end
end
@@ -0,0 +1,315 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
module Scrubyt
  ##
  #=<tt>Fetching pages (and related functionality)</tt>
  #
  #A lot happens before and during the fetching of a document, so everything
  #that loads a document (following a URL, submitting a form, clicking a
  #link) and related things such as setting a proxy is collected here.
  #
  #This variant uses a WWW::Mechanize agent.
  module Navigation
    module Mechanize

      ##
      #Hook run when this module is mixed in. It creates the agent,
      #initialises the shared navigation state (class variables) and defines
      #every navigation action as a singleton method of _base_.
      def self.included(base)
        base.module_eval do
          @@agent = WWW::Mechanize.new
          @@agent.user_agent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1"
          @@current_doc_url = nil
          @@current_doc_protocol = nil
          @@base_dir = nil
          @@host_name = nil
          @@history = []
          # Initialised so that "no form found yet" can be tested for.
          @@current_form = nil

          ##
          #Action to fetch a document (either a file or a http address)
          #
          #*parameters*
          #
          #_doc_url_ - the url or file name to fetch
          #
          #_args_ - optional hash as first element, with the keys
          #<tt>:mechanize_doc</tt> (an already loaded page),
          #<tt>:html</tt> (an HTML string to wrap in a page),
          #<tt>:resolve</tt> (<tt>:full</tt>, <tt>:host</tt> or a custom
          #prefix string; defaults to <tt>:full</tt>),
          #<tt>:basic_auth</tt> ("login@password") and <tt>:proxy</tt>
          def self.fetch(doc_url, *args)
            options       = args.first || {}
            mechanize_doc = options[:mechanize_doc]
            html          = options[:html]
            # Default to :full so that an options hash without :resolve does
            # not end up concatenating nil with the url.
            resolve       = options[:resolve] || :full
            basic_auth    = options[:basic_auth]
            parse_and_set_basic_auth(basic_auth) if basic_auth
            proxy = options[:proxy]
            parse_and_set_proxy(proxy) if proxy
            if html
              # NOTE(review): this protocol value is overwritten by
              # determine_protocol a few lines below - confirm the intent.
              @@current_doc_protocol = 'string'
              mechanize_doc = WWW::Mechanize::Page.new(nil, {'content-type' => 'text/html'}, html)
            end

            @@current_doc_url = doc_url
            @@current_doc_protocol = determine_protocol

            if mechanize_doc.nil? && @@current_doc_protocol != 'file'
              handle_relative_path(doc_url)
              handle_relative_url(doc_url, resolve)
              Scrubyt.log :ACTION, "fetching document: #{@@current_doc_url}"
              @@mechanize_doc = @@agent.get(@@current_doc_url)
            else
              @@mechanize_doc = mechanize_doc
            end

            if @@current_doc_protocol == 'file'
              # Block form so the handle is closed after reading.
              file_content = open(@@current_doc_url) { |f| f.read }
              @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(file_content))
            else
              @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc.body))
              store_host_name(self.get_current_doc_url)   # in case we're on a new host
            end
          end

          ##
          #Submit the last form;
          #
          #_index_ is nil (plain submit), a button name/value (String) or the
          #position of the button in the form's button list. _type_, when
          #set together with a String _index_, makes the form submit itself
          #instead of going through the agent.
          def self.submit(index=nil, sleep_time=nil, type=nil)
            Scrubyt.log :ACTION, 'Submitting form...'
            if index.nil?
              result_page = process_submit(@@current_form)
            elsif index.is_a?(String)
              # A button name can never be used as a list position, so it is
              # always looked up by name/value, with or without _type_.
              button = @@current_form.buttons.detect { |b| b.name == index || b.value == index }
              result_page = process_submit(@@current_form, button, type)
            else
              result_page = @@agent.submit(@@current_form, @@current_form.buttons[index])
            end
            @@current_doc_url = result_page.uri.to_s
            Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
            fetch(@@current_doc_url, :mechanize_doc => result_page)
          end

          ##
          #Click the link specified by the text
          #
          #_link_spec_ is either the link text or a compound example (Hash).
          #The nearest node carrying an +href+ attribute is clicked.
          def self.click_link(link_spec,index = 0,wait_secs=0)
            Scrubyt.log :ACTION, "Clicking link specified by: %p" % link_spec
            if link_spec.is_a? Hash
              clicked_elem = CompoundExampleLookup.find_node_from_compund_example(@@hpricot_doc, link_spec, false, index)
            else
              clicked_elem = SimpleExampleLookup.find_node_from_text(@@hpricot_doc, link_spec, false, index)
            end
            clicked_elem = XPathUtils.find_nearest_node_with_attribute(clicked_elem, 'href')
            result_page = @@agent.click(clicked_elem)
            @@current_doc_url = result_page.uri.to_s
            Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
            fetch(@@current_doc_url, :mechanize_doc => result_page)
          end

          ##
          #Follow the +href+ of the _index_-th +area+ element of the page.
          def self.click_image_map(index = 0)
            Scrubyt.log :ACTION, "Clicking image map at index: %p" % index
            uri = @@mechanize_doc.search("//area")[index]['href']
            result_page = @@agent.get(uri)
            @@current_doc_url = result_page.uri.to_s
            Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
            fetch(@@current_doc_url, :mechanize_doc => result_page)
          end

          ##
          #Remember "scheme://host" of the current page for http(s)
          #documents; falls back to _doc_url_ when no host name is known yet.
          #A trailing slash is removed and the very first host seen is kept
          #in @@original_host_name.
          def self.store_host_name(doc_url)
            if @@current_doc_protocol == 'http' || @@current_doc_protocol == 'https'
              scheme = @@current_doc_protocol
              # Capture only the host; the scheme is added back exactly once.
              host = @@mechanize_doc.uri.to_s[%r{\A#{scheme}://([^/]+)}, 1]
              @@host_name = "#{scheme}://#{host}" if host
            end
            @@host_name = doc_url if @@host_name.nil?
            @@host_name = @@host_name[0..-2] if @@host_name && @@host_name[-1..-1] == '/'
            @@original_host_name ||= @@host_name
          end #end of method store_host_name

          ##
          #Parse a "login@password" string and hand it to the agent.
          #Only the first '@' separates the parts, so the password itself may
          #contain '@'.
          def self.parse_and_set_basic_auth(basic_auth)
            login, pass = basic_auth.split('@', 2)
            # NOTE(review): the password is written to the log in clear text.
            Scrubyt.log :ACTION, "Basic authentication: login=<#{login}>, pass=<#{pass}>"
            @@agent.basic_auth(login, pass)
          end

          ##
          #Parse a proxy specification and configure the agent with it.
          #Accepted shapes, as split on ':' and '@':
          #"host:port", "user@host:port" and "user:pass@host:port"; anything
          #containing "localhost" uses host 'localhost' and the last
          #':'-separated part as port.
          #Prints a message and exits the process when host or port is missing.
          def self.parse_and_set_proxy(proxy)
            @@proxy_user = @@proxy_pass = nil
            if proxy.downcase.include?('localhost')
              @@host = 'localhost'
              @@port = proxy.split(':').last
            else
              parts = proxy.split(':')
              if parts.size > 2
                user_pass = parts[1].split('@')
                @@proxy_user = parts[0]
                @@proxy_pass = user_pass[0]
                @@host = user_pass[1]
                @@port = parts[2]
              elsif parts[0].include?('@')
                user_host = parts[0].split('@')
                @@proxy_user = user_host[0]
                @@host = user_host[1]
                @@port = parts[1]
              else
                @@host = parts[0]
                @@port = parts[1]
              end

              if @@host.nil? || @@port.nil?
                puts "Invalid proxy specification..."
                puts "neither host nor port can be nil!"
                exit
              end
            end
            Scrubyt.log :ACTION, "[ACTION] Setting proxy: host=<#{@@host}>, port=<#{@@port}>, username=<#{@@proxy_user}>, password=<#{@@proxy_pass}>"
            @@agent.set_proxy(@@host, @@port, @@proxy_user, @@proxy_pass)
          end

          ##
          #Work out the protocol of @@current_doc_url: 'https', 'http' or
          #'file'. A url that looks like a file while the previous document
          #was fetched over http(s) keeps that previous protocol.
          def self.determine_protocol
            old_protocol = @@current_doc_protocol
            new_protocol = case @@current_doc_url
                           when /^https/ then 'https'
                           when /^http/  then 'http'
                           when /^www/   then 'http'
                           else 'file'
                           end
            return old_protocol if new_protocol == 'file' && %w[http https].include?(old_protocol)
            new_protocol
          end

          ##
          #Remember the directory of the first file path in @@base_dir, and
          #prefix later relative paths with it.
          def self.handle_relative_path(doc_url)
            if @@base_dir.nil?
              @@base_dir = doc_url.scan(/.+\//)[0] if @@current_doc_protocol == 'file'
            elsif !doc_url.include?(@@base_dir)
              # Only prefix when the base dir is not already part of the url;
              # otherwise @@current_doc_url is left as it is.
              @@current_doc_url = @@base_dir + doc_url
            end
          end

          ##
          #Turn a relative url into an absolute one and store it in
          #@@current_doc_url. Urls starting with "http" are left alone.
          #_resolve_ is <tt>:full</tt> (prefix with the stored host name),
          #<tt>:host</tt> (prefix with scheme and host only) or a custom
          #prefix string.
          def self.handle_relative_url(doc_url, resolve)
            return if doc_url =~ /^http/
            if doc_url !~ /^\//
              first_char = doc_url[0..0]
              doc_url = ( first_char == '?' ? '' : '/' ) + doc_url
              if first_char == '?'
                # Query-only url: append it to the url of the current page
                # (or of the first page in the agent history for popups).
                current_uri = @@mechanize_doc.uri.to_s
                current_uri = @@agent.history.first.uri.to_s if current_uri =~ /\/popup\//
                if current_uri.include?('?')
                  current_uri = current_uri.scan(/.+\//)[0]
                else
                  current_uri += '/' unless current_uri[-1..-1] == '/'
                end
                @@current_doc_url = current_uri + doc_url
                return
              end
            end
            case resolve
            when :full
              if @@host_name && !doc_url.include?(@@host_name)
                @@current_doc_url = @@host_name + doc_url
              end
              # Drops repeated segments of the url.
              @@current_doc_url = @@current_doc_url.split('/').uniq.join('/')
            when :host
              base_host_name = (@@host_name.count("/") == 2 ? @@host_name : @@host_name.scan(/(http.+?\/\/.+?)\//)[0][0])
              @@current_doc_url = base_host_name + doc_url
            else
              # custom resolving: _resolve_ is used as the prefix
              @@current_doc_url = resolve + doc_url
            end
          end

          ##
          #Fill the text field named _textfield_name_ with _query_string_.
          #Extra arguments are accepted and ignored.
          def self.fill_textfield(textfield_name, query_string, *unused)
            lookup_form_for_tag('input','textfield',textfield_name,query_string)
            if @@current_form
              # Plain assignment: the value is never evaluated as Ruby code.
              @@current_form[textfield_name] = query_string
            else
              Scrubyt.log :ERROR, "Couldn't find the form that contains this textfield. Please report a bug!"
            end
          end

          ##
          #Action to fill a textarea with text
          def self.fill_textarea(textarea_name, text)
            lookup_form_for_tag('textarea','textarea',textarea_name,text)
            if @@current_form
              @@current_form[textarea_name] = text
            else
              Scrubyt.log :ERROR, "Couldn't find the form that contains this textarea. Please report a bug!"
            end
          end

          ##
          #Action for selecting an option from a dropdown box
          #
          #The option is matched on its stripped text.
          def self.select_option(selectlist_name, option)
            lookup_form_for_tag('select','select list',selectlist_name,option)
            select_list = @@current_form.fields.find { |f| f.name == selectlist_name }
            searched_option = select_list.options.find { |f| f.text.strip == option }
            searched_option.click
          end

          ##
          #Check the checkbox named _checkbox_name_.
          def self.check_checkbox(checkbox_name)
            lookup_form_for_tag('input','checkbox',checkbox_name, '')
            checkbox = @@current_form.checkboxes.find { |c| c.name == checkbox_name }
            checkbox.check
          end

          ##
          #Check the _index_-th radio button named _checkbox_name_.
          def self.check_radiobutton(checkbox_name, index=0)
            lookup_form_for_tag('input','radiobutton',checkbox_name, '',index)
            @@current_form.radiobuttons.name(checkbox_name)[index].check
          end

          ##
          #Submit _current_form_ and return the resulting page: through the
          #agent without a button, through the form itself when _type_ is
          #set, through the agent with the button otherwise.
          def self.process_submit(current_form, button=nil, type=nil)
            if button.nil?
              @@agent.submit(current_form)
            elsif type
              current_form.submit(button)
            else
              @@agent.submit(current_form, button)
            end
          end

          ##
          #Find the form enclosing the _index_-th _tag_ element whose name
          #attribute is _name_attribute_ and remember it in @@current_form.
          #If no form node compares equal, @@current_form ends up being the
          #last form of the page.
          def self.lookup_form_for_tag(tag, widget_name, name_attribute, query_string, index=0)
            Scrubyt.log :ACTION, "typing #{query_string} into the #{widget_name} named '#{name_attribute}'"
            # to_a instead of a block-less map, which yields an Enumerator
            # (without #[]) on Ruby 1.9 and later.
            widget = (FetchAction.get_hpricot_doc/"#{tag}[@name=#{name_attribute}]").to_a[index]
            form_tag = Scrubyt::XPathUtils.traverse_up_until_name(widget, 'form')
            xp = Scrubyt::XPathUtils.generate_XPath(form_tag, nil, true)
            puts "=" * 100
            puts ">>#{xp}<<"
            puts "=" * 100
            form_element = FetchAction.get_mechanize_doc/xp

            FetchAction.get_mechanize_doc.forms.each do |f|
              @@current_form = f
              break if f.form_node == form_element
            end
          end

          ##
          #Set @@current_form to the first form of the page whose node has
          #the same value as _tag_ for the first attribute in
          #_possible_attrs_ that _tag_ defines. Returns nil (with
          #@@current_form set to nil) when the forms run out.
          def self.find_form_based_on_tag(tag, possible_attrs)
            lookup_attribute_name = nil
            lookup_attribute_value = nil

            possible_attrs.each do |attr|
              lookup_attribute_name = attr
              lookup_attribute_value = tag.attributes[attr]
              break unless lookup_attribute_value.nil?
            end

            i = 0
            loop do
              @@current_form = FetchAction.get_mechanize_doc.forms[i]
              return nil if @@current_form.nil?
              break if @@current_form.form_node.attributes[lookup_attribute_name].to_s == lookup_attribute_value
              i += 1
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
module Scrubyt
  ##
  #=<tt>Fetching pages (and related functionality)</tt>
  #
  #Holds the navigation state shared by the fetching code (current url,
  #protocol, host name, page history, extractor) and the accessors for it.
  #The methods that actually load documents are not defined in this module;
  #+store_host_name+ below delegates to a FetchAction singleton method of
  #the same name that must be provided from elsewhere.
  module FetchAction
    @@current_doc_url = @@current_doc_protocol = nil
    @@base_dir = @@host_name = nil
    @@current_form = @@extractor = nil
    @@history = []

    ##
    # Set the extractor used by the navigation actions.
    def self.extractor=(extractor)
      @@extractor = extractor
    end

    ##
    # The extractor set with +extractor=+ (nil until then).
    def self.extractor
      @@extractor
    end

    ##
    # At any given point, the current document can be queried with this method; Typically used
    # when the navigation is over and the result document is passed to the wrapper
    def self.get_current_doc_url
      @@current_doc_url
    end

    ##
    # The value of the @@mechanize_doc class variable.
    def self.get_mechanize_doc
      @@mechanize_doc
    end

    ##
    # The value of the @@hpricot_doc class variable.
    def self.get_hpricot_doc
      @@hpricot_doc
    end

    ##
    # The currently stored host name.
    def get_host_name
      @@host_name
    end

    ##
    # Reset the host name to the original one, except for 'file' documents.
    def restore_host_name
      @@host_name = @@original_host_name unless @@current_doc_protocol == 'file'
    end

    ##
    # Push the current parsed document onto the history stack.
    def store_page
      @@history << @@hpricot_doc
    end

    ##
    # Make the most recently stored document the current one again.
    def restore_page
      @@hpricot_doc = @@history.pop
    end

    ##
    # Instance-level shortcut for FetchAction.store_host_name.
    def store_host_name(doc_url)
      FetchAction.store_host_name(doc_url)
    end
  end
end