pstuteville-scrubyt 0.4.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +355 -0
- data/COPYING +340 -0
- data/README.rdoc +121 -0
- data/Rakefile +120 -0
- data/VERSION +1 -0
- data/examples/README.txt +1 -0
- data/examples/events/delta/input.html +682 -0
- data/examples/events/delta/test.rb +16 -0
- data/examples/misc/auto_regex/input.html +22 -0
- data/examples/misc/auto_regex/test.rb +14 -0
- data/examples/misc/compound_example/advanced/test.rb +11 -0
- data/examples/misc/compound_example/advanced/tricky_compound.html +9 -0
- data/examples/misc/compound_example/regexp/regexp_compound.html +17 -0
- data/examples/misc/compound_example/regexp/test.rb +11 -0
- data/examples/misc/compound_example/simple/compound.html +5 -0
- data/examples/misc/compound_example/simple/test.rb +11 -0
- data/examples/misc/detail_page/detailpage.html +6 -0
- data/examples/misc/detail_page/index.html +9 -0
- data/examples/misc/detail_page/test.rb +17 -0
- data/examples/misc/google/test.rb +39 -0
- data/examples/misc/identical_examples/data_extractor_export.rb +12 -0
- data/examples/misc/identical_examples/input.html +16 -0
- data/examples/misc/identical_examples/test.rb +15 -0
- data/examples/misc/immediate_attribute_extraction/data_extractor_export.rb +10 -0
- data/examples/misc/immediate_attribute_extraction/input.html +16 -0
- data/examples/misc/immediate_attribute_extraction/test.rb +14 -0
- data/examples/misc/multiple_examples/input.html +7 -0
- data/examples/misc/multiple_examples/test.rb +22 -0
- data/examples/misc/on_click_next/next_page_link.rb +42 -0
- data/examples/misc/on_click_next/page_1.html +10 -0
- data/examples/misc/on_click_next/page_2.html +10 -0
- data/examples/misc/on_click_next/page_3.html +7 -0
- data/examples/misc/rubycorner/test.rb +29 -0
- data/examples/misc/rubyforge_login/test.rb +30 -0
- data/examples/misc/tables/ambigous_records/input.html +17 -0
- data/examples/misc/tables/ambigous_records/test.rb +37 -0
- data/examples/misc/tables/another_plain_table/input.html +15 -0
- data/examples/misc/tables/another_plain_table/test.rb +25 -0
- data/examples/misc/tables/complex_table/input.html +45 -0
- data/examples/misc/tables/complex_table/test.rb +30 -0
- data/examples/misc/tables/grab_rows/input.html +20 -0
- data/examples/misc/tables/grab_rows/test.rb +30 -0
- data/examples/misc/tables/plain_table/input.html +39 -0
- data/examples/misc/tables/plain_table/test.rb +35 -0
- data/examples/misc/tables/plain_table_morepages/2.html +38 -0
- data/examples/misc/tables/plain_table_morepages/3.html +33 -0
- data/examples/misc/tables/plain_table_morepages/input.html +40 -0
- data/examples/misc/tables/plain_table_morepages/test.rb +32 -0
- data/examples/misc/tables/plain_table_morepages_with_image/2.html +40 -0
- data/examples/misc/tables/plain_table_morepages_with_image/3.html +33 -0
- data/examples/misc/tables/plain_table_morepages_with_image/images/right_arrow.png +0 -0
- data/examples/misc/tables/plain_table_morepages_with_image/input.html +42 -0
- data/examples/misc/tables/plain_table_morepages_with_image/test.rb +32 -0
- data/examples/misc/tables/test_select_indices/input.html +46 -0
- data/examples/misc/tables/test_select_indices/test.rb +55 -0
- data/examples/misc/xpath_example_type/input.html +15 -0
- data/examples/misc/xpath_example_type/test.rb +18 -0
- data/examples/misc/yahoo_finance/test.rb +26 -0
- data/examples/social/blog_comment/test.rb +27 -0
- data/examples/social/del.icio.us/test.rb +22 -0
- data/examples/social/digg/test.rb +37 -0
- data/examples/social/dzone/test.rb +28 -0
- data/examples/social/linkedin/linkedin.rb +23 -0
- data/examples/social/reddit/test.rb +23 -0
- data/examples/tones_extractor_export.rb +23 -0
- data/examples/webshops/amazon/002-8212888-3924065.html +5311 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0130796034.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/020161622X.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0321223675.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0465067107.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0470069155.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0470081201.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0596005253.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0596101325.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0596523696.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0672328844.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0764596861.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0974514055.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0976694069.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0977616606.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0977616614.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0977616630.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/1590597362.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/1594480060.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/1932394699.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/2841772101.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/amzn-logo-118w.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/askville-adwidget-bullet.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/askville-logo-sm-adwidget-white-bg.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/book_display_on_website-icon.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-inactive-no-ns.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-inactive-no.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-inactive-yes-ns.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-inactive-yes.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-no-tiny.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-yes-tiny.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/buybox-button-find-gifts-a.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/c7y_badge_rn_1.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/cap-a9-3.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/drop-down-icon-small-arrow.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/endcap-a9-go-2.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/gb-open-new.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/gc-logo-popover-a.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/gift-cert-roto-pop-a.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/go-button-books.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/go-button.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/go-orange-trans.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/go_button_photo.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/logo-off.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/n2CoreLibs-events-18134.js +1407 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/n2CoreLibs-n2v1-57871.css +364 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/n2CoreLibs-simplePopover-41153.js +749 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/n2CoreLibs-utilities-25439.js +1608 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/orange-arrow.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/orange-arrow_002.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/popover-blurb.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/powered-by-a9.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/stars-3-5.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/stars-4-0.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/stars-4-5.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/stars-5-0.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/tagline-adwidget.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/topnav-cart.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/transparent-pixel.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/transparent-pixel_002.gif +0 -0
- data/examples/webshops/amazon/test.rb +27 -0
- data/examples/webshops/amazon-online/test.rb +34 -0
- data/examples/webshops/barnes_and_noble/test.rb +32 -0
- data/examples/webshops/barnes_offline/barnes_and_noble.html +115 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/10964080.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/10999676.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/11018492.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/11656534.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/11985045.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/12052378.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/12138286.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/12533212.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/12533268.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/9583392.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/SearchProduct.css +626 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin3_gtpointup.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_aboutshipping.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_account.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_colon.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_giftreminder.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_help.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_orderstatus.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_wishlist.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/bg.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/btnGoGrn.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/cleardot.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/cleardot_002.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/dot4.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/dotGold20.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/hdCantFind.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/hdSearchResults.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/hgg_tab_home_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/hgg_tab_toy_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/iframeKMP.js +172 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2.html +25 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/070226_mc_lnav_search.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/XmlUtil.js +199 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/XslStyleSheet.js +1 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/kmp_gen.css +81 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/product-preview-core.js +337 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/product-preview.css +36 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/productpreview.js +11 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/linePagination.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/logo_bn05.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/navbar.js +34 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/navbar_06.css +136 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/popup_open.js +116 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/qsearch3_vline_dots.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/qsearch4_search.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/qsearch_AdvSearch.jpg +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/subnav_colon.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_Bookclubs_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_bnjr_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_books_hot.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_dvd_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_giftcards_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_home_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_member_cc_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_music_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_pipe.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_textbooksonly_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_usedoop_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_videogames_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/toppromo3_rule.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/toppromo_fastfree05.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/vcart_btn_checkout.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/vcart_icon_cart.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/vcart_topbot_rule.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/visualcart_prodid.js +401 -0
- data/examples/webshops/barnes_offline/test.rb +30 -0
- data/examples/webshops/buydig/2_files/03AA1BB9089A4A6A92CF23F280D664EB.jpg +0 -0
- data/examples/webshops/buydig/2_files/1008.gif +0 -0
- data/examples/webshops/buydig/2_files/1013.gif +0 -0
- data/examples/webshops/buydig/2_files/1020.gif +0 -0
- data/examples/webshops/buydig/2_files/106CF2FB84B446518397517C3E6D5AD8.jpg +0 -0
- data/examples/webshops/buydig/2_files/13-www.gif +0 -0
- data/examples/webshops/buydig/2_files/1E9BB2E56AB145FC9D6EF952703AF476.jpg +0 -0
- data/examples/webshops/buydig/2_files/1FCDFBE85CDB4D429EC2C8CB24D20457.jpg +0 -0
- data/examples/webshops/buydig/2_files/1pix.gif +0 -0
- data/examples/webshops/buydig/2_files/2014.gif +0 -0
- data/examples/webshops/buydig/2_files/2089.gif +0 -0
- data/examples/webshops/buydig/2_files/24992_medal.gif +0 -0
- data/examples/webshops/buydig/2_files/24BBCBA1397F4DDCBBBBE8456D6D6E5B.jpg +0 -0
- data/examples/webshops/buydig/2_files/281F8A6019B140F38DFD45EB7B69B0FB.jpg +0 -0
- data/examples/webshops/buydig/2_files/2975F866CB2149F7ACBC559C8E24E304.jpg +0 -0
- data/examples/webshops/buydig/2_files/316FC9256DC9460ABC3C5ECAF6C60286.jpg +0 -0
- data/examples/webshops/buydig/2_files/50569327D8B94252B95E449AE470E505.jpg +0 -0
- data/examples/webshops/buydig/2_files/519CDAB404FA4543B76B5F281468ACBF.jpg +0 -0
- data/examples/webshops/buydig/2_files/57D6146419A647BA89C96AF0B5CAB03C.jpg +0 -0
- data/examples/webshops/buydig/2_files/58E3F988E184448B8C0A59874AE123A8.jpg +0 -0
- data/examples/webshops/buydig/2_files/5E5B10197A4E4C9B9ECCD6309DBE4C54.jpg +0 -0
- data/examples/webshops/buydig/2_files/609A249177D04065B37B9161CB0BC92D.jpg +0 -0
- data/examples/webshops/buydig/2_files/676CEE8E53C2445982E991871B4DF613.jpg +0 -0
- data/examples/webshops/buydig/2_files/712BA08FAB524A31A76ABB9E2009FF8E.jpg +0 -0
- data/examples/webshops/buydig/2_files/734BD08D7A5049339393166491D09D21.jpg +0 -0
- data/examples/webshops/buydig/2_files/751E72B7003343248497FE6905F80787.jpg +0 -0
- data/examples/webshops/buydig/2_files/76493D4F02F14EF7B5886510604C7BB4.jpg +0 -0
- data/examples/webshops/buydig/2_files/79521E251278486DB29529C60C9D012A.jpg +0 -0
- data/examples/webshops/buydig/2_files/9C9AF82AC3B54BDC8C705278B50FDFD6.jpg +0 -0
- data/examples/webshops/buydig/2_files/BC3FD8307B9948FDB7EEF156D8629C37.jpg +0 -0
- data/examples/webshops/buydig/2_files/C0DD4574765047D1836F505E69DC8AE5.jpg +0 -0
- data/examples/webshops/buydig/2_files/C143F48515274A44B04F4B3E46306BD2.jpg +0 -0
- data/examples/webshops/buydig/2_files/C6B02E88F729464699DB275D140F4563.jpg +0 -0
- data/examples/webshops/buydig/2_files/CE334D6206DB4FA9AFDF339AEF0AF50F.jpg +0 -0
- data/examples/webshops/buydig/2_files/D66AE0DC865A4021AB300ED3A0C4CD11.jpg +0 -0
- data/examples/webshops/buydig/2_files/DEA2EC2093DC474D96B651068576DAE5.jpg +0 -0
- data/examples/webshops/buydig/2_files/F547677D83844042BF13A4BE6523BB50.jpg +0 -0
- data/examples/webshops/buydig/2_files/Rbbbonlineseal.gif +0 -0
- data/examples/webshops/buydig/2_files/TopSellers_bottom.gif +0 -0
- data/examples/webshops/buydig/2_files/TopSellers_ttl.gif +0 -0
- data/examples/webshops/buydig/2_files/addToFavorites_ttl.gif +0 -0
- data/examples/webshops/buydig/2_files/banner_CorporateSales.gif +0 -0
- data/examples/webshops/buydig/2_files/banner_Shipping.gif +0 -0
- data/examples/webshops/buydig/2_files/bizratehonoree.gif +0 -0
- data/examples/webshops/buydig/2_files/btn_submit.gif +0 -0
- data/examples/webshops/buydig/2_files/checkFlash.js +110 -0
- data/examples/webshops/buydig/2_files/checkFlash2.js +109 -0
- data/examples/webshops/buydig/2_files/cnetcertified.gif +0 -0
- data/examples/webshops/buydig/2_files/credPriceGrabber.gif +0 -0
- data/examples/webshops/buydig/2_files/credShopping.gif +0 -0
- data/examples/webshops/buydig/2_files/credential_paypal.gif +0 -0
- data/examples/webshops/buydig/2_files/credentials.gif +0 -0
- data/examples/webshops/buydig/2_files/dealtime.gif +0 -0
- data/examples/webshops/buydig/2_files/dvxstyle.css +754 -0
- data/examples/webshops/buydig/2_files/footer_021306_1_v1.gif +0 -0
- data/examples/webshops/buydig/2_files/func.js +132 -0
- data/examples/webshops/buydig/2_files/getseal +1 -0
- data/examples/webshops/buydig/2_files/help.gif +0 -0
- data/examples/webshops/buydig/2_files/home.gif +0 -0
- data/examples/webshops/buydig/2_files/java.js +155 -0
- data/examples/webshops/buydig/2_files/leftnv_help.gif +0 -0
- data/examples/webshops/buydig/2_files/logo.gif +0 -0
- data/examples/webshops/buydig/2_files/logo2.gif +0 -0
- data/examples/webshops/buydig/2_files/logo3.gif +0 -0
- data/examples/webshops/buydig/2_files/main.js +227 -0
- data/examples/webshops/buydig/2_files/mastercard_secured.gif +0 -0
- data/examples/webshops/buydig/2_files/newsBox_bkg.jpg +0 -0
- data/examples/webshops/buydig/2_files/newsBox_bottom.jpg +0 -0
- data/examples/webshops/buydig/2_files/newsBox_text.gif +0 -0
- data/examples/webshops/buydig/2_files/newsBox_ttl.jpg +0 -0
- data/examples/webshops/buydig/2_files/noimage75.gif +0 -0
- data/examples/webshops/buydig/2_files/orangeleftfrmbtm.gif +0 -0
- data/examples/webshops/buydig/2_files/pixel153.gif +0 -0
- data/examples/webshops/buydig/2_files/rightnv_bottom.gif +0 -0
- data/examples/webshops/buydig/2_files/search_btn_off.gif +0 -0
- data/examples/webshops/buydig/2_files/search_c1.gif +0 -0
- data/examples/webshops/buydig/2_files/search_c2.gif +0 -0
- data/examples/webshops/buydig/2_files/search_c3.gif +0 -0
- data/examples/webshops/buydig/2_files/search_c4.gif +0 -0
- data/examples/webshops/buydig/2_files/search_down.gif +0 -0
- data/examples/webshops/buydig/2_files/search_left.gif +0 -0
- data/examples/webshops/buydig/2_files/search_right.gif +0 -0
- data/examples/webshops/buydig/2_files/search_top.gif +0 -0
- data/examples/webshops/buydig/2_files/siteLinks_bottom.gif +0 -0
- data/examples/webshops/buydig/2_files/siteLinks_bullet.gif +0 -0
- data/examples/webshops/buydig/2_files/siteLinks_ttl.gif +0 -0
- data/examples/webshops/buydig/2_files/spacer.gif +0 -0
- data/examples/webshops/buydig/2_files/style.js +45 -0
- data/examples/webshops/buydig/2_files/styles.html +33 -0
- data/examples/webshops/buydig/2_files/track_orders.jpg +0 -0
- data/examples/webshops/buydig/2_files/urchin +534 -0
- data/examples/webshops/buydig/2_files/verified_by_visa.gif +0 -0
- data/examples/webshops/buydig/2_files/welcome.gif +0 -0
- data/examples/webshops/buydig/2_files/welcome_ttl.gif +0 -0
- data/examples/webshops/buydig/2_files/yahoologo.gif +0 -0
- data/examples/webshops/buydig/input.html +1194 -0
- data/examples/webshops/buydig/test.rb +31 -0
- data/examples/webshops/ebay/test.rb +32 -0
- data/examples/webshops/finewines_offline/_finewines.html +1739 -0
- data/examples/webshops/finewines_offline/_finewines_cut.html +371 -0
- data/examples/webshops/finewines_offline/_finewines_files/011064.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/012674.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/013268.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/013300.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/013409.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/014340.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/015073.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/015255.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/015479.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/015487.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017038.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017129.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017145.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017152.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017285.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017392.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017400.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/019778.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/019786.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/020503.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/021253.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/021279.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/021337.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/021352.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/023002.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/023135.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/023143.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/023788.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/024166.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/024182.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/024216.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/027268.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/027516.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/027862.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/028118.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/028936.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033894.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033902.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033910.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033936.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033944.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033951.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/034553.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/034561.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/232439.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/237834.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/268359.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/289082.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/331603.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/369686.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/420257.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/422014.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/460410.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/480533.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/556795.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/597054.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/650606.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/652628.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/653790.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/658450.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/660027.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/660951.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/684514.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/685131.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/686865.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/699330.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/703017.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/703140.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/703850.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/717306.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/900274.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/938225.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/947440.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/951319.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/967893.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/981407.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/981613.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/982421.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/985598.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/986737.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/987503.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/992800.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/VintageslogoEN.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/blanc-up.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/btn_vintages_latest.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/cc_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/common.js +860 -0
- data/examples/webshops/finewines_offline/_finewines_files/drink.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/drinkhold.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/ec_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/ev_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/hold.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/index-wines-features.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/indexSTYLE.css +398 -0
- data/examples/webshops/finewines_offline/_finewines_files/keyword_search.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/mm_menu.js +1 -0
- data/examples/webshops/finewines_offline/_finewines_files/nr_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/ontario_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/popup.js +81 -0
- data/examples/webshops/finewines_offline/_finewines_files/releases_nav.js +229 -0
- data/examples/webshops/finewines_offline/_finewines_files/so_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/spacer.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/top.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/urchin.js +576 -0
- data/examples/webshops/finewines_offline/_finewines_files/wom_en.gif +0 -0
- data/examples/webshops/finewines_offline/test.rb +30 -0
- data/examples/webshops/us1camera/1_files/1pix.gif +0 -0
- data/examples/webshops/us1camera/1_files/1pix_002.gif +0 -0
- data/examples/webshops/us1camera/1_files/CnetCertified.gif +0 -0
- data/examples/webshops/us1camera/1_files/CyberSource.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50_002.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50_003.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50_004.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50_005.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50_006.gif +0 -0
- data/examples/webshops/us1camera/1_files/PriceGrabber.gif +0 -0
- data/examples/webshops/us1camera/1_files/QSearch.gif +0 -0
- data/examples/webshops/us1camera/1_files/ban-m.jpg +0 -0
- data/examples/webshops/us1camera/1_files/banner1.bin +0 -0
- data/examples/webshops/us1camera/1_files/banner3.bin +0 -0
- data/examples/webshops/us1camera/1_files/block1.jpg +0 -0
- data/examples/webshops/us1camera/1_files/block2.jpg +0 -0
- data/examples/webshops/us1camera/1_files/block3.jpg +0 -0
- data/examples/webshops/us1camera/1_files/block4.jpg +0 -0
- data/examples/webshops/us1camera/1_files/block5.jpg +0 -0
- data/examples/webshops/us1camera/1_files/block6.jpg +0 -0
- data/examples/webshops/us1camera/1_files/bos.js +280 -0
- data/examples/webshops/us1camera/1_files/box1.jpg +0 -0
- data/examples/webshops/us1camera/1_files/box2.jpg +0 -0
- data/examples/webshops/us1camera/1_files/box3.jpg +0 -0
- data/examples/webshops/us1camera/1_files/box4.jpg +0 -0
- data/examples/webshops/us1camera/1_files/dot.jpg +0 -0
- data/examples/webshops/us1camera/1_files/eDevix.gif +0 -0
- data/examples/webshops/us1camera/1_files/electronics1.jpg +0 -0
- data/examples/webshops/us1camera/1_files/getseal +1 -0
- data/examples/webshops/us1camera/1_files/pride.jpg +0 -0
- data/examples/webshops/us1camera/1_files/search.jpg +0 -0
- data/examples/webshops/us1camera/1_files/sidebutton.jpg +0 -0
- data/examples/webshops/us1camera/1_files/sslroilogic.js +49 -0
- data/examples/webshops/us1camera/1_files/style.css +1 -0
- data/examples/webshops/us1camera/1_files/tl.html +2 -0
- data/examples/webshops/us1camera/input.html +548 -0
- data/examples/webshops/us1camera/test.rb +37 -0
- data/lib/scrubyt/core/navigation/agents/firewatir.rb +285 -0
- data/lib/scrubyt/core/navigation/agents/mechanize.rb +315 -0
- data/lib/scrubyt/core/navigation/fetch_action.rb +63 -0
- data/lib/scrubyt/core/navigation/navigation_actions.rb +107 -0
- data/lib/scrubyt/core/scraping/compound_example.rb +30 -0
- data/lib/scrubyt/core/scraping/constraint.rb +169 -0
- data/lib/scrubyt/core/scraping/constraint_adder.rb +49 -0
- data/lib/scrubyt/core/scraping/filters/attribute_filter.rb +14 -0
- data/lib/scrubyt/core/scraping/filters/base_filter.rb +112 -0
- data/lib/scrubyt/core/scraping/filters/constant_filter.rb +9 -0
- data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +37 -0
- data/lib/scrubyt/core/scraping/filters/download_filter.rb +64 -0
- data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +9 -0
- data/lib/scrubyt/core/scraping/filters/regexp_filter.rb +13 -0
- data/lib/scrubyt/core/scraping/filters/script_filter.rb +11 -0
- data/lib/scrubyt/core/scraping/filters/text_filter.rb +34 -0
- data/lib/scrubyt/core/scraping/filters/tree_filter.rb +138 -0
- data/lib/scrubyt/core/scraping/pattern.rb +359 -0
- data/lib/scrubyt/core/scraping/pre_filter_document.rb +14 -0
- data/lib/scrubyt/core/scraping/result_indexer.rb +90 -0
- data/lib/scrubyt/core/shared/extractor.rb +171 -0
- data/lib/scrubyt/logging.rb +154 -0
- data/lib/scrubyt/output/post_processor.rb +139 -0
- data/lib/scrubyt/output/result.rb +44 -0
- data/lib/scrubyt/output/result_dumper.rb +154 -0
- data/lib/scrubyt/output/result_node.rb +145 -0
- data/lib/scrubyt/output/scrubyt_result.rb +42 -0
- data/lib/scrubyt/utils/compound_example_lookup.rb +50 -0
- data/lib/scrubyt/utils/ruby_extensions.rb +85 -0
- data/lib/scrubyt/utils/shared_utils.rb +58 -0
- data/lib/scrubyt/utils/simple_example_lookup.rb +40 -0
- data/lib/scrubyt/utils/xpathutils.rb +202 -0
- data/lib/scrubyt.rb +53 -0
- data/pkg/scrubyt-0.4.31.gem +0 -0
- data/resources/allison/LICENSE +184 -0
- data/resources/allison/README +37 -0
- data/resources/allison/allison.css +301 -0
- data/resources/allison/allison.gif +0 -0
- data/resources/allison/allison.js +307 -0
- data/resources/allison/allison.rb +287 -0
- data/resources/allison/cache/BODY +588 -0
- data/resources/allison/cache/CLASS_INDEX +4 -0
- data/resources/allison/cache/CLASS_PAGE +1 -0
- data/resources/allison/cache/FILE_INDEX +4 -0
- data/resources/allison/cache/FILE_PAGE +1 -0
- data/resources/allison/cache/FONTS +1 -0
- data/resources/allison/cache/FR_INDEX_BODY +1 -0
- data/resources/allison/cache/IMGPATH +1 -0
- data/resources/allison/cache/INDEX +1 -0
- data/resources/allison/cache/JAVASCRIPT +307 -0
- data/resources/allison/cache/METHOD_INDEX +4 -0
- data/resources/allison/cache/METHOD_LIST +1 -0
- data/resources/allison/cache/SRC_PAGE +1 -0
- data/resources/allison/cache/STYLE +323 -0
- data/resources/allison/cache/URL +1 -0
- data/scrubyt.gemspec +609 -0
- data/test/blackbox_test.rb +60 -0
- data/test/blackbox_tests/basic/multi_root.expected.xml +8 -0
- data/test/blackbox_tests/basic/multi_root.rb +6 -0
- data/test/blackbox_tests/basic/simple.expected.xml +5 -0
- data/test/blackbox_tests/basic/simple.rb +5 -0
- data/test/blackbox_tests/basic/three_divs.html +12 -0
- data/test/blackbox_tests/detail_page/detail_page_1.html +7 -0
- data/test/blackbox_tests/detail_page/detail_page_2.html +7 -0
- data/test/blackbox_tests/detail_page/main_page_1.html +5 -0
- data/test/blackbox_tests/detail_page/main_page_2.html +6 -0
- data/test/blackbox_tests/detail_page/one_detail_page.expected.xml +7 -0
- data/test/blackbox_tests/detail_page/one_detail_page.rb +9 -0
- data/test/blackbox_tests/detail_page/two_detail_pages.expected.xml +12 -0
- data/test/blackbox_tests/detail_page/two_detail_pages.rb +9 -0
- data/test/blackbox_tests/next_page/next_page_link.expected.xml +11 -0
- data/test/blackbox_tests/next_page/next_page_link.rb +7 -0
- data/test/blackbox_tests/next_page/page_1.html +11 -0
- data/test/blackbox_tests/next_page/page_2.html +11 -0
- data/test/blackbox_tests/next_page/page_3.html +7 -0
- data/test/blackbox_tests/next_page/page_list_links.expected.xml +11 -0
- data/test/blackbox_tests/next_page/page_list_links.rb +7 -0
- data/test/blackbox_tests/next_page/page_list_links.tofix +7 -0
- data/todo/backlog.txt +73 -0
- data/todo/scenario_ideas.txt +19 -0
- metadata +637 -0
@@ -0,0 +1,285 @@
|
|
1
|
+
require 'firewatir'
|
2
|
+
|
3
|
+
module Scrubyt
|
4
|
+
##
|
5
|
+
#=<tt>Fetching pages (and related functionality)</tt>
|
6
|
+
#
|
7
|
+
#Since a lot of things are happening during (and before)
|
8
|
+
#the fetching of a document, I decided to move out fetching related
|
9
|
+
#functionality to a separate class - so if you are looking for anything
|
10
|
+
#which is loading a document (even by submitting a form or clicking a link)
|
11
|
+
#and related things like setting a proxy etc. you should find it here.
|
12
|
+
module Navigation
|
13
|
+
module Firewatir
|
14
|
+
|
15
|
+
def self.included(base)
|
16
|
+
base.module_eval do
|
17
|
+
@@agent = FireWatir::Firefox.new unless defined? @@agent
|
18
|
+
@@current_doc_url = nil
|
19
|
+
@@current_doc_protocol = nil
|
20
|
+
@@base_dir = nil
|
21
|
+
@@host_name = nil
|
22
|
+
@@history = []
|
23
|
+
@@current_form = nil
|
24
|
+
@@current_frame = nil
|
25
|
+
|
26
|
+
##
|
27
|
+
#Action to fetch a document (either a file or a http address)
|
28
|
+
#
|
29
|
+
#*parameters*
|
30
|
+
#
|
31
|
+
#_doc_url_ - the url or file name to fetch
|
32
|
+
def self.fetch(doc_url, *args)
  # Optional settings arrive as one hash in args[0]:
  #   :mechanize_doc - an already loaded document to use instead of navigating
  #   :resolve       - how relative URLs are resolved (:full, :host or a prefix string)
  #   :basic_auth    - credentials passed on to parse_and_set_basic_auth
  options = args.size > 0 ? args[0] : {}
  mechanize_doc = options[:mechanize_doc]
  # Default to :full when :resolve is omitted; a nil value would make
  # handle_relative_url fail on "resolve + doc_url".
  resolve = options[:resolve] || :full
  basic_auth = options[:basic_auth]
  parse_and_set_basic_auth(basic_auth) if basic_auth

  @@current_doc_url = doc_url
  @@current_doc_protocol = determine_protocol
  if mechanize_doc.nil?
    handle_relative_path(doc_url) unless @@current_doc_protocol == 'xpath'
    handle_relative_url(doc_url, resolve)
    Scrubyt.log :ACTION, "fetching document: #{@@current_doc_url}"
    # "when ... then" replaces the "when ...:" form, which only Ruby 1.8 parses.
    case @@current_doc_protocol
    when 'file' then @@agent.goto("file://" + @@current_doc_url)
    else @@agent.goto(@@current_doc_url)
    end
    @@mechanize_doc = "<html>#{@@agent.html}</html>"
  else
    @@mechanize_doc = mechanize_doc
  end
  @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc))
  store_host_name(@@agent.url) # in case we're on a new host
end
|
62
|
+
|
63
|
+
def self.use_current_page
  # Take whatever the browser shows right now and re-parse it.
  page_source = "<html>#{@@agent.html}</html>"
  @@mechanize_doc = page_source
  @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(page_source))
end
|
67
|
+
|
68
|
+
def self.frame(attribute, value)
  # With a frame already selected, look the requested frame up inside it.
  # NOTE(review): the nested lookup result is returned but not stored in
  # @@current_frame - confirm this is intended.
  return @@current_frame.frame(attribute, value) if @@current_frame

  # First selection: remember the frame for later form actions.
  @@current_frame = @@agent.frame(attribute, value)
end
|
75
|
+
|
76
|
+
##
|
77
|
+
#Submit the last form;
|
78
|
+
def self.submit(current_form, sleep_time=nil, button=nil, type=nil)
  # The form is taken from @@current_frame / @@current_form; the current_form,
  # button and type arguments are not read in this method.
  if @@current_frame
    # Workaround for framed pages: locate the frame, then submit the first
    # FORM element found inside it.
    @@current_frame.locate
    frame_form = Document.new(@@current_frame).all.find { |node| node.tagName=="FORM" }
    frame_form.submit
  else
    @@agent.element_by_xpath(@@current_form).submit
  end

  # Optional pause, followed by waiting for the browser.
  if sleep_time
    sleep(sleep_time)
    @@agent.wait
  end

  # Refresh the cached URL and documents from the browser.
  @@current_doc_url = @@agent.url
  @@mechanize_doc = "<html>#{@@agent.html}</html>"
  @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc))
end
|
98
|
+
|
99
|
+
##
|
100
|
+
#Click the link specified by the text
|
101
|
+
def self.click_link(link_spec,index = 0,wait_secs=0)
  Scrubyt.log :ACTION, "Clicking link specified by: %p" % link_spec
  case link_spec
  when Hash
    # Compound example: find the node in the parsed document, then click the
    # element at its generated XPath.
    node = CompoundExampleLookup.find_node_from_compund_example(@@hpricot_doc, link_spec, false, index)
    xpath = XPathUtils.generate_XPath(node, nil, true)
    @@agent.element_by_xpath(xpath).click
  else
    # Plain text: look the link up by its inner HTML.
    @@agent.link(:innerHTML, Regexp.escape(link_spec)).click
  end
  sleep(wait_secs) if wait_secs > 0
  @@agent.wait

  # evaluate the results
  extractor.evaluate_extractor

  # Refresh the cached URL and documents from the browser.
  @@current_doc_url = @@agent.url
  @@mechanize_doc = "<html>#{@@agent.html}</html>"
  @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc))
  Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
end
|
120
|
+
|
121
|
+
def self.click_by_xpath_if_exists(xpath, wait_secs=0)
  # Same steps as click_by_xpath, except that an unknown element is only
  # logged (see the rescue clause) instead of raising.
  @@agent.element_by_xpath(xpath).click
  sleep(wait_secs) if wait_secs > 0
  @@agent.wait

  extractor.evaluate_extractor

  # Refresh the cached URL and documents from the browser.
  @@current_doc_url = @@agent.url
  @@mechanize_doc = "<html>#{@@agent.html}</html>"
  @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc))
  Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
rescue Watir::Exception::UnknownObjectException
  Scrubyt.log :INFO, "XPath #{xpath} doesn't exist in this document"
end
|
137
|
+
|
138
|
+
def self.click_by_xpath(xpath, wait_secs=0)
  # Click the element at +xpath+, optionally pause for +wait_secs+ seconds,
  # run the extractor and refresh the cached URL and documents.
  Scrubyt.log :ACTION, "Clicking by XPath : %p" % xpath
  @@agent.element_by_xpath(xpath).click
  # Only announce a sleep when one actually happens.
  if wait_secs > 0
    Scrubyt.log :INFO, "sleeping #{wait_secs}..."
    sleep(wait_secs)
  end
  @@agent.wait

  # evaluate the results
  extractor.evaluate_extractor

  @@current_doc_url = @@agent.url
  @@mechanize_doc = "<html>#{@@agent.html}</html>"
  @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc))
  Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
end
|
153
|
+
|
154
|
+
def self.click_image_map(index = 0)
  # NOTE(review): a second click_image_map is defined further down in this
  # module and raises 'NotImplemented'; being defined later, that one is the
  # definition in effect.
  Scrubyt.log :ACTION, "Clicking image map at index: %p" % index
  area_href = @@mechanize_doc.search("//area")[index]['href']
  target_page = @@agent.get(area_href)
  @@current_doc_url = target_page.uri.to_s
  Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
  fetch(@@current_doc_url, :mechanize_doc => target_page)
end
|
162
|
+
|
163
|
+
def self.store_host_name(doc_url)
  # Keep the leading part of the URL matched by the pattern below; the
  # previous host name is kept when the URL does not match.
  host_match = doc_url.match(/.*\..*?\//)
  @@host_name = host_match[0] if host_match
  # The first host name ever stored is remembered separately.
  @@original_host_name ||= @@host_name
end #end of method store_host_name
|
167
|
+
|
168
|
+
def self.determine_protocol
  # Classify @@current_doc_url by its prefix; anything unrecognised counts as
  # a file.
  previous = @@current_doc_protocol
  detected = case @@current_doc_url
             when /^\/\// then 'xpath'
             when /^https/ then 'https'
             when /^http/, /^www\./ then 'http'
             else 'file'
             end
  # A "file" result after an http(s) document keeps the previous protocol.
  return previous if detected == 'file' && (previous == 'http' || previous == 'https')
  detected
end
|
186
|
+
|
187
|
+
def self.parse_and_set_basic_auth(basic_auth)
  # +basic_auth+ is a "login@password" string; its first two '@'-separated
  # parts are handed to the agent.
  login, pass = basic_auth.split('@')
  # The password is deliberately kept out of the log.
  Scrubyt.log :ACTION, "Basic authentication: login=<#{login}>, pass=<[FILTERED]>"
  @@agent.basic_auth(login, pass)
end
|
192
|
+
|
193
|
+
def self.handle_relative_path(doc_url)
  if @@base_dir == nil || doc_url[0..0] == "/"
    # No base directory yet, or a path starting with "/": for file documents,
    # take the directory part of the path as the new base.
    @@base_dir = doc_url.scan(/.+\//)[0] if @@current_doc_protocol == 'file'
  else
    # Prefix the base directory unless the path already contains it.
    # The path is otherwise left untouched (it used to be reset to nil), and
    # the comparison is literal rather than a regexp built from the path.
    @@current_doc_url = @@base_dir + doc_url unless doc_url.include?(@@base_dir)
  end
end
|
200
|
+
|
201
|
+
def self.handle_relative_url(doc_url, resolve)
  # Turns a relative +doc_url+ into @@current_doc_url. +resolve+ selects the
  # strategy: :full, :host, or any other value used as a prefix.
  #
  # Absolute http/https URLs and javascript: pseudo-URLs need no resolving.
  return if doc_url =~ /^(https?:|javascript:)/
  if doc_url !~ /^\//
    first_char = doc_url[0..0]
    doc_url = ( first_char == '?' ? '' : '/' ) + doc_url
    if first_char == '?'
      # Query-only link: append it to the address of the current page.
      # This agent stores the page as a plain String, which has no #uri, so
      # fall back to the browser's own URL in that case.
      current_uri = @@mechanize_doc.respond_to?(:uri) ? @@mechanize_doc.uri.to_s : @@agent.url.to_s
      current_uri = @@agent.history.first.uri.to_s if current_uri =~ /\/popup\//
      if (current_uri.include? '?')
        current_uri = current_uri.scan(/.+\//)[0]
      else
        current_uri += '/' unless current_uri[-1..-1] == '/'
      end
      @@current_doc_url = current_uri + doc_url
      return
    end
  end
  case resolve
  when :full
    @@current_doc_url = (@@host_name + doc_url) if ( @@host_name != nil && (doc_url !~ /#{@@host_name}/))
    # Drops repeated segments after joining host and path.
    @@current_doc_url = @@current_doc_url.split('/').uniq.join('/')
  when :host
    base_host_name = (@@host_name.count("/") == 2 ? @@host_name : @@host_name.scan(/(http.+?\/\/.+?)\//)[0][0])
    @@current_doc_url = base_host_name + doc_url
  else
    #custom resolving
    @@current_doc_url = resolve + doc_url
  end
end
|
230
|
+
|
231
|
+
def self.fill_textfield(textfield_name, query_string, wait_secs, useValue)
  # Remember the enclosing form (as an XPath) for a later submit.
  @@current_form = "//input[@name='#{textfield_name}']/ancestor::form"
  # Work inside the selected frame if there is one, otherwise on the page.
  field = (@@current_frame || @@agent).text_field(:name,textfield_name)
  # useValue assigns the value directly; otherwise the field's set is used.
  if useValue
    field.value = query_string
  else
    field.set(query_string)
  end
  sleep(wait_secs) if wait_secs > 0
  # Refresh the cached documents from the browser.
  @@mechanize_doc = "<html>#{@@agent.html}</html>"
  @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc))
end
|
244
|
+
|
245
|
+
##
|
246
|
+
#Action to fill a textarea with text
|
247
|
+
def self.fill_textarea(textarea_name, text)
  # Remember the enclosing form (as an XPath) for a later submit. The element
  # may be a <textarea> or an <input>, so the XPath accepts both; it used to
  # match only <input>.
  @@current_form = "//*[(self::textarea or self::input) and @name='#{textarea_name}']/ancestor::form"
  @@agent.text_field(:name,textarea_name).set(text)
end
|
251
|
+
|
252
|
+
##
|
253
|
+
#Action for selecting an option from a dropdown box
|
254
|
+
def self.select_option(selectlist_name, option)
  # Remember the enclosing form for a later submit, then pick the option.
  form_xpath = "//select[@name='#{selectlist_name}']/ancestor::form"
  @@current_form = form_xpath
  dropdown = @@agent.select_list(:name,selectlist_name)
  dropdown.select(option)
end
|
258
|
+
|
259
|
+
def self.check_checkbox(checkbox_name)
  # Remember the enclosing form for a later submit, then tick the box.
  form_xpath = "//input[@name='#{checkbox_name}']/ancestor::form"
  @@current_form = form_xpath
  box = @@agent.checkbox(:name,checkbox_name)
  box.set(true)
end
|
263
|
+
|
264
|
+
def self.check_radiobutton(checkbox_name, index=0)
  # Remember the enclosing form for a later submit.
  @@current_form = "//input[@name='#{checkbox_name}']/ancestor::form"
  # Inputs sharing the name are addressed by position.
  candidates = @@agent.elements_by_xpath("//input[@name='#{checkbox_name}']")
  candidates[index].set
end
|
268
|
+
|
269
|
+
def self.click_image_map(index=0)
  # Image maps are not supported by this agent.
  raise RuntimeError, 'NotImplemented'
end
|
272
|
+
|
273
|
+
def self.wait(time=1)
  # Pause for +time+ seconds, then call the browser's own wait.
  sleep time
  @@agent.wait
end
|
277
|
+
|
278
|
+
def self.close_firefox
  # Close the browser instance held in @@agent.
  browser = @@agent
  browser.close
end
|
281
|
+
end
|
282
|
+
end
|
283
|
+
end
|
284
|
+
end
|
285
|
+
end
|
@@ -0,0 +1,315 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'mechanize'
|
3
|
+
module Scrubyt
|
4
|
+
##
|
5
|
+
#=<tt>Fetching pages (and related functionality)</tt>
|
6
|
+
#
|
7
|
+
#Since a lot of things are happening during (and before)
|
8
|
+
#the fetching of a document, I decided to move out fetching related
|
9
|
+
#functionality to a separate class - so if you are looking for anything
|
10
|
+
#which is loading a document (even by submitting a form or clicking a link)
|
11
|
+
#and related things like setting a proxy etc. you should find it here.
|
12
|
+
module Navigation
|
13
|
+
module Mechanize
|
14
|
+
|
15
|
+
def self.included(base)
|
16
|
+
base.module_eval do
|
17
|
+
@@agent = WWW::Mechanize.new
|
18
|
+
@@agent.user_agent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1"
|
19
|
+
@@current_doc_url = nil
|
20
|
+
@@current_doc_protocol = nil
|
21
|
+
@@base_dir = nil
|
22
|
+
@@host_name = nil
|
23
|
+
@@history = []
|
24
|
+
|
25
|
+
##
|
26
|
+
#Action to fetch a document (either a file or a http address)
|
27
|
+
#
|
28
|
+
#*parameters*
|
29
|
+
#
|
30
|
+
#_doc_url_ - the url or file name to fetch
|
31
|
+
def self.fetch(doc_url, *args)
  # Optional settings arrive as one hash in args[0]:
  #   :mechanize_doc - an already loaded page to use instead of requesting one
  #   :html          - raw HTML to wrap in a page object
  #   :resolve       - how relative URLs are resolved (:full, :host or a prefix string)
  #   :basic_auth    - credentials passed on to parse_and_set_basic_auth
  #   :proxy         - proxy specification passed on to parse_and_set_proxy
  options = args.size > 0 ? args[0] : {}
  mechanize_doc = options[:mechanize_doc]
  html = options[:html]
  # Default to :full when :resolve is omitted; a nil value would make
  # handle_relative_url fail on "resolve + doc_url".
  resolve = options[:resolve] || :full
  basic_auth = options[:basic_auth]
  parse_and_set_basic_auth(basic_auth) if basic_auth
  proxy = options[:proxy]
  parse_and_set_proxy(proxy) if proxy
  if html
    # NOTE(review): this 'string' protocol is overwritten by determine_protocol
    # just below - confirm whether that is intended.
    @@current_doc_protocol = 'string'
    mechanize_doc = WWW::Mechanize::Page.new(nil, {'content-type' => 'text/html'}, html)
  end

  @@current_doc_url = doc_url
  @@current_doc_protocol = determine_protocol

  if mechanize_doc.nil? && @@current_doc_protocol != 'file'
    handle_relative_path(doc_url)
    handle_relative_url(doc_url, resolve)
    Scrubyt.log :ACTION, "fetching document: #{@@current_doc_url}"
    @@mechanize_doc = @@agent.get(@@current_doc_url)
  else
    @@mechanize_doc = mechanize_doc
  end

  if @@current_doc_protocol == 'file'
    # File.read closes the handle and, unlike Kernel#open, never treats a
    # leading "|" as a command to run.
    @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(File.read(@@current_doc_url)))
  else
    @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc.body))
    store_host_name(self.get_current_doc_url) # in case we're on a new host
  end
end
|
72
|
+
|
73
|
+
##
|
74
|
+
#Submit the last form;
|
75
|
+
def self.submit(index=nil, sleep_time=nil, type=nil)
  # Submits @@current_form and fetches the resulting page.
  #   index nil                 - plain submit
  #   index String and type set - submit via the button whose name or value equals index
  #   otherwise                 - submit via the button at position index
  # sleep_time is not read in this method.
  Scrubyt.log :ACTION, 'Submitting form...'
  result_page =
    if index.nil?
      process_submit(@@current_form)
    elsif index.instance_of?(String) && !type.nil?
      #----- added by nickmerwin@gmail.com -----
      named_button = @@current_form.buttons.detect { |b| b.name == index || b.value == index }
      process_submit(@@current_form, named_button, type)
    else
      @@agent.submit(@@current_form, @@current_form.buttons[index])
    end
  @@current_doc_url = result_page.uri.to_s
  Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
  fetch(@@current_doc_url, :mechanize_doc => result_page)
end
|
93
|
+
|
94
|
+
##
|
95
|
+
#Click the link specified by the text
|
96
|
+
def self.click_link(link_spec,index = 0,wait_secs=0)
  # wait_secs is accepted but not read in this method.
  Scrubyt.log :ACTION, "Clicking link specified by: %p" % link_spec
  # A Hash is looked up as a compound example, anything else by its text.
  example_node = case link_spec
                 when Hash
                   CompoundExampleLookup.find_node_from_compund_example(@@hpricot_doc, link_spec, false, index)
                 else
                   SimpleExampleLookup.find_node_from_text(@@hpricot_doc, link_spec, false, index)
                 end
  # Click the nearest node that carries an href attribute.
  link_node = XPathUtils.find_nearest_node_with_attribute(example_node, 'href')
  result_page = @@agent.click(link_node)
  @@current_doc_url = result_page.uri.to_s
  Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
  fetch(@@current_doc_url, :mechanize_doc => result_page)
end
|
109
|
+
|
110
|
+
def self.click_image_map(index = 0)
  # Follow the href of the index-th <area> element of the current page.
  Scrubyt.log :ACTION, "Clicking image map at index: %p" % index
  area_href = @@mechanize_doc.search("//area")[index]['href']
  target_page = @@agent.get(area_href)
  @@current_doc_url = target_page.uri.to_s
  Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
  fetch(@@current_doc_url, :mechanize_doc => target_page)
end
|
118
|
+
|
119
|
+
def self.store_host_name(doc_url)
  # Records the scheme and host of the current page in @@host_name; the first
  # value ever stored is also kept in @@original_host_name.
  if @@current_doc_protocol == 'http' || @@current_doc_protocol == 'https'
    # Take "scheme://host" straight from the page URI. Prefixing the scheme to
    # the full match used to produce "http://http://host", and a URI that did
    # not match raised NoMethodError.
    host_match = @@mechanize_doc.uri.to_s.match(%r{\Ahttps?://[^/]+})
    @@host_name = host_match[0] if host_match
  end
  @@host_name = doc_url if @@host_name == nil
  # Strip one trailing slash, if any.
  @@host_name = @@host_name[0..-2] if @@host_name && @@host_name[-1..-1] == '/'
  @@original_host_name ||= @@host_name
end #end of method store_host_name
|
126
|
+
|
127
|
+
def self.parse_and_set_proxy(proxy)
  # Parses a proxy specification and configures the agent with it.
  # Shapes handled below:
  #   anything containing "localhost" - host 'localhost', port after the last ':'
  #   user:pass@host:port
  #   user@host:port
  #   host:port
  @@proxy_user = @@proxy_pass = nil
  if proxy.downcase.include?('localhost')
    @@host = 'localhost'
    @@port = proxy.split(':').last
  else
    parts = proxy.split(':')
    if parts.size > 2
      user_pass = parts[1].split('@')
      @@proxy_user = parts[0]
      @@proxy_pass = user_pass[0]
      @@host = user_pass[1]
      @@port = parts[2]
    elsif parts[0].include?('@')
      user_host = parts[0].split('@')
      @@proxy_user = user_host[0]
      @@host = user_host[1]
      @@port = parts[1]
    else
      @@host = parts[0]
      @@port = parts[1]
    end

    # An incomplete specification ends the program.
    if @@host == nil || @@port == nil
      puts "Invalid proxy specification..."
      puts "neither host nor port can be nil!"
      exit
    end
  end
  # The password is deliberately kept out of the log.
  Scrubyt.log :ACTION, "[ACTION] Setting proxy: host=<#{@@host}>, port=<#{@@port}>, username=<#{@@proxy_user}>, password=<#{@@proxy_pass ? '[FILTERED]' : ''}>"
  @@agent.set_proxy(@@host, @@port, @@proxy_user, @@proxy_pass)
end
|
161
|
+
|
162
|
+
def self.determine_protocol
  # Classify @@current_doc_url by its prefix; anything unrecognised counts as
  # a file.
  previous = @@current_doc_protocol
  detected = case @@current_doc_url
             when /^https/ then 'https'
             when /^http/, /^www/ then 'http'
             else 'file'
             end
  # A "file" result after an http(s) document keeps the previous protocol.
  return previous if detected == 'file' && (previous == 'http' || previous == 'https')
  detected
end
|
178
|
+
|
179
|
+
def self.handle_relative_path(doc_url)
  if @@base_dir == nil
    # No base directory yet: for file documents, take the directory part of
    # the path as the base.
    @@base_dir = doc_url.scan(/.+\//)[0] if @@current_doc_protocol == 'file'
  else
    # Prefix the base directory unless the path already contains it.
    # The path is otherwise left untouched (it used to be reset to nil), and
    # the comparison is literal rather than a regexp built from the path.
    @@current_doc_url = @@base_dir + doc_url unless doc_url.include?(@@base_dir)
  end
end
|
186
|
+
|
187
|
+
def self.handle_relative_url(doc_url, resolve)
  # Turns a relative +doc_url+ into @@current_doc_url. +resolve+ selects the
  # strategy: :full, :host, or any other value used as a prefix.
  return if doc_url =~ /^http/

  unless doc_url =~ /^\//
    if doc_url[0..0] == '?'
      # Query-only link: append it to the address of the current page.
      page_uri = @@mechanize_doc.uri.to_s
      page_uri = @@agent.history.first.uri.to_s if page_uri =~ /\/popup\//
      if page_uri.include?('?')
        page_uri = page_uri.scan(/.+\//)[0]
      elsif page_uri[-1..-1] != '/'
        page_uri += '/'
      end
      @@current_doc_url = page_uri + doc_url
      return
    end
    doc_url = '/' + doc_url
  end

  if resolve == :full
    if @@host_name != nil && doc_url !~ /#{@@host_name}/
      @@current_doc_url = @@host_name + doc_url
    end
    # Drops repeated segments after joining host and path.
    @@current_doc_url = @@current_doc_url.split('/').uniq.join('/')
  elsif resolve == :host
    base_host_name = if @@host_name.count("/") == 2
                       @@host_name
                     else
                       @@host_name.scan(/(http.+?\/\/.+?)\//)[0][0]
                     end
    @@current_doc_url = base_host_name + doc_url
  else
    #custom resolving
    @@current_doc_url = resolve + doc_url
  end
end
|
216
|
+
|
217
|
+
def self.fill_textfield(textfield_name, query_string, *unused)
  # Finds the form containing the text field and stores +query_string+ in it.
  # Extra arguments are accepted and ignored.
  lookup_form_for_tag('input','textfield',textfield_name,query_string)
  if @@current_form
    # Assign directly. Building the assignment as a string for eval broke on
    # values containing an apostrophe and executed the text as Ruby code.
    @@current_form[textfield_name] = query_string
  else
    Scrubyt.log :ERROR, "Couldn't find the form that contains this textfield. Please report a bug!"
  end
end
|
225
|
+
|
226
|
+
##
|
227
|
+
#Action to fill a textarea with text
|
228
|
+
def self.fill_textarea(textarea_name, text)
  # Finds the form containing the textarea and stores +text+ in it.
  lookup_form_for_tag('textarea','textarea',textarea_name,text)
  # Assign directly. Building the assignment as a string for eval broke on
  # text containing an apostrophe and executed the text as Ruby code.
  @@current_form[textarea_name] = text
end
|
232
|
+
|
233
|
+
##
|
234
|
+
#Action for selecting an option from a dropdown box
|
235
|
+
def self.select_option(selectlist_name, option)
  # Find the form holding the select list, then click the option whose
  # stripped text equals +option+.
  lookup_form_for_tag('select','select list',selectlist_name,option)
  dropdown = @@current_form.fields.detect { |field| field.name == selectlist_name }
  wanted = dropdown.options.detect { |candidate| candidate.text.strip == option }
  wanted.click
end
|
241
|
+
|
242
|
+
def self.check_checkbox(checkbox_name)
  # Find the form holding the checkbox, then tick the first checkbox with
  # that name.
  lookup_form_for_tag('input','checkbox',checkbox_name, '')
  box = @@current_form.checkboxes.detect { |candidate| candidate.name == checkbox_name }
  box.check
end
|
248
|
+
|
249
|
+
def self.check_radiobutton(checkbox_name, index=0)
  # Find the form holding the radio group, then check the button at +index+
  # among those sharing the name.
  lookup_form_for_tag('input','radiobutton',checkbox_name, '',index)
  group = @@current_form.radiobuttons.name(checkbox_name)
  group[index].check
end
|
253
|
+
|
254
|
+
#private
|
255
|
+
def self.process_submit(current_form, button=nil, type=nil)
  # Returns the page produced by the submit; the caller does the fetching.
  # Without a button: plain submit through the agent.
  return @@agent.submit(current_form) if button == nil
  # With a button and a type: submit through the form itself.
  return current_form.submit(button) if type
  # With a button only: submit through the agent using that button.
  @@agent.submit(current_form, button)
end
|
268
|
+
|
269
|
+
def self.lookup_form_for_tag(tag, widget_name, name_attribute, query_string, index=0)
  # Sets @@current_form to the form that contains the index-th +tag+ element
  # whose name attribute is +name_attribute+. +widget_name+ and +query_string+
  # are only used in the log message.
  Scrubyt.log :ACTION, "typing #{query_string} into the #{widget_name} named '#{name_attribute}'"
  # to_a instead of a block-less map: on Ruby 1.9+ that returns an Enumerator,
  # which cannot be indexed.
  widget = (FetchAction.get_hpricot_doc/"#{tag}[@name=#{name_attribute}]").to_a[index]
  form_tag = Scrubyt::XPathUtils.traverse_up_until_name(widget, 'form')
  xp = Scrubyt::XPathUtils.generate_XPath(form_tag, nil, true)
  form_element = FetchAction.get_mechanize_doc/xp

  # Prefer the form whose node equals the located element. As before, the last
  # form is used when none matches, and @@current_form is left alone when the
  # page has no forms.
  forms = FetchAction.get_mechanize_doc.forms
  unless forms.empty?
    @@current_form = forms.find { |f| f.form_node == form_element } || forms.last
  end
end
|
287
|
+
|
288
|
+
def self.find_form_based_on_tag(tag, possible_attrs)
  # Use the first attribute of +possible_attrs+ that +tag+ carries; when it
  # carries none, the last name is kept together with a nil value.
  lookup_attribute_name = possible_attrs.find { |a| tag.attributes[a] != nil } || possible_attrs.last
  lookup_attribute_value = lookup_attribute_name.nil? ? nil : tag.attributes[lookup_attribute_name]

  # @@current_form becomes the first form whose node has the same value for
  # that attribute, or nil when no form matches. The method returns nil.
  forms = FetchAction.get_mechanize_doc.forms
  @@current_form = forms.find do |candidate|
    candidate.form_node.attributes[lookup_attribute_name].to_s == lookup_attribute_value
  end
  nil
end
|
311
|
+
end
|
312
|
+
end
|
313
|
+
end
|
314
|
+
end
|
315
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module Scrubyt
|
2
|
+
##
|
3
|
+
#=<tt>Fetching pages (and related functionality)</tt>
|
4
|
+
#
|
5
|
+
#Since a lot of things are happening during (and before)
|
6
|
+
#the fetching of a document, I decided to move out fetching related
|
7
|
+
#functionality to a separate class - so if you are looking for anything
|
8
|
+
#which is loading a document (even by submitting a form or clicking a link)
|
9
|
+
#and related things like setting a proxy etc. you should find it here.
|
10
|
+
module FetchAction
  # Navigation state shared by every method of this module.
  @@current_doc_url = nil
  @@current_doc_protocol = nil
  @@base_dir = nil
  @@host_name = nil
  @@history = []
  @@current_form = nil
  @@extractor = nil

  # Sets the extractor stored for this module.
  def self.extractor=(extractor)
    @@extractor = extractor
  end

  # Returns the stored extractor (nil until one is set).
  def self.extractor
    @@extractor
  end

  ##
  # At any given point, the current document can be queried with this method; Typically used
  # when the navigation is over and the result document is passed to the wrapper
  def self.get_current_doc_url
    @@current_doc_url
  end

  # Returns @@mechanize_doc. The variable is not initialised in this module,
  # so it has to be assigned elsewhere before this is called.
  def self.get_mechanize_doc
    @@mechanize_doc
  end

  # Returns @@hpricot_doc. The variable is not initialised in this module,
  # so it has to be assigned elsewhere before this is called.
  def self.get_hpricot_doc
    @@hpricot_doc
  end

  # Returns the currently stored host name.
  def get_host_name
    @@host_name
  end

  # Resets the host name to the first one ever stored. Does nothing for file
  # documents, or when no original host name has been recorded yet (reading
  # the undefined class variable used to raise NameError).
  def restore_host_name
    return if @@current_doc_protocol == 'file'
    @@host_name = @@original_host_name if defined?(@@original_host_name)
  end

  # Pushes the current parsed document onto the history stack.
  def store_page
    @@history.push @@hpricot_doc
  end

  # Pops the most recently stored document and makes it current.
  def restore_page
    @@hpricot_doc = @@history.pop
  end

  # Instance-level entry point; delegates to FetchAction.store_host_name,
  # which is not defined in this module body.
  def store_host_name(doc_url)
    FetchAction.store_host_name(doc_url)
  end
end
|
63
|
+
end
|