pstuteville-scrubyt 0.4.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +355 -0
- data/COPYING +340 -0
- data/README.rdoc +121 -0
- data/Rakefile +120 -0
- data/VERSION +1 -0
- data/examples/README.txt +1 -0
- data/examples/events/delta/input.html +682 -0
- data/examples/events/delta/test.rb +16 -0
- data/examples/misc/auto_regex/input.html +22 -0
- data/examples/misc/auto_regex/test.rb +14 -0
- data/examples/misc/compound_example/advanced/test.rb +11 -0
- data/examples/misc/compound_example/advanced/tricky_compound.html +9 -0
- data/examples/misc/compound_example/regexp/regexp_compound.html +17 -0
- data/examples/misc/compound_example/regexp/test.rb +11 -0
- data/examples/misc/compound_example/simple/compound.html +5 -0
- data/examples/misc/compound_example/simple/test.rb +11 -0
- data/examples/misc/detail_page/detailpage.html +6 -0
- data/examples/misc/detail_page/index.html +9 -0
- data/examples/misc/detail_page/test.rb +17 -0
- data/examples/misc/google/test.rb +39 -0
- data/examples/misc/identical_examples/data_extractor_export.rb +12 -0
- data/examples/misc/identical_examples/input.html +16 -0
- data/examples/misc/identical_examples/test.rb +15 -0
- data/examples/misc/immediate_attribute_extraction/data_extractor_export.rb +10 -0
- data/examples/misc/immediate_attribute_extraction/input.html +16 -0
- data/examples/misc/immediate_attribute_extraction/test.rb +14 -0
- data/examples/misc/multiple_examples/input.html +7 -0
- data/examples/misc/multiple_examples/test.rb +22 -0
- data/examples/misc/on_click_next/next_page_link.rb +42 -0
- data/examples/misc/on_click_next/page_1.html +10 -0
- data/examples/misc/on_click_next/page_2.html +10 -0
- data/examples/misc/on_click_next/page_3.html +7 -0
- data/examples/misc/rubycorner/test.rb +29 -0
- data/examples/misc/rubyforge_login/test.rb +30 -0
- data/examples/misc/tables/ambigous_records/input.html +17 -0
- data/examples/misc/tables/ambigous_records/test.rb +37 -0
- data/examples/misc/tables/another_plain_table/input.html +15 -0
- data/examples/misc/tables/another_plain_table/test.rb +25 -0
- data/examples/misc/tables/complex_table/input.html +45 -0
- data/examples/misc/tables/complex_table/test.rb +30 -0
- data/examples/misc/tables/grab_rows/input.html +20 -0
- data/examples/misc/tables/grab_rows/test.rb +30 -0
- data/examples/misc/tables/plain_table/input.html +39 -0
- data/examples/misc/tables/plain_table/test.rb +35 -0
- data/examples/misc/tables/plain_table_morepages/2.html +38 -0
- data/examples/misc/tables/plain_table_morepages/3.html +33 -0
- data/examples/misc/tables/plain_table_morepages/input.html +40 -0
- data/examples/misc/tables/plain_table_morepages/test.rb +32 -0
- data/examples/misc/tables/plain_table_morepages_with_image/2.html +40 -0
- data/examples/misc/tables/plain_table_morepages_with_image/3.html +33 -0
- data/examples/misc/tables/plain_table_morepages_with_image/images/right_arrow.png +0 -0
- data/examples/misc/tables/plain_table_morepages_with_image/input.html +42 -0
- data/examples/misc/tables/plain_table_morepages_with_image/test.rb +32 -0
- data/examples/misc/tables/test_select_indices/input.html +46 -0
- data/examples/misc/tables/test_select_indices/test.rb +55 -0
- data/examples/misc/xpath_example_type/input.html +15 -0
- data/examples/misc/xpath_example_type/test.rb +18 -0
- data/examples/misc/yahoo_finance/test.rb +26 -0
- data/examples/social/blog_comment/test.rb +27 -0
- data/examples/social/del.icio.us/test.rb +22 -0
- data/examples/social/digg/test.rb +37 -0
- data/examples/social/dzone/test.rb +28 -0
- data/examples/social/linkedin/linkedin.rb +23 -0
- data/examples/social/reddit/test.rb +23 -0
- data/examples/tones_extractor_export.rb +23 -0
- data/examples/webshops/amazon/002-8212888-3924065.html +5311 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0130796034.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/020161622X.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0321223675.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0465067107.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0470069155.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0470081201.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0596005253.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0596101325.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0596523696.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0672328844.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0764596861.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0974514055.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0976694069.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0977616606.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0977616614.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0977616630.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/1590597362.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/1594480060.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/1932394699.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/2841772101.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/amzn-logo-118w.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/askville-adwidget-bullet.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/askville-logo-sm-adwidget-white-bg.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/book_display_on_website-icon.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-inactive-no-ns.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-inactive-no.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-inactive-yes-ns.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-inactive-yes.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-no-tiny.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-yes-tiny.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/buybox-button-find-gifts-a.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/c7y_badge_rn_1.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/cap-a9-3.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/drop-down-icon-small-arrow.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/endcap-a9-go-2.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/gb-open-new.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/gc-logo-popover-a.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/gift-cert-roto-pop-a.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/go-button-books.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/go-button.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/go-orange-trans.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/go_button_photo.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/logo-off.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/n2CoreLibs-events-18134.js +1407 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/n2CoreLibs-n2v1-57871.css +364 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/n2CoreLibs-simplePopover-41153.js +749 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/n2CoreLibs-utilities-25439.js +1608 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/orange-arrow.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/orange-arrow_002.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/popover-blurb.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/powered-by-a9.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/stars-3-5.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/stars-4-0.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/stars-4-5.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/stars-5-0.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/tagline-adwidget.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/topnav-cart.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/transparent-pixel.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/transparent-pixel_002.gif +0 -0
- data/examples/webshops/amazon/test.rb +27 -0
- data/examples/webshops/amazon-online/test.rb +34 -0
- data/examples/webshops/barnes_and_noble/test.rb +32 -0
- data/examples/webshops/barnes_offline/barnes_and_noble.html +115 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/10964080.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/10999676.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/11018492.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/11656534.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/11985045.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/12052378.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/12138286.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/12533212.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/12533268.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/9583392.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/SearchProduct.css +626 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin3_gtpointup.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_aboutshipping.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_account.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_colon.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_giftreminder.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_help.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_orderstatus.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_wishlist.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/bg.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/btnGoGrn.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/cleardot.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/cleardot_002.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/dot4.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/dotGold20.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/hdCantFind.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/hdSearchResults.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/hgg_tab_home_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/hgg_tab_toy_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/iframeKMP.js +172 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2.html +25 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/070226_mc_lnav_search.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/XmlUtil.js +199 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/XslStyleSheet.js +1 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/kmp_gen.css +81 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/product-preview-core.js +337 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/product-preview.css +36 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/productpreview.js +11 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/linePagination.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/logo_bn05.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/navbar.js +34 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/navbar_06.css +136 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/popup_open.js +116 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/qsearch3_vline_dots.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/qsearch4_search.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/qsearch_AdvSearch.jpg +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/subnav_colon.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_Bookclubs_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_bnjr_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_books_hot.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_dvd_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_giftcards_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_home_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_member_cc_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_music_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_pipe.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_textbooksonly_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_usedoop_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_videogames_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/toppromo3_rule.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/toppromo_fastfree05.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/vcart_btn_checkout.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/vcart_icon_cart.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/vcart_topbot_rule.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/visualcart_prodid.js +401 -0
- data/examples/webshops/barnes_offline/test.rb +30 -0
- data/examples/webshops/buydig/2_files/03AA1BB9089A4A6A92CF23F280D664EB.jpg +0 -0
- data/examples/webshops/buydig/2_files/1008.gif +0 -0
- data/examples/webshops/buydig/2_files/1013.gif +0 -0
- data/examples/webshops/buydig/2_files/1020.gif +0 -0
- data/examples/webshops/buydig/2_files/106CF2FB84B446518397517C3E6D5AD8.jpg +0 -0
- data/examples/webshops/buydig/2_files/13-www.gif +0 -0
- data/examples/webshops/buydig/2_files/1E9BB2E56AB145FC9D6EF952703AF476.jpg +0 -0
- data/examples/webshops/buydig/2_files/1FCDFBE85CDB4D429EC2C8CB24D20457.jpg +0 -0
- data/examples/webshops/buydig/2_files/1pix.gif +0 -0
- data/examples/webshops/buydig/2_files/2014.gif +0 -0
- data/examples/webshops/buydig/2_files/2089.gif +0 -0
- data/examples/webshops/buydig/2_files/24992_medal.gif +0 -0
- data/examples/webshops/buydig/2_files/24BBCBA1397F4DDCBBBBE8456D6D6E5B.jpg +0 -0
- data/examples/webshops/buydig/2_files/281F8A6019B140F38DFD45EB7B69B0FB.jpg +0 -0
- data/examples/webshops/buydig/2_files/2975F866CB2149F7ACBC559C8E24E304.jpg +0 -0
- data/examples/webshops/buydig/2_files/316FC9256DC9460ABC3C5ECAF6C60286.jpg +0 -0
- data/examples/webshops/buydig/2_files/50569327D8B94252B95E449AE470E505.jpg +0 -0
- data/examples/webshops/buydig/2_files/519CDAB404FA4543B76B5F281468ACBF.jpg +0 -0
- data/examples/webshops/buydig/2_files/57D6146419A647BA89C96AF0B5CAB03C.jpg +0 -0
- data/examples/webshops/buydig/2_files/58E3F988E184448B8C0A59874AE123A8.jpg +0 -0
- data/examples/webshops/buydig/2_files/5E5B10197A4E4C9B9ECCD6309DBE4C54.jpg +0 -0
- data/examples/webshops/buydig/2_files/609A249177D04065B37B9161CB0BC92D.jpg +0 -0
- data/examples/webshops/buydig/2_files/676CEE8E53C2445982E991871B4DF613.jpg +0 -0
- data/examples/webshops/buydig/2_files/712BA08FAB524A31A76ABB9E2009FF8E.jpg +0 -0
- data/examples/webshops/buydig/2_files/734BD08D7A5049339393166491D09D21.jpg +0 -0
- data/examples/webshops/buydig/2_files/751E72B7003343248497FE6905F80787.jpg +0 -0
- data/examples/webshops/buydig/2_files/76493D4F02F14EF7B5886510604C7BB4.jpg +0 -0
- data/examples/webshops/buydig/2_files/79521E251278486DB29529C60C9D012A.jpg +0 -0
- data/examples/webshops/buydig/2_files/9C9AF82AC3B54BDC8C705278B50FDFD6.jpg +0 -0
- data/examples/webshops/buydig/2_files/BC3FD8307B9948FDB7EEF156D8629C37.jpg +0 -0
- data/examples/webshops/buydig/2_files/C0DD4574765047D1836F505E69DC8AE5.jpg +0 -0
- data/examples/webshops/buydig/2_files/C143F48515274A44B04F4B3E46306BD2.jpg +0 -0
- data/examples/webshops/buydig/2_files/C6B02E88F729464699DB275D140F4563.jpg +0 -0
- data/examples/webshops/buydig/2_files/CE334D6206DB4FA9AFDF339AEF0AF50F.jpg +0 -0
- data/examples/webshops/buydig/2_files/D66AE0DC865A4021AB300ED3A0C4CD11.jpg +0 -0
- data/examples/webshops/buydig/2_files/DEA2EC2093DC474D96B651068576DAE5.jpg +0 -0
- data/examples/webshops/buydig/2_files/F547677D83844042BF13A4BE6523BB50.jpg +0 -0
- data/examples/webshops/buydig/2_files/Rbbbonlineseal.gif +0 -0
- data/examples/webshops/buydig/2_files/TopSellers_bottom.gif +0 -0
- data/examples/webshops/buydig/2_files/TopSellers_ttl.gif +0 -0
- data/examples/webshops/buydig/2_files/addToFavorites_ttl.gif +0 -0
- data/examples/webshops/buydig/2_files/banner_CorporateSales.gif +0 -0
- data/examples/webshops/buydig/2_files/banner_Shipping.gif +0 -0
- data/examples/webshops/buydig/2_files/bizratehonoree.gif +0 -0
- data/examples/webshops/buydig/2_files/btn_submit.gif +0 -0
- data/examples/webshops/buydig/2_files/checkFlash.js +110 -0
- data/examples/webshops/buydig/2_files/checkFlash2.js +109 -0
- data/examples/webshops/buydig/2_files/cnetcertified.gif +0 -0
- data/examples/webshops/buydig/2_files/credPriceGrabber.gif +0 -0
- data/examples/webshops/buydig/2_files/credShopping.gif +0 -0
- data/examples/webshops/buydig/2_files/credential_paypal.gif +0 -0
- data/examples/webshops/buydig/2_files/credentials.gif +0 -0
- data/examples/webshops/buydig/2_files/dealtime.gif +0 -0
- data/examples/webshops/buydig/2_files/dvxstyle.css +754 -0
- data/examples/webshops/buydig/2_files/footer_021306_1_v1.gif +0 -0
- data/examples/webshops/buydig/2_files/func.js +132 -0
- data/examples/webshops/buydig/2_files/getseal +1 -0
- data/examples/webshops/buydig/2_files/help.gif +0 -0
- data/examples/webshops/buydig/2_files/home.gif +0 -0
- data/examples/webshops/buydig/2_files/java.js +155 -0
- data/examples/webshops/buydig/2_files/leftnv_help.gif +0 -0
- data/examples/webshops/buydig/2_files/logo.gif +0 -0
- data/examples/webshops/buydig/2_files/logo2.gif +0 -0
- data/examples/webshops/buydig/2_files/logo3.gif +0 -0
- data/examples/webshops/buydig/2_files/main.js +227 -0
- data/examples/webshops/buydig/2_files/mastercard_secured.gif +0 -0
- data/examples/webshops/buydig/2_files/newsBox_bkg.jpg +0 -0
- data/examples/webshops/buydig/2_files/newsBox_bottom.jpg +0 -0
- data/examples/webshops/buydig/2_files/newsBox_text.gif +0 -0
- data/examples/webshops/buydig/2_files/newsBox_ttl.jpg +0 -0
- data/examples/webshops/buydig/2_files/noimage75.gif +0 -0
- data/examples/webshops/buydig/2_files/orangeleftfrmbtm.gif +0 -0
- data/examples/webshops/buydig/2_files/pixel153.gif +0 -0
- data/examples/webshops/buydig/2_files/rightnv_bottom.gif +0 -0
- data/examples/webshops/buydig/2_files/search_btn_off.gif +0 -0
- data/examples/webshops/buydig/2_files/search_c1.gif +0 -0
- data/examples/webshops/buydig/2_files/search_c2.gif +0 -0
- data/examples/webshops/buydig/2_files/search_c3.gif +0 -0
- data/examples/webshops/buydig/2_files/search_c4.gif +0 -0
- data/examples/webshops/buydig/2_files/search_down.gif +0 -0
- data/examples/webshops/buydig/2_files/search_left.gif +0 -0
- data/examples/webshops/buydig/2_files/search_right.gif +0 -0
- data/examples/webshops/buydig/2_files/search_top.gif +0 -0
- data/examples/webshops/buydig/2_files/siteLinks_bottom.gif +0 -0
- data/examples/webshops/buydig/2_files/siteLinks_bullet.gif +0 -0
- data/examples/webshops/buydig/2_files/siteLinks_ttl.gif +0 -0
- data/examples/webshops/buydig/2_files/spacer.gif +0 -0
- data/examples/webshops/buydig/2_files/style.js +45 -0
- data/examples/webshops/buydig/2_files/styles.html +33 -0
- data/examples/webshops/buydig/2_files/track_orders.jpg +0 -0
- data/examples/webshops/buydig/2_files/urchin +534 -0
- data/examples/webshops/buydig/2_files/verified_by_visa.gif +0 -0
- data/examples/webshops/buydig/2_files/welcome.gif +0 -0
- data/examples/webshops/buydig/2_files/welcome_ttl.gif +0 -0
- data/examples/webshops/buydig/2_files/yahoologo.gif +0 -0
- data/examples/webshops/buydig/input.html +1194 -0
- data/examples/webshops/buydig/test.rb +31 -0
- data/examples/webshops/ebay/test.rb +32 -0
- data/examples/webshops/finewines_offline/_finewines.html +1739 -0
- data/examples/webshops/finewines_offline/_finewines_cut.html +371 -0
- data/examples/webshops/finewines_offline/_finewines_files/011064.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/012674.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/013268.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/013300.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/013409.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/014340.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/015073.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/015255.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/015479.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/015487.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017038.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017129.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017145.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017152.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017285.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017392.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017400.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/019778.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/019786.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/020503.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/021253.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/021279.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/021337.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/021352.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/023002.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/023135.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/023143.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/023788.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/024166.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/024182.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/024216.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/027268.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/027516.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/027862.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/028118.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/028936.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033894.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033902.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033910.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033936.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033944.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033951.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/034553.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/034561.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/232439.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/237834.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/268359.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/289082.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/331603.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/369686.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/420257.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/422014.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/460410.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/480533.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/556795.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/597054.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/650606.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/652628.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/653790.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/658450.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/660027.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/660951.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/684514.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/685131.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/686865.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/699330.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/703017.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/703140.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/703850.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/717306.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/900274.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/938225.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/947440.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/951319.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/967893.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/981407.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/981613.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/982421.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/985598.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/986737.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/987503.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/992800.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/VintageslogoEN.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/blanc-up.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/btn_vintages_latest.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/cc_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/common.js +860 -0
- data/examples/webshops/finewines_offline/_finewines_files/drink.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/drinkhold.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/ec_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/ev_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/hold.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/index-wines-features.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/indexSTYLE.css +398 -0
- data/examples/webshops/finewines_offline/_finewines_files/keyword_search.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/mm_menu.js +1 -0
- data/examples/webshops/finewines_offline/_finewines_files/nr_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/ontario_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/popup.js +81 -0
- data/examples/webshops/finewines_offline/_finewines_files/releases_nav.js +229 -0
- data/examples/webshops/finewines_offline/_finewines_files/so_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/spacer.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/top.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/urchin.js +576 -0
- data/examples/webshops/finewines_offline/_finewines_files/wom_en.gif +0 -0
- data/examples/webshops/finewines_offline/test.rb +30 -0
- data/examples/webshops/us1camera/1_files/1pix.gif +0 -0
- data/examples/webshops/us1camera/1_files/1pix_002.gif +0 -0
- data/examples/webshops/us1camera/1_files/CnetCertified.gif +0 -0
- data/examples/webshops/us1camera/1_files/CyberSource.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50_002.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50_003.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50_004.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50_005.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50_006.gif +0 -0
- data/examples/webshops/us1camera/1_files/PriceGrabber.gif +0 -0
- data/examples/webshops/us1camera/1_files/QSearch.gif +0 -0
- data/examples/webshops/us1camera/1_files/ban-m.jpg +0 -0
- data/examples/webshops/us1camera/1_files/banner1.bin +0 -0
- data/examples/webshops/us1camera/1_files/banner3.bin +0 -0
- data/examples/webshops/us1camera/1_files/block1.jpg +0 -0
- data/examples/webshops/us1camera/1_files/block2.jpg +0 -0
- data/examples/webshops/us1camera/1_files/block3.jpg +0 -0
- data/examples/webshops/us1camera/1_files/block4.jpg +0 -0
- data/examples/webshops/us1camera/1_files/block5.jpg +0 -0
- data/examples/webshops/us1camera/1_files/block6.jpg +0 -0
- data/examples/webshops/us1camera/1_files/bos.js +280 -0
- data/examples/webshops/us1camera/1_files/box1.jpg +0 -0
- data/examples/webshops/us1camera/1_files/box2.jpg +0 -0
- data/examples/webshops/us1camera/1_files/box3.jpg +0 -0
- data/examples/webshops/us1camera/1_files/box4.jpg +0 -0
- data/examples/webshops/us1camera/1_files/dot.jpg +0 -0
- data/examples/webshops/us1camera/1_files/eDevix.gif +0 -0
- data/examples/webshops/us1camera/1_files/electronics1.jpg +0 -0
- data/examples/webshops/us1camera/1_files/getseal +1 -0
- data/examples/webshops/us1camera/1_files/pride.jpg +0 -0
- data/examples/webshops/us1camera/1_files/search.jpg +0 -0
- data/examples/webshops/us1camera/1_files/sidebutton.jpg +0 -0
- data/examples/webshops/us1camera/1_files/sslroilogic.js +49 -0
- data/examples/webshops/us1camera/1_files/style.css +1 -0
- data/examples/webshops/us1camera/1_files/tl.html +2 -0
- data/examples/webshops/us1camera/input.html +548 -0
- data/examples/webshops/us1camera/test.rb +37 -0
- data/lib/scrubyt/core/navigation/agents/firewatir.rb +285 -0
- data/lib/scrubyt/core/navigation/agents/mechanize.rb +315 -0
- data/lib/scrubyt/core/navigation/fetch_action.rb +63 -0
- data/lib/scrubyt/core/navigation/navigation_actions.rb +107 -0
- data/lib/scrubyt/core/scraping/compound_example.rb +30 -0
- data/lib/scrubyt/core/scraping/constraint.rb +169 -0
- data/lib/scrubyt/core/scraping/constraint_adder.rb +49 -0
- data/lib/scrubyt/core/scraping/filters/attribute_filter.rb +14 -0
- data/lib/scrubyt/core/scraping/filters/base_filter.rb +112 -0
- data/lib/scrubyt/core/scraping/filters/constant_filter.rb +9 -0
- data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +37 -0
- data/lib/scrubyt/core/scraping/filters/download_filter.rb +64 -0
- data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +9 -0
- data/lib/scrubyt/core/scraping/filters/regexp_filter.rb +13 -0
- data/lib/scrubyt/core/scraping/filters/script_filter.rb +11 -0
- data/lib/scrubyt/core/scraping/filters/text_filter.rb +34 -0
- data/lib/scrubyt/core/scraping/filters/tree_filter.rb +138 -0
- data/lib/scrubyt/core/scraping/pattern.rb +359 -0
- data/lib/scrubyt/core/scraping/pre_filter_document.rb +14 -0
- data/lib/scrubyt/core/scraping/result_indexer.rb +90 -0
- data/lib/scrubyt/core/shared/extractor.rb +171 -0
- data/lib/scrubyt/logging.rb +154 -0
- data/lib/scrubyt/output/post_processor.rb +139 -0
- data/lib/scrubyt/output/result.rb +44 -0
- data/lib/scrubyt/output/result_dumper.rb +154 -0
- data/lib/scrubyt/output/result_node.rb +145 -0
- data/lib/scrubyt/output/scrubyt_result.rb +42 -0
- data/lib/scrubyt/utils/compound_example_lookup.rb +50 -0
- data/lib/scrubyt/utils/ruby_extensions.rb +85 -0
- data/lib/scrubyt/utils/shared_utils.rb +58 -0
- data/lib/scrubyt/utils/simple_example_lookup.rb +40 -0
- data/lib/scrubyt/utils/xpathutils.rb +202 -0
- data/lib/scrubyt.rb +53 -0
- data/pkg/scrubyt-0.4.31.gem +0 -0
- data/resources/allison/LICENSE +184 -0
- data/resources/allison/README +37 -0
- data/resources/allison/allison.css +301 -0
- data/resources/allison/allison.gif +0 -0
- data/resources/allison/allison.js +307 -0
- data/resources/allison/allison.rb +287 -0
- data/resources/allison/cache/BODY +588 -0
- data/resources/allison/cache/CLASS_INDEX +4 -0
- data/resources/allison/cache/CLASS_PAGE +1 -0
- data/resources/allison/cache/FILE_INDEX +4 -0
- data/resources/allison/cache/FILE_PAGE +1 -0
- data/resources/allison/cache/FONTS +1 -0
- data/resources/allison/cache/FR_INDEX_BODY +1 -0
- data/resources/allison/cache/IMGPATH +1 -0
- data/resources/allison/cache/INDEX +1 -0
- data/resources/allison/cache/JAVASCRIPT +307 -0
- data/resources/allison/cache/METHOD_INDEX +4 -0
- data/resources/allison/cache/METHOD_LIST +1 -0
- data/resources/allison/cache/SRC_PAGE +1 -0
- data/resources/allison/cache/STYLE +323 -0
- data/resources/allison/cache/URL +1 -0
- data/scrubyt.gemspec +609 -0
- data/test/blackbox_test.rb +60 -0
- data/test/blackbox_tests/basic/multi_root.expected.xml +8 -0
- data/test/blackbox_tests/basic/multi_root.rb +6 -0
- data/test/blackbox_tests/basic/simple.expected.xml +5 -0
- data/test/blackbox_tests/basic/simple.rb +5 -0
- data/test/blackbox_tests/basic/three_divs.html +12 -0
- data/test/blackbox_tests/detail_page/detail_page_1.html +7 -0
- data/test/blackbox_tests/detail_page/detail_page_2.html +7 -0
- data/test/blackbox_tests/detail_page/main_page_1.html +5 -0
- data/test/blackbox_tests/detail_page/main_page_2.html +6 -0
- data/test/blackbox_tests/detail_page/one_detail_page.expected.xml +7 -0
- data/test/blackbox_tests/detail_page/one_detail_page.rb +9 -0
- data/test/blackbox_tests/detail_page/two_detail_pages.expected.xml +12 -0
- data/test/blackbox_tests/detail_page/two_detail_pages.rb +9 -0
- data/test/blackbox_tests/next_page/next_page_link.expected.xml +11 -0
- data/test/blackbox_tests/next_page/next_page_link.rb +7 -0
- data/test/blackbox_tests/next_page/page_1.html +11 -0
- data/test/blackbox_tests/next_page/page_2.html +11 -0
- data/test/blackbox_tests/next_page/page_3.html +7 -0
- data/test/blackbox_tests/next_page/page_list_links.expected.xml +11 -0
- data/test/blackbox_tests/next_page/page_list_links.rb +7 -0
- data/test/blackbox_tests/next_page/page_list_links.tofix +7 -0
- data/todo/backlog.txt +73 -0
- data/todo/scenario_ideas.txt +19 -0
- metadata +637 -0
@@ -0,0 +1,145 @@
|
|
1
|
+
module Scrubyt
  # A node of the extraction result tree.
  #
  # The node is itself an Array holding its child ResultNodes. +name+ is the
  # element name used on output, +result+ is either a String or an object
  # responding to +inner_html+, and +options+ is the option hash
  # (+:write_text+ and +:default+ are the keys read in this class).
  class ResultNode < Array
    OUTPUT_OPTIONS = [:write_text]

    # Separator used internally by to_flat_hash to join the values of
    # several records before splitting them apart again.
    FLAT_HASH_DELIMITER = '@@@@@@'

    attr_accessor :name, :result, :options, :generated_by_leaf

    def initialize(name, result=nil, options={})
      @name = name
      @result = result
      @options = options
    end

    # Whether the text of this node should be written to the output.
    # An explicit :write_text option wins; otherwise the value of
    # +generated_by_leaf+ decides.
    def write_text
      @options[:write_text].nil? ? @generated_by_leaf : @options[:write_text]
    end

    # True if this node has a String result, writes its own text, or has at
    # least one child with content.
    def has_content?
      return true if result.is_a? String
      write_text || any? { |child| child.has_content? }
    end

    # Text of this node: tags removed, entities unescaped, surrounding
    # whitespace stripped. Returns the :default option instead when one is
    # set and the text is empty (or already equal to the default).
    def to_s
      return "" if result.nil?
      # Work on a copy of a String result: unescape_entities and strip! modify
      # their receiver in place, and to_s must neither change @result (which
      # to_xml_lines prints verbatim) nor fail on a frozen string.
      text = (@result.is_a? String) ? @result.dup : @result.inner_html.gsub(/<.*?>/, '')
      text = SharedUtils.unescape_entities(text)
      text.strip!
      if (@options[:default] && ((text == '') || (text == @options[:default])))
        @options[:default]
      else
        text
      end
    end

    def inspect
      to_s
    end

    # Builds an XML::Node for this node, appending the nodes of all children
    # that have content and, if write_text is set, this node's text.
    def to_libxml
      libxml_node = XML::Node.new(name)
      self.each { |child| libxml_node << child.to_libxml if child.has_content? }
      libxml_node << to_s if write_text
      libxml_node
    end

    #note: String#write is only present as commented-out code in ruby_extensions.rb
    def to_xml
      to_xml_lines.join("\n")
    end

    # Returns one Hash per direct child. Each hash maps the names (as
    # symbols) of the nodes in that child's subtree to their text; values of
    # nodes sharing a name are joined with +delimiter+.
    def to_hash(delimiter=',')
      result = []
      flat_hash_inner = lambda {|e, hash|
        if ((e.write_text && !e.to_s.empty?) || e.options[:default])
          key = e.name.to_sym
          hash[key] = hash[key] ? hash[key] + delimiter + e.to_s : e.to_s
        end
        e.each {|c| flat_hash_inner.call(c, hash) }
        hash
      }
      self.each {|e| result << flat_hash_inner.call(e, {}) }
      result
    end

    # Like to_hash, but every joined value is split into a record of its own.
    # Returns an empty Array when there are no records.
    def to_flat_hash()
      split_merged_records(self.to_hash(FLAT_HASH_DELIMITER), FLAT_HASH_DELIMITER)
    end

    # Renders the records as a sequence of <item> elements.
    #
    # Without a +delimiter+ every hash returned by to_hash becomes one <item>.
    # With a +delimiter+ the records are first merged and split again on it
    # (see split_merged_records), so joined values end up in separate items.
    # The placeholder value '#empty#' is written as an empty element.
    def to_flat_xml(delimiter=nil)
      records = delimiter ? split_merged_records(self.to_hash(delimiter), delimiter) : self.to_hash

      lines = []
      records.each do |hash|
        lines << "<item>"
        hash.each do |key, value|
          xml_tag = key.to_s
          value = '' if value == '#empty#'
          lines << " <#{xml_tag}>#{REXML::Text.normalize(value)}</#{xml_tag}>"
        end
        lines << "</item>"
      end
      return lines.join("\n")
    end

    # Returns the lines of the XML representation of this subtree; each
    # nesting level is prefixed with one more leading space.
    def to_xml_lines
      lines = []
      children = self.select{ |child| child.has_content? }
      if children.empty?
        if result.is_a? String
          lines << "<#{name}>#{result}</#{name}>"
        elsif write_text && !to_s.empty?
          lines << "<#{name}>#{ERB::Util.html_escape(to_s)}</#{name}>"
        else
          if @options[:default]
            lines << "<#{name}>#{@options[:default]}</#{name}>"
          else
            lines << "<#{name}/>"
          end
        end
      else
        lines << "<#{name}>"
        lines << " #{ERB::Util.html_escape(to_s)}" if write_text && !to_s.empty?
        children.each do |child|
          lines.push(*child.to_xml_lines.map{ |line| " #{line}" })
        end
        lines << "</#{name}>"
      end
      lines
    end

    private

    # Merges +records+ (an Array of Hashes) key by key, joining the values
    # with +delimiter+, then splits every merged value on +delimiter+ and
    # regroups the pieces into one Hash per position. Only the keys of the
    # first record are considered. Returns [] for an empty +records+.
    def split_merged_records(records, delimiter)
      return [] if records.empty?

      merged = records.first.dup
      keys = merged.keys
      records[1..-1].each do |record|
        keys.each { |key| merged[key] += "#{delimiter}#{record[key]}" }
      end

      columns = keys.map { |key| merged[key].split(delimiter) }
      columns.transpose.map do |row|
        flat = {}
        keys.each_with_index { |key, position| flat[key] = row[position] }
        flat
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Scrubyt
  # Result node that additionally carries the root patterns of the extractor
  # and the source (file or proc) it was built from.
  class ScrubytResult < ResultNode
    attr_accessor :root_patterns, :source_file, :source_proc

    # Returns a textual explanation followed by an ASCII tree of the first
    # root pattern: one "[name]" entry per pattern, with the XPath of the
    # pattern's first filter appended for patterns of type :tree.
    def export
      #Temporary solution; the real one will be back later - or not
      report = <<-EXPLANATION

=== Extractor tree ===

export() is not working at the moment, due to the removal or ParseTree, ruby2ruby and RubyInline.
For now, in case you are using examples, you can replace them by hand based on the output below.
So if your pattern in the learning extractor looks like

book "Ruby Cookbook"

and you see the following below:

[book] /table[1]/tr/td[2]

then replace "Ruby Cookbook" with "/table[1]/tr/td[2]" (and all the other XPaths) and you are ready!

      EXPLANATION

      # Appends the entry for +pattern+ and, recursively, for its children.
      render = lambda do |pattern, depth|
        row = "[#{pattern.name}]"
        unless depth == 0
          # Nested entries get a connector line and a branch marker.
          padding = " " * (depth - 1)
          row = padding + "|\n" + padding + "+-- " + row
        end
        row += " #{pattern.filters[0].xpath}" if pattern.type == :tree
        report += row + "\n"

        pattern.children.each { |child| render[child, depth + 1] }
      end

      render[root_patterns[0], 0]

      report += "\n"
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module Scrubyt
  #=<tt>Lookup of compound examples</tt>
  #There are two types of string examples in scRUBYt! right now:
  #the simple example and the compound example.
  #
  #This class is responsible for finding elements matched by compound examples.
  #In the future probably more sophisticated matching algorithms will be added
  #(e.g. match the n-th which matches the text, or element that matches the
  #text but also contains a specific attribute etc.)
  class CompoundExampleLookup
    #Find the element matched by all descriptors of the compound example.
    #
    #_doc_ - the node the search starts from
    #
    #_compound_example_ - hash of descriptors (see lookup_compound_example)
    #
    #_next_link_ - not used by this method
    #
    #_index_ - which of the matching elements to return (0 is the first)
    #
    #Returns nil if no element satisfies every descriptor.
    def self.find_node_from_compund_example(doc, compound_example, next_link=false, index = 0)
      @partial_results = []
      self.lookup_compound_example(doc, compound_example, index)
    end

    #NOTE: 'private' has no effect on methods defined with 'def self.', so the
    #methods below remain callable from outside; it is kept as a marker only.
    private
    #Lookup the element which is matched by this compound example
    #
    #A compound example is specified with :contains, :begins_with and
    #:ends_with descriptors - which can be both regexps or strings
    #
    #Example:
    #
    #flight_info :begins_with => 'Arrival', :contains => /\d\d-\d+/, :ends_with => '20:00'
    def self.lookup_compound_example(doc, compound_example, index)
      @partial_results = []
      #Track the first descriptor explicitly. Testing whether the partial
      #results are empty would restart the search after a descriptor that
      #matched nothing, and return elements that do not satisfy it.
      first_descriptor = true
      compound_example.each do |k,v|
        v = Regexp.escape(v) if v.is_a? String
        case k
        when :contains
          v = /#{v}/
        when :begins_with
          v = /^\s*#{v}/
        when :ends_with
          v = /#{v}\s*$/
        end
        if first_descriptor
          @partial_results = SharedUtils.traverse_for_match(doc, v)
          first_descriptor = false
        else
          refine_partial_results(v)
        end
      end
      @partial_results[index]
    end

    #Keep only those partial results whose text also matches _regexp_.
    #The text is prepared the same way SharedUtils.traverse_for_match
    #prepares it, so every descriptor is checked against the same text.
    def self.refine_partial_results(regexp)
      @partial_results = @partial_results.select do |pr|
        SharedUtils.prepare_text_for_comparison(pr.inner_html.gsub(/<.*?>/, '')) =~ regexp
      end
    end

  end #End of class CompoundExampleLookup
end #End of module Scrubyt
|
@@ -0,0 +1,85 @@
|
|
1
|
+
class Module
  # Defines one reader per key of +key_default_hash+. A reader returns
  # @options[key]; when that is nil it returns the given default, evaluating
  # it in the context of the instance if the default is a Proc.
  def option_reader(key_default_hash)
    key_default_hash.each do |name, fallback|
      define_method(name) do
        stored = @options[name]
        next stored unless stored.nil?
        fallback.is_a?(Proc) ? instance_eval(&fallback) : fallback
      end
    end
  end

  # Defines a "key=" writer storing the value in @options for each key.
  def option_writer(*keys)
    keys.each do |name|
      define_method(:"#{name}=") do |new_value|
        @options[name] = new_value
      end
    end
  end

  # Defines a reader for +key+ with the given default and, if +writable+,
  # a writer as well.
  def option(key, default=nil, writable=false)
    option_reader(key => default)
    option_writer(key) if writable
  end

  # Defines a reader and a writer for every key of +key_default_hash+.
  def option_accessor(key_default_hash)
    key_default_hash.each { |name, fallback| option(name, fallback, true) }
  end
end
|
37
|
+
|
38
|
+
class Range
  # Compares two ranges by their starting points only.
  # Returns nil when +other+ has no starting point to compare with.
  def <=>(other)
    return nil unless other.respond_to?(:begin)
    self.begin <=> other.begin
  end

  # Returns a new range with both ends moved up by +amount+.
  # An exclusive range (a...b) stays exclusive.
  def +(amount)
    Range.new(self.begin + amount, self.end + amount, exclude_end?)
  end

  # Returns a new range with both ends moved down by +amount+.
  # An exclusive range (a...b) stays exclusive.
  def -(amount)
    Range.new(self.begin - amount, self.end - amount, exclude_end?)
  end
end
|
51
|
+
|
52
|
+
module Math
  # Returns +a+ if it is strictly smaller than +b+, otherwise +b+.
  def self.min(a, b)
    return a if a < b
    b
  end

  # Returns +a+ if it is strictly greater than +b+, otherwise +b+.
  def self.max(a, b)
    return a if a > b
    b
  end
end
|
61
|
+
|
62
|
+
#dec 16: Dropped - causes some errors w/ Rails
|
63
|
+
#just some hack here to allow current examples' syntax:
|
64
|
+
#table_data.to_xml.write(open('result.xml', 'w'), 1)
|
65
|
+
#class String
|
66
|
+
# def write(stringio, add_indent=0)
|
67
|
+
# stringio.write((self.split("\n").collect { |line| (' ' * add_indent) + line }).join("\n"))
|
68
|
+
# end
|
69
|
+
#end
|
70
|
+
|
71
|
+
#hack to simulate the ancestor::tag selector of XPath
|
72
|
+
module Hpricot
  class Elem
    # Walks from this element up through its parents.
    #
    # With a +tag+: returns the first element on the way up (starting with
    # this element itself) whose name equals +tag+.
    #
    # Without a +tag+, or if no element on the way has that name: returns an
    # Hpricot::Elements holding this element followed by all of its parents.
    #
    # The walk ends at the Hpricot::Doc, or at an element without a parent
    # (a detached subtree), whichever is reached first.
    def ancestors(tag = nil)
      element=self
      path=Hpricot::Elements.new
      while element && element.class != Hpricot::Doc do
        return element if (tag && (tag ==element.name))
        path.push element
        element = element.parent
      end
      path
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module Scrubyt
  ##
  #=<tt>Utilities shared between the other utility classes (XPathUtils, SimpleExampleLookup,...)</tt>
  #
  class SharedUtils
    #Entities to replace - need to make this more complete, or install htmlentities or similar package
    ENTITIES = {
      'quot' => '"',
      'apos' => "'",
      'amp' => '&',
      'lt' => '<',
      'gt' => '>',
      'nbsp' => ' '}

    #Matches any of the entities above; the name is captured in group 1
    ENTITY_PATTERN = /&(#{ENTITIES.keys.join('|')});/

    #Unescape the entities in _text_ and strip the surrounding whitespace.
    #_text_ itself is modified and returned.
    def self.prepare_text_for_comparison(text)
      unescape_entities text
      text.strip!
      text
    end

    #Unescape the entities in the HTML!
    #
    #_text_ itself is modified and returned. All entities are replaced in a
    #single pass, so text produced by a replacement is never unescaped a
    #second time ('&amp;lt;' becomes '&lt;', not '<').
    def self.unescape_entities(text)
      text.gsub!(ENTITY_PATTERN) { ENTITIES[Regexp.last_match(1)] }
      text
    end

    #Entry point for finding the elements specified by examples
    #
    #Walks the tree below (and including) _node_ and collects every node
    #whose text - tags removed, entities unescaped, whitespace stripped -
    #matches _regexp_. When a matching Hpricot::Elem is added, its parent is
    #removed from the results, so of nested matches only the innermost stays.
    #Every collected node gets a match_data method returning the MatchData
    #of its match.
    def self.traverse_for_match(node, regexp)
      results = []
      visit = lambda { |current|
        ft = prepare_text_for_comparison(current.inner_html.gsub(/<.*?>/, ''))
        if ft =~ regexp
          matched = Regexp.last_match
          current.instance_eval do
            @match_data = matched
            def match_data
              @match_data
            end
          end
          results << current
          results.delete current.parent if current.is_a? Hpricot::Elem
        end
        unless current.children.nil?
          current.children.each { |child| visit.call(child) if (child.is_a? Hpricot::Elem) }
        end
      }
      visit.call(node)
      results
    end

    #Returns the call stack of the caller of this method (i.e. without the
    #frame of get_backtrace itself) as an array of strings.
    def self.get_backtrace
      caller
    end
  end #end of class SharedUtils
end #end of module Scrubyt
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Scrubyt
  #=<tt>Lookup of simple examples</tt>
  #There are two types of string examples in scRUBYt! right now:
  #the simple example and the compound example.
  #
  #This class is responsible for finding elements matched by simple examples.
  #In the future probably more sophisticated matching algorithms will be added
  #(e.g. match the n-th which matches the text, or element that matches the
  #text but also contains a specific attribute etc.)
  class SimpleExampleLookup
    #From the example text defined by the user, find the lowest possible node which contains the text 'text'.
    #The text can be also a mixed content text, e.g.
    #
    # <a>Bon <b>nuit</b>, monsieur!</a>
    #
    #In this case, <a>'s text is considered to be "Bon nuit, monsieur"
    #
    #Three suffix forms of the example are recognized:
    #
    # "some text/@attr"  - the part after "/@" is dropped before the lookup
    # "some text[2]"     - the number selects which match is returned
    # "name[some text]"  - "some text" is looked up anywhere in an element's
    #                      text, then the tree is traversed up to "name"
    #
    #_next_link_ is not used by this method.
    def self.find_node_from_text(doc, text, next_link=false, index = 0)
      #NOTE(review): as shown here both arguments are identical, which makes
      #this call a no-op on the content; possibly the replacement was an
      #entity - confirm against the original file
      text.gsub!('»', '»')
      final_element_name = nil
      case text
      when /.+\/@.+$/
        #Process immediate attribute extraction (like "go to google.com/@href")
        text = text.scan(/^(.+?)\/@.+$/)[0][0]
      when /.+\[\d+\]$/
        text, position = text.scan(/(.+)\[(\d+)\]$/)[0]
        index = position.to_i
      when /.+\[.+\]$/
        final_element_name = text.scan(/^(.+?)\[/)[0][0]
        text = text.scan(/\[(.+?)\]/)[0][0]
      end

      text = Regexp.escape(text) if text.is_a? String
      if final_element_name
        match = SharedUtils.traverse_for_match(doc,/#{text}/)[index]
        XPathUtils.traverse_up_until_name(match,final_element_name)
      else
        SharedUtils.traverse_for_match(doc,/^#{text}$/)[index]
      end
    end
  end #End of class SimpleExampleLookup
end #End of module Scrubyt
|
@@ -0,0 +1,202 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'hpricot'
|
3
|
+
|
4
|
+
module Scrubyt
  ##
  #=<tt>Various XPath utility functions</tt>
  class XPathUtils

    #Find the LCA (Lowest Common Ancestor) of two nodes
    def self.lowest_common_ancestor(node1, node2)
      path1 = traverse_up(node1)
      path2 = traverse_up(node2)
      return node1.parent if path1 == path2

      #Both paths end with the topmost element, so compare them from the end
      #and stop at the first pair of elements that differ
      closure = nil
      while (!path1.empty? && !path2.empty?)
        closure = path1.pop
        return closure.parent if (closure != path2.pop)
      end
      #One path ran out first: that node is an ancestor of the other one
      path1.size > path2.size ? path1.last.parent : path2.last.parent
    end

    ##
    #Generate XPath for the given node
    #
    #*parameters*
    #
    #_node_ - The node we are looking up the XPath for
    #
    #_stopnode_ - The XPath generation is stopped and the XPath that
    #was generated so far is returned if this node is reached.
    #
    #_write_indices_ - whether the index inside the parent should be
    #added, as in html[1]/body[1]/table[2]/tr[1]/td[8]
    #
    #Returns nil if a _stopnode_ was given but is not an ancestor of _node_.
    def self.generate_XPath(node, stopnode=nil, write_indices=false)
      path = []
      indices = []
      found = false
      while !node.nil? && node.class != Hpricot::Doc do
        if node == stopnode
          found = true
          break
        end
        path.push node.name
        indices.push find_index(node) if write_indices
        node = node.parent
      end
      #This condition ensures that if there is a stopnode, and we did not find it along the way,
      #we return nil (since the stopnode is not contained in the path at all)
      return nil if stopnode != nil && !found
      steps = path.reverse
      if write_indices
        steps = steps.zip(indices.reverse).map { |step, index| "#{step}[#{index}]" }
      end
      "/" + steps.join("/")
    end

    #Generate an XPath of the node with indices, relatively to the given
    #relative_root.
    #
    #For example if the elem's absolute XPath is /a/b/c,
    #and the relative root's XPath is a/b, the result of the function will
    #be /c.
    def self.generate_relative_XPath( elem,relative_root )
      return nil if (elem == relative_root)
      generate_XPath(elem, relative_root, true)
    end

    #Generate a generalized XPath (i.e. without indices) of the node,
    #relatively to the given relative_root.
    #
    #For example if the elem's absolute XPath is /a[1]/b[3]/c[5],
    #and the relative root's XPath is a[1]/b[3], the result of the function will
    #be /c.
    def self.generate_generalized_relative_XPath( elem,relative_root )
      return nil if (elem == relative_root)
      generate_XPath(elem, relative_root, false)
    end

    #Find an image based on the src of the example
    #
    #*parameters*
    #
    #_doc_ - The containing document
    #
    #_example_ - The value of the src attribute of the img tag
    #This is convenient, since if the user right-clicks an image and
    #copies image location, this string will be copied to the clipboard
    #and thus can be easily pasted as an example. The example may end
    #with an index in brackets (like "logo.png[2]"), which overrides _index_.
    #
    #_index_ - there might be more images with the same src on the page -
    #most typically the user will need the 0th - but if this is not the
    #case, there is the possibility to override this
    def self.find_image(doc, example, index=0)
      if example =~ /\.(jpg|png|gif|jpeg)(\[\d+\])$/
        res = example.scan(/(.+)\[(\d+)\]$/)
        example = res[0][0]
        index = res[0][1].to_i
      end
      (doc/"//img[@src='#{example}']")[index]
    end

    ##
    #Used to find the parent of a node with the given name - for example
    #find the <form> node which is the parent of the <input> node
    #
    #Returns nil if a nil node is reached on the way up; returns the
    #Hpricot::Doc if it is reached before an element with that name.
    def self.traverse_up_until_name(node, name)
      while node.class != Hpricot::Doc do
        #raise "The element is nil! This probably means the widget with the specified name ('#{name}') does not exist" unless node
        return nil unless node
        break if node.name == name
        node = node.parent
      end
      node
    end

    ##
    #Used when automatically looking up href attributes (for detail or next links)
    #If the detail pattern did not extract a link, we first look up its
    #children - and if we don't find a link, traverse up
    #
    #Returns nil if neither the node, its descendants nor its ancestors
    #have the attribute.
    def self.find_nearest_node_with_attribute(node, attribute)
      @node = nil
      return node if node.is_a? Hpricot::Elem and node[attribute]
      first_child_node_with_attribute(node, attribute)
      first_parent_node_with_attribute(node, attribute) if !@node
      @node
    end

    ##
    #Generate relative XPath from two XPaths: a parent one, (which points higher in the tree),
    #and a child one. The result of the method is the relative XPath of the node pointed to
    #by the second XPath to the node pointed to by the first XPath.
    def self.generate_relative_XPath_from_XPaths(parent_xpath, child_xpath)
      original_child_xpath_parts = child_xpath.split('/').reject{|s|s==""}
      pairs = to_general_XPath(child_xpath).split('/').reject{|s|s==""}.zip to_general_XPath(parent_xpath).split('/').reject{|s|s==""}
      #Find the first step where the (generalized) paths differ
      i = 0
      pairs.each_with_index do |pair,index|
        i = index
        break if pair[0] != pair[1]
      end
      "/" + original_child_xpath_parts[i..-1].join('/')
    end

    #Generate the full XPath of the first element matched by _xpath_ in _doc_.
    #_generalize_ is handed to generate_XPath as its write_indices argument.
    #NOTE(review): that means indices are written when _generalize_ is true -
    #confirm this is what the callers expect
    def self.to_full_XPath(doc, xpath, generalize)
      elem = doc/xpath
      #'map' without a block returns an enumerator on current rubies, which
      #can not be indexed - take the first element directly
      elem = elem.first if elem.is_a? Hpricot::Elements
      XPathUtils.generate_XPath(elem, nil, generalize)
    end

    #NOTE: 'private' has no effect on methods defined with 'def self.', so the
    #methods below remain callable from outside; it is kept as a marker only.
    private
    #Find the index of the child inside the parent, counting only the
    #element children which have the same name as the child.
    #For example:
    #
    #         tr
    #       / | \
    #     td  td  td
    #     1   2   3
    #
    #The last row contains the indices of the td's from the
    #row above.
    #
    #As in classic XPath, the indices start with 1 (rather than 0).
    def self.find_index(node)
      c = 0
      node.parent.children.each do |child|
        if child.class == Hpricot::Elem
          c += 1 if (child.name == node.name)
          break if (node == child)
        end
      end
      c
    end

    #Collect the node and its parents, from the node upwards, until the
    #Hpricot::Doc (or the stopnode, which is not included) is reached
    def self.traverse_up(node, stopnode=nil)
      path = []
      while node.class != Hpricot::Doc do
        break if node == stopnode
        path.push node
        node = node.parent
      end
      path
    end

    #Depth-first search in the subtree of the node (the node included); the
    #first element having the attribute is stored in @node
    def self.first_child_node_with_attribute(node, attribute)
      #Stop as soon as a node was found. The parentheses matter:
      #'!node.instance_of? Hpricot::Elem || @node' never looked at @node
      return if @node || !node.instance_of?(Hpricot::Elem)
      @node = node if node.attributes[attribute]
      (node.children || []).each { |child| first_child_node_with_attribute(child, attribute) }
    end

    #Walk up from the node (the node included); the first element having
    #the attribute is stored in @node
    def self.first_parent_node_with_attribute(node, attribute)
      return if @node || !node.instance_of?(Hpricot::Elem)
      @node = node if node.attributes[attribute]
      first_parent_node_with_attribute(node.parent, attribute)
    end

    #Remove everything in square brackets (i.e. the indices) from the XPath
    def self.to_general_XPath(xpath)
      xpath.gsub(/\[.+?\]/) {""}
    end #End of method to_general_XPath
  end #End of class XPathUtils
end #End of module Scrubyt
|
data/lib/scrubyt.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
if RUBY_VERSION < '1.9'
|
2
|
+
$KCODE = "u"
|
3
|
+
require "jcode"
|
4
|
+
end
|
5
|
+
|
6
|
+
#ruby core
|
7
|
+
require "open-uri"
|
8
|
+
require "erb"
|
9
|
+
|
10
|
+
#gems
|
11
|
+
require "rexml/text"
|
12
|
+
require "rubygems"
|
13
|
+
require "mechanize"
|
14
|
+
require "hpricot"
|
15
|
+
|
16
|
+
#scrubyt
|
17
|
+
require "#{File.dirname(__FILE__)}/scrubyt/logging"
|
18
|
+
require "#{File.dirname(__FILE__)}/scrubyt/utils/ruby_extensions.rb"
|
19
|
+
require "#{File.dirname(__FILE__)}/scrubyt/utils/xpathutils.rb"
|
20
|
+
require "#{File.dirname(__FILE__)}/scrubyt/utils/shared_utils.rb"
|
21
|
+
require "#{File.dirname(__FILE__)}/scrubyt/utils/simple_example_lookup.rb"
|
22
|
+
require "#{File.dirname(__FILE__)}/scrubyt/utils/compound_example_lookup.rb"
|
23
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/constraint_adder.rb"
|
24
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/constraint.rb"
|
25
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/result_indexer.rb"
|
26
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/pre_filter_document.rb"
|
27
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/compound_example.rb"
|
28
|
+
require "#{File.dirname(__FILE__)}/scrubyt/output/result_node.rb"
|
29
|
+
require "#{File.dirname(__FILE__)}/scrubyt/output/scrubyt_result.rb"
|
30
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/navigation/agents/mechanize.rb"
|
31
|
+
|
32
|
+
# -- Making Firewatir optional --
|
33
|
+
begin
|
34
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/navigation/agents/firewatir.rb"
|
35
|
+
rescue LoadError
|
36
|
+
puts "The gem firewatir is not installed, you'll be able to use Mechanize as the agent only"
|
37
|
+
end
|
38
|
+
# --
|
39
|
+
|
40
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/navigation/navigation_actions.rb"
|
41
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/navigation/fetch_action.rb"
|
42
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/shared/extractor.rb"
|
43
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/base_filter.rb"
|
44
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/attribute_filter.rb"
|
45
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/constant_filter.rb"
|
46
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/script_filter.rb"
|
47
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/text_filter.rb"
|
48
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/detail_page_filter.rb"
|
49
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/download_filter.rb"
|
50
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/html_subtree_filter.rb"
|
51
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/regexp_filter.rb"
|
52
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/tree_filter.rb"
|
53
|
+
require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/pattern.rb"
|
Binary file
|