pstuteville-scrubyt 0.4.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +355 -0
- data/COPYING +340 -0
- data/README.rdoc +121 -0
- data/Rakefile +120 -0
- data/VERSION +1 -0
- data/examples/README.txt +1 -0
- data/examples/events/delta/input.html +682 -0
- data/examples/events/delta/test.rb +16 -0
- data/examples/misc/auto_regex/input.html +22 -0
- data/examples/misc/auto_regex/test.rb +14 -0
- data/examples/misc/compound_example/advanced/test.rb +11 -0
- data/examples/misc/compound_example/advanced/tricky_compound.html +9 -0
- data/examples/misc/compound_example/regexp/regexp_compound.html +17 -0
- data/examples/misc/compound_example/regexp/test.rb +11 -0
- data/examples/misc/compound_example/simple/compound.html +5 -0
- data/examples/misc/compound_example/simple/test.rb +11 -0
- data/examples/misc/detail_page/detailpage.html +6 -0
- data/examples/misc/detail_page/index.html +9 -0
- data/examples/misc/detail_page/test.rb +17 -0
- data/examples/misc/google/test.rb +39 -0
- data/examples/misc/identical_examples/data_extractor_export.rb +12 -0
- data/examples/misc/identical_examples/input.html +16 -0
- data/examples/misc/identical_examples/test.rb +15 -0
- data/examples/misc/immediate_attribute_extraction/data_extractor_export.rb +10 -0
- data/examples/misc/immediate_attribute_extraction/input.html +16 -0
- data/examples/misc/immediate_attribute_extraction/test.rb +14 -0
- data/examples/misc/multiple_examples/input.html +7 -0
- data/examples/misc/multiple_examples/test.rb +22 -0
- data/examples/misc/on_click_next/next_page_link.rb +42 -0
- data/examples/misc/on_click_next/page_1.html +10 -0
- data/examples/misc/on_click_next/page_2.html +10 -0
- data/examples/misc/on_click_next/page_3.html +7 -0
- data/examples/misc/rubycorner/test.rb +29 -0
- data/examples/misc/rubyforge_login/test.rb +30 -0
- data/examples/misc/tables/ambigous_records/input.html +17 -0
- data/examples/misc/tables/ambigous_records/test.rb +37 -0
- data/examples/misc/tables/another_plain_table/input.html +15 -0
- data/examples/misc/tables/another_plain_table/test.rb +25 -0
- data/examples/misc/tables/complex_table/input.html +45 -0
- data/examples/misc/tables/complex_table/test.rb +30 -0
- data/examples/misc/tables/grab_rows/input.html +20 -0
- data/examples/misc/tables/grab_rows/test.rb +30 -0
- data/examples/misc/tables/plain_table/input.html +39 -0
- data/examples/misc/tables/plain_table/test.rb +35 -0
- data/examples/misc/tables/plain_table_morepages/2.html +38 -0
- data/examples/misc/tables/plain_table_morepages/3.html +33 -0
- data/examples/misc/tables/plain_table_morepages/input.html +40 -0
- data/examples/misc/tables/plain_table_morepages/test.rb +32 -0
- data/examples/misc/tables/plain_table_morepages_with_image/2.html +40 -0
- data/examples/misc/tables/plain_table_morepages_with_image/3.html +33 -0
- data/examples/misc/tables/plain_table_morepages_with_image/images/right_arrow.png +0 -0
- data/examples/misc/tables/plain_table_morepages_with_image/input.html +42 -0
- data/examples/misc/tables/plain_table_morepages_with_image/test.rb +32 -0
- data/examples/misc/tables/test_select_indices/input.html +46 -0
- data/examples/misc/tables/test_select_indices/test.rb +55 -0
- data/examples/misc/xpath_example_type/input.html +15 -0
- data/examples/misc/xpath_example_type/test.rb +18 -0
- data/examples/misc/yahoo_finance/test.rb +26 -0
- data/examples/social/blog_comment/test.rb +27 -0
- data/examples/social/del.icio.us/test.rb +22 -0
- data/examples/social/digg/test.rb +37 -0
- data/examples/social/dzone/test.rb +28 -0
- data/examples/social/linkedin/linkedin.rb +23 -0
- data/examples/social/reddit/test.rb +23 -0
- data/examples/tones_extractor_export.rb +23 -0
- data/examples/webshops/amazon/002-8212888-3924065.html +5311 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0130796034.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/020161622X.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0321223675.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0465067107.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0470069155.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0470081201.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0596005253.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0596101325.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0596523696.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0672328844.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0764596861.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0974514055.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0976694069.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0977616606.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0977616614.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/0977616630.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/1590597362.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/1594480060.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/1932394699.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/2841772101.jpg +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/amzn-logo-118w.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/askville-adwidget-bullet.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/askville-logo-sm-adwidget-white-bg.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/book_display_on_website-icon.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-inactive-no-ns.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-inactive-no.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-inactive-yes-ns.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-inactive-yes.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-no-tiny.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/btn-yes-tiny.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/buybox-button-find-gifts-a.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/c7y_badge_rn_1.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/cap-a9-3.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/drop-down-icon-small-arrow.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/endcap-a9-go-2.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/gb-open-new.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/gc-logo-popover-a.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/gift-cert-roto-pop-a.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/go-button-books.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/go-button.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/go-orange-trans.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/go_button_photo.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/logo-off.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/n2CoreLibs-events-18134.js +1407 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/n2CoreLibs-n2v1-57871.css +364 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/n2CoreLibs-simplePopover-41153.js +749 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/n2CoreLibs-utilities-25439.js +1608 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/orange-arrow.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/orange-arrow_002.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/popover-blurb.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/powered-by-a9.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/stars-3-5.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/stars-4-0.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/stars-4-5.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/stars-5-0.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/tagline-adwidget.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/topnav-cart.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/transparent-pixel.gif +0 -0
- data/examples/webshops/amazon/002-8212888-3924065_files/transparent-pixel_002.gif +0 -0
- data/examples/webshops/amazon/test.rb +27 -0
- data/examples/webshops/amazon-online/test.rb +34 -0
- data/examples/webshops/barnes_and_noble/test.rb +32 -0
- data/examples/webshops/barnes_offline/barnes_and_noble.html +115 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/10964080.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/10999676.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/11018492.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/11656534.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/11985045.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/12052378.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/12138286.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/12533212.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/12533268.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/9583392.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/SearchProduct.css +626 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin3_gtpointup.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_aboutshipping.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_account.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_colon.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_giftreminder.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_help.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_orderstatus.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/admin_wishlist.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/bg.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/btnGoGrn.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/cleardot.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/cleardot_002.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/dot4.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/dotGold20.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/hdCantFind.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/hdSearchResults.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/hgg_tab_home_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/hgg_tab_toy_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/iframeKMP.js +172 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2.html +25 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/070226_mc_lnav_search.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/XmlUtil.js +199 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/XslStyleSheet.js +1 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/kmp_gen.css +81 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/product-preview-core.js +337 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/product-preview.css +36 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/kmp_iframe_cds2_data/productpreview.js +11 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/linePagination.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/logo_bn05.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/navbar.js +34 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/navbar_06.css +136 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/popup_open.js +116 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/qsearch3_vline_dots.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/qsearch4_search.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/qsearch_AdvSearch.jpg +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/subnav_colon.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_Bookclubs_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_bnjr_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_books_hot.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_dvd_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_giftcards_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_home_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_member_cc_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_music_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_pipe.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_textbooksonly_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_usedoop_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/tab_videogames_cold.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/toppromo3_rule.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/toppromo_fastfree05.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/vcart_btn_checkout.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/vcart_icon_cart.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/vcart_topbot_rule.gif +0 -0
- data/examples/webshops/barnes_offline/barnes_and_noble_files/visualcart_prodid.js +401 -0
- data/examples/webshops/barnes_offline/test.rb +30 -0
- data/examples/webshops/buydig/2_files/03AA1BB9089A4A6A92CF23F280D664EB.jpg +0 -0
- data/examples/webshops/buydig/2_files/1008.gif +0 -0
- data/examples/webshops/buydig/2_files/1013.gif +0 -0
- data/examples/webshops/buydig/2_files/1020.gif +0 -0
- data/examples/webshops/buydig/2_files/106CF2FB84B446518397517C3E6D5AD8.jpg +0 -0
- data/examples/webshops/buydig/2_files/13-www.gif +0 -0
- data/examples/webshops/buydig/2_files/1E9BB2E56AB145FC9D6EF952703AF476.jpg +0 -0
- data/examples/webshops/buydig/2_files/1FCDFBE85CDB4D429EC2C8CB24D20457.jpg +0 -0
- data/examples/webshops/buydig/2_files/1pix.gif +0 -0
- data/examples/webshops/buydig/2_files/2014.gif +0 -0
- data/examples/webshops/buydig/2_files/2089.gif +0 -0
- data/examples/webshops/buydig/2_files/24992_medal.gif +0 -0
- data/examples/webshops/buydig/2_files/24BBCBA1397F4DDCBBBBE8456D6D6E5B.jpg +0 -0
- data/examples/webshops/buydig/2_files/281F8A6019B140F38DFD45EB7B69B0FB.jpg +0 -0
- data/examples/webshops/buydig/2_files/2975F866CB2149F7ACBC559C8E24E304.jpg +0 -0
- data/examples/webshops/buydig/2_files/316FC9256DC9460ABC3C5ECAF6C60286.jpg +0 -0
- data/examples/webshops/buydig/2_files/50569327D8B94252B95E449AE470E505.jpg +0 -0
- data/examples/webshops/buydig/2_files/519CDAB404FA4543B76B5F281468ACBF.jpg +0 -0
- data/examples/webshops/buydig/2_files/57D6146419A647BA89C96AF0B5CAB03C.jpg +0 -0
- data/examples/webshops/buydig/2_files/58E3F988E184448B8C0A59874AE123A8.jpg +0 -0
- data/examples/webshops/buydig/2_files/5E5B10197A4E4C9B9ECCD6309DBE4C54.jpg +0 -0
- data/examples/webshops/buydig/2_files/609A249177D04065B37B9161CB0BC92D.jpg +0 -0
- data/examples/webshops/buydig/2_files/676CEE8E53C2445982E991871B4DF613.jpg +0 -0
- data/examples/webshops/buydig/2_files/712BA08FAB524A31A76ABB9E2009FF8E.jpg +0 -0
- data/examples/webshops/buydig/2_files/734BD08D7A5049339393166491D09D21.jpg +0 -0
- data/examples/webshops/buydig/2_files/751E72B7003343248497FE6905F80787.jpg +0 -0
- data/examples/webshops/buydig/2_files/76493D4F02F14EF7B5886510604C7BB4.jpg +0 -0
- data/examples/webshops/buydig/2_files/79521E251278486DB29529C60C9D012A.jpg +0 -0
- data/examples/webshops/buydig/2_files/9C9AF82AC3B54BDC8C705278B50FDFD6.jpg +0 -0
- data/examples/webshops/buydig/2_files/BC3FD8307B9948FDB7EEF156D8629C37.jpg +0 -0
- data/examples/webshops/buydig/2_files/C0DD4574765047D1836F505E69DC8AE5.jpg +0 -0
- data/examples/webshops/buydig/2_files/C143F48515274A44B04F4B3E46306BD2.jpg +0 -0
- data/examples/webshops/buydig/2_files/C6B02E88F729464699DB275D140F4563.jpg +0 -0
- data/examples/webshops/buydig/2_files/CE334D6206DB4FA9AFDF339AEF0AF50F.jpg +0 -0
- data/examples/webshops/buydig/2_files/D66AE0DC865A4021AB300ED3A0C4CD11.jpg +0 -0
- data/examples/webshops/buydig/2_files/DEA2EC2093DC474D96B651068576DAE5.jpg +0 -0
- data/examples/webshops/buydig/2_files/F547677D83844042BF13A4BE6523BB50.jpg +0 -0
- data/examples/webshops/buydig/2_files/Rbbbonlineseal.gif +0 -0
- data/examples/webshops/buydig/2_files/TopSellers_bottom.gif +0 -0
- data/examples/webshops/buydig/2_files/TopSellers_ttl.gif +0 -0
- data/examples/webshops/buydig/2_files/addToFavorites_ttl.gif +0 -0
- data/examples/webshops/buydig/2_files/banner_CorporateSales.gif +0 -0
- data/examples/webshops/buydig/2_files/banner_Shipping.gif +0 -0
- data/examples/webshops/buydig/2_files/bizratehonoree.gif +0 -0
- data/examples/webshops/buydig/2_files/btn_submit.gif +0 -0
- data/examples/webshops/buydig/2_files/checkFlash.js +110 -0
- data/examples/webshops/buydig/2_files/checkFlash2.js +109 -0
- data/examples/webshops/buydig/2_files/cnetcertified.gif +0 -0
- data/examples/webshops/buydig/2_files/credPriceGrabber.gif +0 -0
- data/examples/webshops/buydig/2_files/credShopping.gif +0 -0
- data/examples/webshops/buydig/2_files/credential_paypal.gif +0 -0
- data/examples/webshops/buydig/2_files/credentials.gif +0 -0
- data/examples/webshops/buydig/2_files/dealtime.gif +0 -0
- data/examples/webshops/buydig/2_files/dvxstyle.css +754 -0
- data/examples/webshops/buydig/2_files/footer_021306_1_v1.gif +0 -0
- data/examples/webshops/buydig/2_files/func.js +132 -0
- data/examples/webshops/buydig/2_files/getseal +1 -0
- data/examples/webshops/buydig/2_files/help.gif +0 -0
- data/examples/webshops/buydig/2_files/home.gif +0 -0
- data/examples/webshops/buydig/2_files/java.js +155 -0
- data/examples/webshops/buydig/2_files/leftnv_help.gif +0 -0
- data/examples/webshops/buydig/2_files/logo.gif +0 -0
- data/examples/webshops/buydig/2_files/logo2.gif +0 -0
- data/examples/webshops/buydig/2_files/logo3.gif +0 -0
- data/examples/webshops/buydig/2_files/main.js +227 -0
- data/examples/webshops/buydig/2_files/mastercard_secured.gif +0 -0
- data/examples/webshops/buydig/2_files/newsBox_bkg.jpg +0 -0
- data/examples/webshops/buydig/2_files/newsBox_bottom.jpg +0 -0
- data/examples/webshops/buydig/2_files/newsBox_text.gif +0 -0
- data/examples/webshops/buydig/2_files/newsBox_ttl.jpg +0 -0
- data/examples/webshops/buydig/2_files/noimage75.gif +0 -0
- data/examples/webshops/buydig/2_files/orangeleftfrmbtm.gif +0 -0
- data/examples/webshops/buydig/2_files/pixel153.gif +0 -0
- data/examples/webshops/buydig/2_files/rightnv_bottom.gif +0 -0
- data/examples/webshops/buydig/2_files/search_btn_off.gif +0 -0
- data/examples/webshops/buydig/2_files/search_c1.gif +0 -0
- data/examples/webshops/buydig/2_files/search_c2.gif +0 -0
- data/examples/webshops/buydig/2_files/search_c3.gif +0 -0
- data/examples/webshops/buydig/2_files/search_c4.gif +0 -0
- data/examples/webshops/buydig/2_files/search_down.gif +0 -0
- data/examples/webshops/buydig/2_files/search_left.gif +0 -0
- data/examples/webshops/buydig/2_files/search_right.gif +0 -0
- data/examples/webshops/buydig/2_files/search_top.gif +0 -0
- data/examples/webshops/buydig/2_files/siteLinks_bottom.gif +0 -0
- data/examples/webshops/buydig/2_files/siteLinks_bullet.gif +0 -0
- data/examples/webshops/buydig/2_files/siteLinks_ttl.gif +0 -0
- data/examples/webshops/buydig/2_files/spacer.gif +0 -0
- data/examples/webshops/buydig/2_files/style.js +45 -0
- data/examples/webshops/buydig/2_files/styles.html +33 -0
- data/examples/webshops/buydig/2_files/track_orders.jpg +0 -0
- data/examples/webshops/buydig/2_files/urchin +534 -0
- data/examples/webshops/buydig/2_files/verified_by_visa.gif +0 -0
- data/examples/webshops/buydig/2_files/welcome.gif +0 -0
- data/examples/webshops/buydig/2_files/welcome_ttl.gif +0 -0
- data/examples/webshops/buydig/2_files/yahoologo.gif +0 -0
- data/examples/webshops/buydig/input.html +1194 -0
- data/examples/webshops/buydig/test.rb +31 -0
- data/examples/webshops/ebay/test.rb +32 -0
- data/examples/webshops/finewines_offline/_finewines.html +1739 -0
- data/examples/webshops/finewines_offline/_finewines_cut.html +371 -0
- data/examples/webshops/finewines_offline/_finewines_files/011064.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/012674.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/013268.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/013300.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/013409.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/014340.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/015073.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/015255.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/015479.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/015487.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017038.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017129.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017145.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017152.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017285.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017392.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/017400.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/019778.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/019786.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/020503.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/021253.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/021279.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/021337.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/021352.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/023002.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/023135.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/023143.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/023788.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/024166.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/024182.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/024216.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/027268.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/027516.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/027862.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/028118.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/028936.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033894.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033902.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033910.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033936.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033944.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/033951.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/034553.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/034561.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/232439.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/237834.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/268359.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/289082.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/331603.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/369686.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/420257.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/422014.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/460410.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/480533.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/556795.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/597054.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/650606.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/652628.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/653790.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/658450.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/660027.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/660951.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/684514.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/685131.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/686865.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/699330.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/703017.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/703140.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/703850.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/717306.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/900274.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/938225.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/947440.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/951319.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/967893.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/981407.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/981613.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/982421.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/985598.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/986737.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/987503.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/992800.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/VintageslogoEN.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/blanc-up.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/btn_vintages_latest.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/cc_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/common.js +860 -0
- data/examples/webshops/finewines_offline/_finewines_files/drink.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/drinkhold.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/ec_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/ev_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/hold.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/index-wines-features.jpg +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/indexSTYLE.css +398 -0
- data/examples/webshops/finewines_offline/_finewines_files/keyword_search.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/mm_menu.js +1 -0
- data/examples/webshops/finewines_offline/_finewines_files/nr_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/ontario_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/popup.js +81 -0
- data/examples/webshops/finewines_offline/_finewines_files/releases_nav.js +229 -0
- data/examples/webshops/finewines_offline/_finewines_files/so_en.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/spacer.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/top.gif +0 -0
- data/examples/webshops/finewines_offline/_finewines_files/urchin.js +576 -0
- data/examples/webshops/finewines_offline/_finewines_files/wom_en.gif +0 -0
- data/examples/webshops/finewines_offline/test.rb +30 -0
- data/examples/webshops/us1camera/1_files/1pix.gif +0 -0
- data/examples/webshops/us1camera/1_files/1pix_002.gif +0 -0
- data/examples/webshops/us1camera/1_files/CnetCertified.gif +0 -0
- data/examples/webshops/us1camera/1_files/CyberSource.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50_002.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50_003.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50_004.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50_005.gif +0 -0
- data/examples/webshops/us1camera/1_files/Images50_006.gif +0 -0
- data/examples/webshops/us1camera/1_files/PriceGrabber.gif +0 -0
- data/examples/webshops/us1camera/1_files/QSearch.gif +0 -0
- data/examples/webshops/us1camera/1_files/ban-m.jpg +0 -0
- data/examples/webshops/us1camera/1_files/banner1.bin +0 -0
- data/examples/webshops/us1camera/1_files/banner3.bin +0 -0
- data/examples/webshops/us1camera/1_files/block1.jpg +0 -0
- data/examples/webshops/us1camera/1_files/block2.jpg +0 -0
- data/examples/webshops/us1camera/1_files/block3.jpg +0 -0
- data/examples/webshops/us1camera/1_files/block4.jpg +0 -0
- data/examples/webshops/us1camera/1_files/block5.jpg +0 -0
- data/examples/webshops/us1camera/1_files/block6.jpg +0 -0
- data/examples/webshops/us1camera/1_files/bos.js +280 -0
- data/examples/webshops/us1camera/1_files/box1.jpg +0 -0
- data/examples/webshops/us1camera/1_files/box2.jpg +0 -0
- data/examples/webshops/us1camera/1_files/box3.jpg +0 -0
- data/examples/webshops/us1camera/1_files/box4.jpg +0 -0
- data/examples/webshops/us1camera/1_files/dot.jpg +0 -0
- data/examples/webshops/us1camera/1_files/eDevix.gif +0 -0
- data/examples/webshops/us1camera/1_files/electronics1.jpg +0 -0
- data/examples/webshops/us1camera/1_files/getseal +1 -0
- data/examples/webshops/us1camera/1_files/pride.jpg +0 -0
- data/examples/webshops/us1camera/1_files/search.jpg +0 -0
- data/examples/webshops/us1camera/1_files/sidebutton.jpg +0 -0
- data/examples/webshops/us1camera/1_files/sslroilogic.js +49 -0
- data/examples/webshops/us1camera/1_files/style.css +1 -0
- data/examples/webshops/us1camera/1_files/tl.html +2 -0
- data/examples/webshops/us1camera/input.html +548 -0
- data/examples/webshops/us1camera/test.rb +37 -0
- data/lib/scrubyt/core/navigation/agents/firewatir.rb +285 -0
- data/lib/scrubyt/core/navigation/agents/mechanize.rb +315 -0
- data/lib/scrubyt/core/navigation/fetch_action.rb +63 -0
- data/lib/scrubyt/core/navigation/navigation_actions.rb +107 -0
- data/lib/scrubyt/core/scraping/compound_example.rb +30 -0
- data/lib/scrubyt/core/scraping/constraint.rb +169 -0
- data/lib/scrubyt/core/scraping/constraint_adder.rb +49 -0
- data/lib/scrubyt/core/scraping/filters/attribute_filter.rb +14 -0
- data/lib/scrubyt/core/scraping/filters/base_filter.rb +112 -0
- data/lib/scrubyt/core/scraping/filters/constant_filter.rb +9 -0
- data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +37 -0
- data/lib/scrubyt/core/scraping/filters/download_filter.rb +64 -0
- data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +9 -0
- data/lib/scrubyt/core/scraping/filters/regexp_filter.rb +13 -0
- data/lib/scrubyt/core/scraping/filters/script_filter.rb +11 -0
- data/lib/scrubyt/core/scraping/filters/text_filter.rb +34 -0
- data/lib/scrubyt/core/scraping/filters/tree_filter.rb +138 -0
- data/lib/scrubyt/core/scraping/pattern.rb +359 -0
- data/lib/scrubyt/core/scraping/pre_filter_document.rb +14 -0
- data/lib/scrubyt/core/scraping/result_indexer.rb +90 -0
- data/lib/scrubyt/core/shared/extractor.rb +171 -0
- data/lib/scrubyt/logging.rb +154 -0
- data/lib/scrubyt/output/post_processor.rb +139 -0
- data/lib/scrubyt/output/result.rb +44 -0
- data/lib/scrubyt/output/result_dumper.rb +154 -0
- data/lib/scrubyt/output/result_node.rb +145 -0
- data/lib/scrubyt/output/scrubyt_result.rb +42 -0
- data/lib/scrubyt/utils/compound_example_lookup.rb +50 -0
- data/lib/scrubyt/utils/ruby_extensions.rb +85 -0
- data/lib/scrubyt/utils/shared_utils.rb +58 -0
- data/lib/scrubyt/utils/simple_example_lookup.rb +40 -0
- data/lib/scrubyt/utils/xpathutils.rb +202 -0
- data/lib/scrubyt.rb +53 -0
- data/pkg/scrubyt-0.4.31.gem +0 -0
- data/resources/allison/LICENSE +184 -0
- data/resources/allison/README +37 -0
- data/resources/allison/allison.css +301 -0
- data/resources/allison/allison.gif +0 -0
- data/resources/allison/allison.js +307 -0
- data/resources/allison/allison.rb +287 -0
- data/resources/allison/cache/BODY +588 -0
- data/resources/allison/cache/CLASS_INDEX +4 -0
- data/resources/allison/cache/CLASS_PAGE +1 -0
- data/resources/allison/cache/FILE_INDEX +4 -0
- data/resources/allison/cache/FILE_PAGE +1 -0
- data/resources/allison/cache/FONTS +1 -0
- data/resources/allison/cache/FR_INDEX_BODY +1 -0
- data/resources/allison/cache/IMGPATH +1 -0
- data/resources/allison/cache/INDEX +1 -0
- data/resources/allison/cache/JAVASCRIPT +307 -0
- data/resources/allison/cache/METHOD_INDEX +4 -0
- data/resources/allison/cache/METHOD_LIST +1 -0
- data/resources/allison/cache/SRC_PAGE +1 -0
- data/resources/allison/cache/STYLE +323 -0
- data/resources/allison/cache/URL +1 -0
- data/scrubyt.gemspec +609 -0
- data/test/blackbox_test.rb +60 -0
- data/test/blackbox_tests/basic/multi_root.expected.xml +8 -0
- data/test/blackbox_tests/basic/multi_root.rb +6 -0
- data/test/blackbox_tests/basic/simple.expected.xml +5 -0
- data/test/blackbox_tests/basic/simple.rb +5 -0
- data/test/blackbox_tests/basic/three_divs.html +12 -0
- data/test/blackbox_tests/detail_page/detail_page_1.html +7 -0
- data/test/blackbox_tests/detail_page/detail_page_2.html +7 -0
- data/test/blackbox_tests/detail_page/main_page_1.html +5 -0
- data/test/blackbox_tests/detail_page/main_page_2.html +6 -0
- data/test/blackbox_tests/detail_page/one_detail_page.expected.xml +7 -0
- data/test/blackbox_tests/detail_page/one_detail_page.rb +9 -0
- data/test/blackbox_tests/detail_page/two_detail_pages.expected.xml +12 -0
- data/test/blackbox_tests/detail_page/two_detail_pages.rb +9 -0
- data/test/blackbox_tests/next_page/next_page_link.expected.xml +11 -0
- data/test/blackbox_tests/next_page/next_page_link.rb +7 -0
- data/test/blackbox_tests/next_page/page_1.html +11 -0
- data/test/blackbox_tests/next_page/page_2.html +11 -0
- data/test/blackbox_tests/next_page/page_3.html +7 -0
- data/test/blackbox_tests/next_page/page_list_links.expected.xml +11 -0
- data/test/blackbox_tests/next_page/page_list_links.rb +7 -0
- data/test/blackbox_tests/next_page/page_list_links.tofix +7 -0
- data/todo/backlog.txt +73 -0
- data/todo/scenario_ideas.txt +19 -0
- metadata +637 -0
@@ -0,0 +1,60 @@
|
|
1
|
+
$lib_path = File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
$:.unshift $lib_path
|
3
|
+
|
4
|
+
require 'scrubyt'
|
5
|
+
require 'test/unit'
|
6
|
+
|
7
|
+
# Runs one blackbox test script and compares both the natively loaded and the
# exported extractor against the expected XML.
#
# The expected XML is read from a file in the script's directory whose name is
# the script's basename with its last three characters (the ".rb" suffix)
# replaced by ".expected.xml". The script is loaded with
# Scrubyt::Extractor.load, exported with the 'lambda' template, and the
# exported code is evaluated and passed to Scrubyt::Extractor.define. The XML
# of both results must equal the expected XML.
#
# While the test runs, the working directory is the script's directory and,
# unless +detailed+ is true, $stdout is replaced by a StringIO. Both are
# restored on the way out, whether or not the test failed.
#
# test_path:: path of the extractor script to run
# detailed::  when true, keep $stdout and print the script, the exported code,
#             the expected XML and both results to it
#
# Requires +assert_equal+ to be callable from the calling context
# (e.g. via Test::Unit::Assertions). Any error raised while loading or
# exporting the extractor propagates unchanged.
def perform_test(test_path, detailed = false)
  out = $stdout
  $stdout = StringIO.new unless detailed
  cwd = Dir.getwd
  Dir.chdir(File.dirname(test_path))

  script_name = File.basename(test_path)

  if detailed
    out.puts "Test: #{test_path}"
    out.puts "========== Print Output =========="
  end

  begin
    # [0..-4] drops the trailing ".rb" from the script name.
    expected_xml = File.read(script_name[0..-4] + ".expected.xml")

    scrubyt_result_native = Scrubyt::Extractor.load(script_name)

    # NOTE(review): eval executes the code string produced by the extractor's
    # own export; only run this against trusted test scripts.
    exported_code = scrubyt_result_native.export({:template => 'lambda'})
    scrubyt_result_exported = Scrubyt::Extractor.define(&eval(exported_code))
  ensure
    if detailed
      # If a step above raised, the later locals are still nil. Calling
      # to_xml on them here would raise NoMethodError from inside ensure and
      # replace the real error, so print a placeholder instead.
      missing = "(not available)"

      out.puts "========== Native Extractor =========="
      out.puts IO.read(script_name)
      out.puts "========== Exported Extractor =========="
      out.puts exported_code
      out.puts "========== Expected =========="
      out.puts expected_xml
      out.puts "========== Result (native) =========="
      out.puts(scrubyt_result_native ? scrubyt_result_native.to_xml : missing)
      out.puts "========== Result (exported) =========="
      out.puts(scrubyt_result_exported ? scrubyt_result_exported.to_xml : missing)
    end
  end

  assert_equal expected_xml, scrubyt_result_native.to_xml
  assert_equal expected_xml, scrubyt_result_exported.to_xml
ensure
  Dir.chdir(cwd)
  $stdout = out
end
|
44
|
+
|
45
|
+
# Direct invocation with a path argument: run only that test script in
# detailed mode, then exit.
requested_test = ARGV[0]
if __FILE__ == $0 && requested_test
  # perform_test calls assert_equal, so make the assertions available here.
  include Test::Unit::Assertions
  perform_test(requested_test, true)
  exit
end
|
50
|
+
|
51
|
+
# Blackbox suite: defines one test method per *.rb script found (recursively)
# under the blackbox_tests directory next to this file. Scripts are processed
# in sorted path order, and each generated method hands its script to
# perform_test.
class BlackboxTest < Test::Unit::TestCase
  script_pattern = File.join(File.dirname(__FILE__), 'blackbox_tests', '**', '*.rb')

  Dir.glob(script_pattern).sort.each do |script|
    # Slashes are replaced by underscores to build the method name.
    method_name = "test_#{script.gsub('/', '_')}"
    define_method(method_name) { perform_test(script) }
  end
end
|
data/todo/backlog.txt
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
scRUBYt! Backlog
|
2
|
+
|
3
|
+
[NEW] Rails runner - possibility to upload (or type into a form) an extractor, launch it and get back the result
|
4
|
+
[NEW] when crawling to more pages, save the XMLs one by one rather than once at the end
|
5
|
+
|
6
|
+
[NEW] select indices should evaluate as many results as needed (if possible - if there is :last for example, this is not possible)
|
7
|
+
|
8
|
+
[IDEA] One thing I’d like to suggest is the use of a proxy to record interactions with a webpage and automatically allow the user to select items to parse or record a user’s interactions, generate the right script to get the user to that page (including POSTs / GETS) and let the user then select items to parse out of the target page.
|
9
|
+
Discuss this with the others
|
10
|
+
|
11
|
+
[NEW] exporting of crawling-to-next-page pattern (xpath, '.*' => '.*/page{num}')
|
12
|
+
TODO: CHEATSHEET!
|
13
|
+
TODO: all_img is substitute for
|
14
|
+
all_img '//img' etc.
|
15
|
+
i.e. if a pattern name is also a html tag, behave like this
|
16
|
+
TODO: lotsa unit tests
|
17
|
+
TODO: to_csv, to_txt, to_rss, to_ruport, to_class, to_db, to_whatever (Tim Fletcher)
|
18
|
+
TODO: simple xslt transformation
|
19
|
+
TODO: ensure_value_is_greater_than 10
|
20
|
+
|
21
|
+
TODO: Performance optimization (check that nothing is loaded twice, probably just reference the
|
22
|
+
input document instead of using it as a hash key twice and hash value once etc.)
|
23
|
+
Discuss this with the others
|
24
|
+
|
25
|
+
TODO: Possibility of explicitly defining the example type - for example it may be possible that
|
26
|
+
I want to match an XPath on the page and thus the example may look like '/html/body/table[1]'
|
27
|
+
yet it is not an XPath but a String example; in these cases, possibility to say:
|
28
|
+
|
29
|
+
xpath_stuff '/html/body/table[1]', :type => :string
|
30
|
+
TODO: Compound pattern, e.g.:
|
31
|
+
<item>
|
32
|
+
<price>$1.99</price>
|
33
|
+
<shipping>$8.01</shipping>
|
34
|
+
<total>$10.00</total> --> This is the compound pattern
|
35
|
+
</item>
|
36
|
+
|
37
|
+
This should look something like:
|
38
|
+
item do
|
39
|
+
price '$1.99'
|
40
|
+
shipping '$8.01'
|
41
|
+
total 'price', 'shipping', :type => compound, :function => :add_prices
|
42
|
+
end
|
43
|
+
|
44
|
+
def add_prices(price, shipping)
|
45
|
+
price[1..price.size].to_i + shipping[1..shipping.size].to_i
|
46
|
+
end
|
47
|
+
|
48
|
+
TODO: setup Joomla (move it to extractors.scrubyt.org)
|
49
|
+
TODO: refactor the whole constraint system
|
50
|
+
TODO: unit tests!!!
|
51
|
+
|
52
|
+
TODO: Widgets module - e.g. Widgets::Dropdown
|
53
|
+
usage (victor from mexico):
|
54
|
+
|
55
|
+
Widgets::Dropdown('houses').each_value do |house|
|
56
|
+
Widgets::Dropdown('districts').each_value do |district|
|
57
|
+
scrape ( house, district )
|
58
|
+
|
59
|
+
TODO: RSS feed on the recent changes
|
60
|
+
|
61
|
+
TODO: to_db(pattern_name)
|
62
|
+
e.g.
|
63
|
+
|
64
|
+
root
|
65
|
+
--- book
|
66
|
+
--- title
|
67
|
+
--- author
|
68
|
+
|
69
|
+
to_db(book) will create a table book with columns title and author
|
70
|
+
TODO: self correcting wrappers; see Assaf's ideas (after learning, save the CSS selectors, IDs, classes etc) and
|
71
|
+
re-learn from those)
|
72
|
+
|
73
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
check out OPENKAPOW!!!
|
2
|
+
|
3
|
+
- ibm spidering article rewrite
|
4
|
+
- table to csv
|
5
|
+
- cia factbook
|
6
|
+
- book price comparison
|
7
|
+
- cnn news headlines
|
8
|
+
- google news
|
9
|
+
- Scenario "antisocial": watch del.icio.us, dzone and digg articles on one page!
|
10
|
+
* (possibly requires more wrappers in one extractor - or more extractors?) + some nice
|
11
|
+
* XSLT with CSS
|
12
|
+
* something like:
|
13
|
+
.) scenario.watch(:delicious, 'Peter's super article')
|
14
|
+
.) scenario.watch(:digg, 'xxx')
|
15
|
+
- dreamhost spam subjects
|
16
|
+
- James Brit 'coffee shop' example
|
17
|
+
|
18
|
+
|
19
|
+
|