scrubber-scrubyt 0.4.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. data/CHANGELOG +343 -0
  2. data/COPYING +340 -0
  3. data/README +99 -0
  4. data/Rakefile +101 -0
  5. data/lib/scrubyt/core/navigation/agents/firewatir.rb +249 -0
  6. data/lib/scrubyt/core/navigation/agents/mechanize.rb +253 -0
  7. data/lib/scrubyt/core/navigation/fetch_action.rb +54 -0
  8. data/lib/scrubyt/core/navigation/navigation_actions.rb +95 -0
  9. data/lib/scrubyt/core/scraping/compound_example.rb +30 -0
  10. data/lib/scrubyt/core/scraping/constraint.rb +169 -0
  11. data/lib/scrubyt/core/scraping/constraint_adder.rb +49 -0
  12. data/lib/scrubyt/core/scraping/filters/attribute_filter.rb +14 -0
  13. data/lib/scrubyt/core/scraping/filters/base_filter.rb +112 -0
  14. data/lib/scrubyt/core/scraping/filters/constant_filter.rb +9 -0
  15. data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +37 -0
  16. data/lib/scrubyt/core/scraping/filters/download_filter.rb +64 -0
  17. data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +9 -0
  18. data/lib/scrubyt/core/scraping/filters/regexp_filter.rb +13 -0
  19. data/lib/scrubyt/core/scraping/filters/script_filter.rb +11 -0
  20. data/lib/scrubyt/core/scraping/filters/text_filter.rb +34 -0
  21. data/lib/scrubyt/core/scraping/filters/tree_filter.rb +138 -0
  22. data/lib/scrubyt/core/scraping/pattern.rb +359 -0
  23. data/lib/scrubyt/core/scraping/pre_filter_document.rb +14 -0
  24. data/lib/scrubyt/core/scraping/result_indexer.rb +90 -0
  25. data/lib/scrubyt/core/shared/extractor.rb +167 -0
  26. data/lib/scrubyt/logging.rb +154 -0
  27. data/lib/scrubyt/output/post_processor.rb +139 -0
  28. data/lib/scrubyt/output/result.rb +44 -0
  29. data/lib/scrubyt/output/result_dumper.rb +154 -0
  30. data/lib/scrubyt/output/result_node.rb +140 -0
  31. data/lib/scrubyt/output/scrubyt_result.rb +42 -0
  32. data/lib/scrubyt/utils/compound_example_lookup.rb +50 -0
  33. data/lib/scrubyt/utils/ruby_extensions.rb +85 -0
  34. data/lib/scrubyt/utils/shared_utils.rb +58 -0
  35. data/lib/scrubyt/utils/simple_example_lookup.rb +40 -0
  36. data/lib/scrubyt/utils/xpathutils.rb +202 -0
  37. data/lib/scrubyt.rb +43 -0
  38. data/test/blackbox_test.rb +60 -0
  39. data/test/blackbox_tests/basic/multi_root.rb +6 -0
  40. data/test/blackbox_tests/basic/simple.rb +5 -0
  41. data/test/blackbox_tests/detail_page/one_detail_page.rb +9 -0
  42. data/test/blackbox_tests/detail_page/two_detail_pages.rb +9 -0
  43. data/test/blackbox_tests/next_page/next_page_link.rb +7 -0
  44. data/test/blackbox_tests/next_page/page_list_links.rb +7 -0
  45. metadata +115 -0
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: scrubber-scrubyt
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.4.11
5
+ platform: ruby
6
+ authors:
7
+ - Peter Szinek
8
+ - Glenn Gillen
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2009-01-31 00:00:00 -08:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: hpricot
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0.5"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: mechanize
27
+ version_requirement:
28
+ version_requirements: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 0.6.3
33
+ version:
34
+ description: scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. Its most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows you to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!
35
+ email:
36
+ - peter@rubyrailways.com
37
+ - glenn.gillen@gmail.com
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files: []
43
+
44
+ files:
45
+ - COPYING
46
+ - README
47
+ - CHANGELOG
48
+ - Rakefile
49
+ - lib/scrubyt/core/navigation/agents/firewatir.rb
50
+ - lib/scrubyt/core/navigation/agents/mechanize.rb
51
+ - lib/scrubyt/core/navigation/fetch_action.rb
52
+ - lib/scrubyt/core/navigation/navigation_actions.rb
53
+ - lib/scrubyt/core/scraping/compound_example.rb
54
+ - lib/scrubyt/core/scraping/constraint.rb
55
+ - lib/scrubyt/core/scraping/constraint_adder.rb
56
+ - lib/scrubyt/core/scraping/filters/attribute_filter.rb
57
+ - lib/scrubyt/core/scraping/filters/base_filter.rb
58
+ - lib/scrubyt/core/scraping/filters/constant_filter.rb
59
+ - lib/scrubyt/core/scraping/filters/detail_page_filter.rb
60
+ - lib/scrubyt/core/scraping/filters/download_filter.rb
61
+ - lib/scrubyt/core/scraping/filters/html_subtree_filter.rb
62
+ - lib/scrubyt/core/scraping/filters/regexp_filter.rb
63
+ - lib/scrubyt/core/scraping/filters/script_filter.rb
64
+ - lib/scrubyt/core/scraping/filters/text_filter.rb
65
+ - lib/scrubyt/core/scraping/filters/tree_filter.rb
66
+ - lib/scrubyt/core/scraping/pattern.rb
67
+ - lib/scrubyt/core/scraping/pre_filter_document.rb
68
+ - lib/scrubyt/core/scraping/result_indexer.rb
69
+ - lib/scrubyt/core/shared/extractor.rb
70
+ - lib/scrubyt/logging.rb
71
+ - lib/scrubyt/output/post_processor.rb
72
+ - lib/scrubyt/output/result.rb
73
+ - lib/scrubyt/output/result_dumper.rb
74
+ - lib/scrubyt/output/result_node.rb
75
+ - lib/scrubyt/output/scrubyt_result.rb
76
+ - lib/scrubyt/utils/compound_example_lookup.rb
77
+ - lib/scrubyt/utils/ruby_extensions.rb
78
+ - lib/scrubyt/utils/shared_utils.rb
79
+ - lib/scrubyt/utils/simple_example_lookup.rb
80
+ - lib/scrubyt/utils/xpathutils.rb
81
+ - lib/scrubyt.rb
82
+ has_rdoc: true
83
+ homepage: http://scrubyt.org/
84
+ post_install_message:
85
+ rdoc_options: []
86
+
87
+ require_paths:
88
+ - lib
89
+ required_ruby_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: "0"
94
+ version:
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: "0"
100
+ version:
101
+ requirements: []
102
+
103
+ rubyforge_project: scrubyt
104
+ rubygems_version: 1.2.0
105
+ signing_key:
106
+ specification_version: 2
107
+ summary: A powerful Web-scraping framework built on Mechanize and Hpricot (and FireWatir)
108
+ test_files:
109
+ - test/blackbox_test.rb
110
+ - test/blackbox_tests/basic/multi_root.rb
111
+ - test/blackbox_tests/basic/simple.rb
112
+ - test/blackbox_tests/detail_page/one_detail_page.rb
113
+ - test/blackbox_tests/detail_page/two_detail_pages.rb
114
+ - test/blackbox_tests/next_page/next_page_link.rb
115
+ - test/blackbox_tests/next_page/page_list_links.rb