scrubber-scrubyt 0.4.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. data/CHANGELOG +343 -0
  2. data/COPYING +340 -0
  3. data/README +99 -0
  4. data/Rakefile +101 -0
  5. data/lib/scrubyt/core/navigation/agents/firewatir.rb +249 -0
  6. data/lib/scrubyt/core/navigation/agents/mechanize.rb +253 -0
  7. data/lib/scrubyt/core/navigation/fetch_action.rb +54 -0
  8. data/lib/scrubyt/core/navigation/navigation_actions.rb +95 -0
  9. data/lib/scrubyt/core/scraping/compound_example.rb +30 -0
  10. data/lib/scrubyt/core/scraping/constraint.rb +169 -0
  11. data/lib/scrubyt/core/scraping/constraint_adder.rb +49 -0
  12. data/lib/scrubyt/core/scraping/filters/attribute_filter.rb +14 -0
  13. data/lib/scrubyt/core/scraping/filters/base_filter.rb +112 -0
  14. data/lib/scrubyt/core/scraping/filters/constant_filter.rb +9 -0
  15. data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +37 -0
  16. data/lib/scrubyt/core/scraping/filters/download_filter.rb +64 -0
  17. data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +9 -0
  18. data/lib/scrubyt/core/scraping/filters/regexp_filter.rb +13 -0
  19. data/lib/scrubyt/core/scraping/filters/script_filter.rb +11 -0
  20. data/lib/scrubyt/core/scraping/filters/text_filter.rb +34 -0
  21. data/lib/scrubyt/core/scraping/filters/tree_filter.rb +138 -0
  22. data/lib/scrubyt/core/scraping/pattern.rb +359 -0
  23. data/lib/scrubyt/core/scraping/pre_filter_document.rb +14 -0
  24. data/lib/scrubyt/core/scraping/result_indexer.rb +90 -0
  25. data/lib/scrubyt/core/shared/extractor.rb +167 -0
  26. data/lib/scrubyt/logging.rb +154 -0
  27. data/lib/scrubyt/output/post_processor.rb +139 -0
  28. data/lib/scrubyt/output/result.rb +44 -0
  29. data/lib/scrubyt/output/result_dumper.rb +154 -0
  30. data/lib/scrubyt/output/result_node.rb +140 -0
  31. data/lib/scrubyt/output/scrubyt_result.rb +42 -0
  32. data/lib/scrubyt/utils/compound_example_lookup.rb +50 -0
  33. data/lib/scrubyt/utils/ruby_extensions.rb +85 -0
  34. data/lib/scrubyt/utils/shared_utils.rb +58 -0
  35. data/lib/scrubyt/utils/simple_example_lookup.rb +40 -0
  36. data/lib/scrubyt/utils/xpathutils.rb +202 -0
  37. data/lib/scrubyt.rb +43 -0
  38. data/test/blackbox_test.rb +60 -0
  39. data/test/blackbox_tests/basic/multi_root.rb +6 -0
  40. data/test/blackbox_tests/basic/simple.rb +5 -0
  41. data/test/blackbox_tests/detail_page/one_detail_page.rb +9 -0
  42. data/test/blackbox_tests/detail_page/two_detail_pages.rb +9 -0
  43. data/test/blackbox_tests/next_page/next_page_link.rb +7 -0
  44. data/test/blackbox_tests/next_page/page_list_links.rb +7 -0
  45. metadata +115 -0
@@ -0,0 +1,50 @@
1
module Scrubyt
  #=<tt>Lookup of compound examples</tt>
  #There are two types of string examples in scRUBYt! right now:
  #the simple example and the compound example.
  #
  #This class is responsible for finding elements matched by compound examples.
  #In the future probably more sophisticated matching algorithms will be added
  #(e.g. match the n-th which matches the text, or element that matches the
  #text but also contains a specific attribute etc.)
  class CompoundExampleLookup
    #Find the node below +doc+ that satisfies every descriptor of the
    #compound example.
    #
    #*parameters*
    #
    #_doc_ - the node whose subtree is searched
    #
    #_compound_example_ - hash of descriptors (see lookup_compound_example)
    #
    #_next_link_ - accepted but not used by this method
    #
    #_index_ - which of the matching nodes to return (0-based)
    #
    #Returns the matching node, or nil if there is no match at +index+.
    #The misspelled method name is kept for backward compatibility.
    def self.find_node_from_compund_example(doc, compound_example, next_link=false, index = 0)
      @partial_results = []
      self.lookup_compound_example(doc, compound_example, index)
    end

    #NOTE: a bare +private+ does not affect methods defined with +def self.+,
    #so the methods below stay callable from outside; they are helpers only.
    private
    #Lookup the first element which is matched by this compound example
    #
    #A compound example is specified with :contains, :begins_with and
    #:ends_with descriptors - which can be both regexps or strings
    #
    #Example:
    #
    #flight_info :begins_with => 'Arrival', :contains => /\d\d-\d+/, :ends_with => '20:00'
    #
    #The first descriptor collects the candidates, every further descriptor
    #narrows them down. A descriptor that matches nothing therefore yields an
    #empty result instead of restarting the search with the next descriptor.
    def self.lookup_compound_example(doc, compound_example, index)
      #Never refine candidates left over from a previous lookup
      @partial_results = []
      first_descriptor = true
      compound_example.each do |kind, example|
        #Strings are matched literally, regexps are embedded as they are
        example = Regexp.escape(example) if example.is_a? String
        pattern = case kind
                  when :contains    then /#{example}/
                  when :begins_with then /^\s*#{example}/
                  when :ends_with   then /#{example}\s*$/
                  else example #unknown descriptors are passed through untouched
                  end
        if first_descriptor
          @partial_results = SharedUtils.traverse_for_match(doc, pattern)
          first_descriptor = false
        else
          refine_partial_results(pattern)
        end
      end
      @partial_results[index]
    end

    #Keep only those candidates whose text (inner HTML with the tags
    #stripped) matches +regexp+ as well.
    def self.refine_partial_results(regexp)
      @partial_results = @partial_results.select {|pr| pr.inner_html.gsub(/<.*?>/, '') =~ regexp}
    end

  end #End of class CompoundExampleLookup
end #End of module Scrubyt
@@ -0,0 +1,85 @@
1
# Class-level macros for declaring accessors backed by an @options hash.
class Module
  # Defines one reader per key of +key_default_hash+.
  #
  # A reader returns @options[key] unless that value is nil (or @options
  # itself has not been set), in which case the default is returned. A Proc
  # default is evaluated in the context of the instance on every such call.
  def option_reader(key_default_hash)
    key_default_hash.each do |key, default|
      define_method(key) do
        stored = @options.nil? ? nil : @options[key]
        if !stored.nil?
          stored
        elsif default.is_a? Proc
          # instance_eval passes the receiver as a block argument, which a
          # zero-arity lambda rejects; instance_exec passes no arguments.
          default.arity == 0 ? instance_exec(&default) : instance_eval(&default)
        else
          default
        end
      end
    end
  end

  # Defines a "key=" writer for every given key, storing into @options
  # (which is created on demand).
  def option_writer(*keys)
    keys.each do |key|
      define_method("#{key.to_s}=".to_sym) do |value|
        (@options ||= {})[key] = value
      end
    end
  end

  # Defines a reader for +key+ with the given default and, when +writable+
  # is true, the matching writer.
  def option(key, default=nil, writable=false)
    option_reader(key => default)
    option_writer(key) if writable
  end

  # Defines a reader and a writer for every key => default pair.
  def option_accessor(key_default_hash)
    key_default_hash.each do |key, default|
      option(key, default, true)
    end
  end
end
37
+
38
# Extensions that let ranges be ordered and shifted.
class Range
  # Orders ranges by their first element only.
  def <=>(other)
    self.begin <=> other.begin
  end

  # Returns a new range with both ends moved up by +amount+. The kind of
  # range is kept: an exclusive range (a...b) stays exclusive.
  def +(amount)
    Range.new(self.begin + amount, self.end + amount, exclude_end?)
  end

  # Returns a new range with both ends moved down by +amount+, keeping the
  # exclusive/inclusive kind of the receiver.
  def -(amount)
    Range.new(self.begin - amount, self.end - amount, exclude_end?)
  end
end
51
+
52
# Two-argument min/max helpers.
module Math
  # Returns +a+ when it is strictly smaller than +b+, otherwise +b+.
  def self.min(a, b)
    return a if a < b
    b
  end

  # Returns +a+ when it is strictly greater than +b+, otherwise +b+.
  def self.max(a, b)
    return a if a > b
    b
  end
end
61
+
62
+ #dec 16: Dropped - causes some errors w/ Rails
63
+ #just some hack here to allow current examples' syntax:
64
+ #table_data.to_xml.write(open('result.xml', 'w'), 1)
65
+ #class String
66
+ # def write(stringio, add_indent=0)
67
+ # stringio.write((self.split("\n").collect { |line| (' ' * add_indent) + line }).join("\n"))
68
+ # end
69
+ #end
70
+
71
#hack to simulate the ancestor::tag selector of XPath
module Hpricot
  class Elem
    # Walks from this element up towards the document.
    #
    # Without +tag+: returns an Hpricot::Elements holding this element and
    # all of its ancestors (nearest first), stopping before the Hpricot::Doc.
    #
    # With +tag+: returns the first element on that way (this element
    # included) whose name equals +tag+; if none matches, the collected
    # Hpricot::Elements path is returned instead.
    def ancestors(tag = nil)
      element = self
      path = Hpricot::Elements.new
      # The nil check stops cleanly at the top of a subtree that is not
      # attached to a document, instead of calling methods on nil.
      while !element.nil? && element.class != Hpricot::Doc do
        return element if (tag && (tag == element.name))
        path.push element
        element = element.parent
      end
      path
    end
  end
end
@@ -0,0 +1,58 @@
1
module Scrubyt
  ##
  #=<tt>Utilities shared between the other utility classes (XPathUtils, SimpleExampleLookup,...)</tt>
  #
  class SharedUtils
    #Entities to replace - need to make this more complete, or install htmlentities or similar package
    ENTITIES = {
      'quot' => '"',
      'apos' => "'",
      'amp' => '&',
      'lt' => '<',
      'gt' => '>',
      'nbsp' => ' '}

    #Unescape the known entities and strip surrounding whitespace.
    #The passed string is modified in place and returned.
    def self.prepare_text_for_comparison(text)
      unescape_entities text
      text.strip!
      text
    end

    #Unescape the entities in the HTML!
    #
    #The string is modified in place and returned. All entities are replaced
    #in one pass, so the output of one replacement is never unescaped again
    #(e.g. "&amp;lt;" becomes "&lt;", not "<").
    def self.unescape_entities(text)
      return text if ENTITIES.empty?
      #Built on every call so entries added to ENTITIES later are honoured
      names = ENTITIES.keys.map { |name| Regexp.escape(name) }.join('|')
      text.gsub!(/&(#{names});/) { ENTITIES[$1].to_s }
      text
    end

    #Entry point for finding the elements specified by examples
    #
    #Walks +node+ and its Hpricot::Elem descendants and collects every node
    #whose text (inner HTML with the tags stripped, entities unescaped,
    #whitespace trimmed) matches +regexp+. When a node matches, its parent is
    #removed from the results, so the lowest matching nodes are kept.
    #
    #Every matching node gets a +match_data+ reader returning the MatchData
    #of its match.
    def self.traverse_for_match(node, regexp)
      results = []
      visit = lambda { |current|
        plain_text = prepare_text_for_comparison(current.inner_html.gsub(/<.*?>/, ''))
        if plain_text =~ regexp
          current.instance_variable_set(:@match_data, $~)
          def current.match_data
            @match_data
          end
          results << current
          results.delete current.parent if current.is_a? Hpricot::Elem
        end
        #children is guarded because it may be nil (presumably for empty elements)
        (current.children || []).each { |child| visit.call(child) if (child.is_a? Hpricot::Elem) }
      }
      visit.call(node)
      results
    end

    #Return the call stack of the caller of this method as an array of strings.
    #
    #Kernel#caller is used rather than raising and rescuing an exception: a
    #bare +raise+ inside a rescue block re-raises the exception currently
    #being handled, which would report that exception's backtrace instead.
    def self.get_backtrace
      caller
    end
  end #end of class SharedUtils
end #end of module Scrubyt
@@ -0,0 +1,40 @@
1
module Scrubyt
  #=<tt>Lookup of simple examples</tt>
  #There are two types of string examples in scRUBYt! right now:
  #the simple example and the compound example.
  #
  #This class is responsible for finding elements matched by simple examples.
  #In the future probably more sophisticated matching algorithms will be added
  #(e.g. match the n-th which matches the text, or element that matches the
  #text but also contains a specific attribute etc.)
  class SimpleExampleLookup
    #From the example text defined by the user, find the lowest possible node which contains the text 'text'.
    #The text can be also a mixed content text, e.g.
    #
    # <a>Bon <b>nuit</b>, monsieur!</a>
    #
    #In this case, <a>'s text is considered to be "Bon nuit, monsieur"
    #
    #The example may carry one of these suffixes/wrappers:
    #
    # "some text/@attr"  - the "/@attr" part is dropped for the lookup
    # "some text[2]"     - the trailing number overrides +index+
    # "name[some text]"  - look up a node containing "some text", then walk
    #                      up to the enclosing element called "name"
    #
    #*parameters*
    #
    #_doc_ - the node whose subtree is searched
    #
    #_text_ - the example; it is not modified (a copy is worked on, so
    #frozen strings are fine)
    #
    #_next_link_ - accepted but not used by this method
    #
    #_index_ - which of the matching nodes to return (0-based)
    #
    #Returns the node found, or nil.
    def self.find_node_from_text(doc, text, next_link=false, index = 0)
      text = text.gsub('»', '&#187;')
      final_element_name = nil
      #Process immediate attribute extraction (like "go to google.com/@href")
      if text =~ /.+\/@.+$/
        text = text.scan(/^(.+?)\/@.+$/)[0][0]
      elsif text =~ /.+\[\d+\]$/
        res = text.scan(/(.+)\[(\d+)\]$/)
        text = res[0][0]
        index = res[0][1].to_i
      elsif text =~ /.+\[.+\]$/
        final_element_name = text.scan(/^(.+?)\[/)[0][0]
        text = text.scan(/\[(.+?)\]/)[0][0]
      end
      #The example is matched literally
      text = Regexp.escape(text)
      if final_element_name
        #The text only has to be contained; the result is the enclosing element
        result = SharedUtils.traverse_for_match(doc,/#{text}/)[index]
        XPathUtils.traverse_up_until_name(result,final_element_name)
      else
        #The whole text of the node has to equal the example
        SharedUtils.traverse_for_match(doc,/^#{text}$/)[index]
      end
    end
  end #End of class SimpleExampleLookup
end #End of module Scrubyt
@@ -0,0 +1,202 @@
1
+ require 'rubygems'
2
+ require 'hpricot'
3
+
4
+ module Scrubyt
5
+ ##
6
+ #=<tt>Various XPath utility functions</tt>
7
+ class XPathUtils
8
+
9
+ #Find the LCA (Lowest Common Ancestor) of two nodes
10
+ def self.lowest_common_ancestor(node1, node2)
11
+ path1 = traverse_up(node1)
12
+ path2 = traverse_up(node2)
13
+ return node1.parent if path1 == path2
14
+
15
+ closure = nil
16
+ while (!path1.empty? && !path2.empty?)
17
+ closure = path1.pop
18
+ return closure.parent if (closure != path2.pop)
19
+ end
20
+ path1.size > path2.size ? path1.last.parent : path2.last.parent
21
+ end
22
+
23
+ ##
24
+ #Generate XPath for the given node
25
+ #
26
+ #*parameters*
27
+ #
28
+ #_node_ - The node we are looking up the XPath for
29
+ #
30
+ #_stopnode_ - The Xpath generation is stopped and the XPath that
31
+ #was generated so far is returned if this node is reached.
32
+ #
33
+ #_write_indices_ - whether the index inside the parent shuold be
34
+ #added, as in html[1]/body[1]/table[2]/tr[1]/td[8]
35
+ def self.generate_XPath(node, stopnode=nil, write_indices=false)
36
+ path = []
37
+ indices = []
38
+ found = false
39
+ while !node.nil? && node.class != Hpricot::Doc do
40
+ if node == stopnode
41
+ found = true
42
+ break
43
+ end
44
+ path.push node.name
45
+ indices.push find_index(node) if write_indices
46
+ node = node.parent
47
+ end
48
+ #This condition ensures that if there is a stopnode, and we did not found it along the way,
49
+ #we return nil (since the stopnode is not contained in the path at all)
50
+ return nil if stopnode != nil && !found
51
+ result = ""
52
+ if write_indices
53
+ path.reverse.zip(indices.reverse).each { |node,index| result += "#{node}[#{index}]/" }
54
+ else
55
+ path.reverse.each{ |node| result += "#{node}/" }
56
+ end
57
+ "/" + result.chop
58
+ end
59
+
60
+ #Generate an XPath of the node with indices, relatively to the given
61
+ #relative_root.
62
+ #
63
+ #For example if the elem's absolute XPath is /a/b/c,
64
+ #and the relative root's Xpath is a/b, the result of the function will
65
+ #be /c.
66
+ def self.generate_relative_XPath( elem,relative_root )
67
+ return nil if (elem == relative_root)
68
+ generate_XPath(elem, relative_root, true)
69
+ end
70
+
71
+ #Generate a generalized XPath (i.e. without indices) of the node,
72
+ #relatively to the given relative_root.
73
+ #
74
+ #For example if the elem's absolute XPath is /a[1]/b[3]/c[5],
75
+ #and the relative root's Xpath is a[1]/b[3], the result of the function will
76
+ #be /c.
77
+ def self.generate_generalized_relative_XPath( elem,relative_root )
78
+ return nil if (elem == relative_root)
79
+ generate_XPath(elem, relative_root, false)
80
+ end
81
+
82
+ #Find an image based on the src of the example
83
+ #
84
+ #*parameters*
85
+ #
86
+ #_doc_ - The containing document
87
+ #
88
+ #_example_ - The value of the src attribute of the img tag
89
+ #This is convenient, since if the users rigth-clicks an image and
90
+ #copies image location, this string will be copied to the clipboard
91
+ #and thus can be easily pasted as an examle
92
+ #
93
+ #_index_ - there might be more images with the same src on the page -
94
+ #most typically the user will need the 0th - but if this is not the
95
+ #case, there is the possibility to override this
96
+ def self.find_image(doc, example, index=0)
97
+ if example =~ /\.(jpg|png|gif|jpeg)(\[\d+\])$/
98
+ res = example.scan(/(.+)\[(\d+)\]$/)
99
+ example = res[0][0]
100
+ index = res[0][1].to_i
101
+ end
102
+ (doc/"//img[@src='#{example}']")[index]
103
+ end
104
+
105
+ ##
106
+ #Used to find the parent of a node with the given name - for example
107
+ #find the <form> node which is the parent of the <input> node
108
+ def self.traverse_up_until_name(node, name)
109
+ while node.class != Hpricot::Doc do
110
+ #raise "The element is nil! This probably means the widget with the specified name ('#{name}') does not exist" unless node
111
+ return nil unless node
112
+ break if node.name == name
113
+ node = node.parent
114
+ end
115
+ node
116
+ end
117
+
118
+ ##
119
+ #Used when automatically looking up href attributes (for detail or next links)
120
+ #If the detail pattern did not extract a link, we first look up it's
121
+ #children - and if we don't find a link, traverse up
122
+ def self.find_nearest_node_with_attribute(node, attribute)
123
+ @node = nil
124
+ return node if node.is_a? Hpricot::Elem and node[attribute]
125
+ first_child_node_with_attribute(node, attribute)
126
+ first_parent_node_with_attribute(node, attribute) if !@node
127
+ @node
128
+ end
129
+
130
+ ##
131
+ #Generalre relative XPath from two XPaths: a parent one, (which points higher in the tree),
132
+ #and a child one. The result of the method is the relative XPath of the node pointed to
133
+ #by the second XPath to the node pointed to by the firs XPath.
134
+ def self.generate_relative_XPath_from_XPaths(parent_xpath, child_xpath)
135
+ original_child_xpath_parts = child_xpath.split('/').reject{|s|s==""}
136
+ pairs = to_general_XPath(child_xpath).split('/').reject{|s|s==""}.zip to_general_XPath(parent_xpath).split('/').reject{|s|s==""}
137
+ i = 0
138
+ pairs.each_with_index do |pair,index|
139
+ i = index
140
+ break if pair[0] != pair[1]
141
+ end
142
+ "/" + original_child_xpath_parts[i..-1].join('/')
143
+ end
144
+
145
+ def self.to_full_XPath(doc, xpath, generalize)
146
+ elem = doc/xpath
147
+ elem = elem.map[0] if elem.is_a? Hpricot::Elements
148
+ XPathUtils.generate_XPath(elem, nil, generalize)
149
+ end
150
+
151
+ private
152
+ #Find the index of the child inside the parent
153
+ #For example:
154
+ #
155
+ # tr
156
+ # / | \
157
+ # td td td
158
+ # 0 1 2
159
+ #
160
+ #The last row contains the indices of the td's from the
161
+ #tow above.
162
+ #
163
+ #Note that in classic XPath, the indices start with 1 (rather
164
+ #than 0).
165
+ def self.find_index(node)
166
+ c = 0
167
+ node.parent.children.each do |child|
168
+ if child.class == Hpricot::Elem
169
+ c += 1 if (child.name == node.name)
170
+ break if (node == child)
171
+ end
172
+ end
173
+ c
174
+ end
175
+
176
+ def self.traverse_up(node, stopnode=nil)
177
+ path = []
178
+ while node.class != Hpricot::Doc do
179
+ break if node == stopnode
180
+ path.push node
181
+ node = node.parent
182
+ end
183
+ path
184
+ end
185
+
186
+ def self.first_child_node_with_attribute(node, attribute)
187
+ return if !node.instance_of? Hpricot::Elem || @node
188
+ @node = node if node.attributes[attribute]
189
+ node.children.each { |child| first_child_node_with_attribute(child, attribute) }
190
+ end
191
+
192
+ def self.first_parent_node_with_attribute(node, attribute)
193
+ return if !node.instance_of? Hpricot::Elem || @node
194
+ @node = node if node.attributes[attribute]
195
+ first_parent_node_with_attribute(node.parent, attribute)
196
+ end
197
+
198
+ def self.to_general_XPath(xpath)
199
+ xpath.gsub(/\[.+?\]/) {""}
200
+ end #End of method to_general_XPath
201
+ end #End of class XPathUtils
202
+ end #End of module Scrubyt
data/lib/scrubyt.rb ADDED
@@ -0,0 +1,43 @@
1
+ $KCODE = "u"
2
+ require "jcode"
3
+
4
+ #ruby core
5
+ require "open-uri"
6
+ require "erb"
7
+
8
+ #gems
9
+ require "rexml/text"
10
+ require "rubygems"
11
+ require "mechanize"
12
+ require "hpricot"
13
+
14
+ #scrubyt
15
+ require "#{File.dirname(__FILE__)}/scrubyt/logging"
16
+ require "#{File.dirname(__FILE__)}/scrubyt/utils/ruby_extensions.rb"
17
+ require "#{File.dirname(__FILE__)}/scrubyt/utils/xpathutils.rb"
18
+ require "#{File.dirname(__FILE__)}/scrubyt/utils/shared_utils.rb"
19
+ require "#{File.dirname(__FILE__)}/scrubyt/utils/simple_example_lookup.rb"
20
+ require "#{File.dirname(__FILE__)}/scrubyt/utils/compound_example_lookup.rb"
21
+ require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/constraint_adder.rb"
22
+ require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/constraint.rb"
23
+ require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/result_indexer.rb"
24
+ require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/pre_filter_document.rb"
25
+ require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/compound_example.rb"
26
+ require "#{File.dirname(__FILE__)}/scrubyt/output/result_node.rb"
27
+ require "#{File.dirname(__FILE__)}/scrubyt/output/scrubyt_result.rb"
28
+ require "#{File.dirname(__FILE__)}/scrubyt/core/navigation/agents/mechanize.rb"
29
+ require "#{File.dirname(__FILE__)}/scrubyt/core/navigation/agents/firewatir.rb"
30
+ require "#{File.dirname(__FILE__)}/scrubyt/core/navigation/navigation_actions.rb"
31
+ require "#{File.dirname(__FILE__)}/scrubyt/core/navigation/fetch_action.rb"
32
+ require "#{File.dirname(__FILE__)}/scrubyt/core/shared/extractor.rb"
33
+ require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/base_filter.rb"
34
+ require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/attribute_filter.rb"
35
+ require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/constant_filter.rb"
36
+ require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/script_filter.rb"
37
+ require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/text_filter.rb"
38
+ require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/detail_page_filter.rb"
39
+ require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/download_filter.rb"
40
+ require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/html_subtree_filter.rb"
41
+ require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/regexp_filter.rb"
42
+ require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/filters/tree_filter.rb"
43
+ require "#{File.dirname(__FILE__)}/scrubyt/core/scraping/pattern.rb"
@@ -0,0 +1,60 @@
1
+ $lib_path = File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $:.unshift $lib_path
3
+
4
+ require 'scrubyt'
5
+ require 'test/unit'
6
+
7
# Runs one blackbox test.
#
# The extractor stored at +test_path+ is loaded and run, then exported with
# the 'lambda' template, re-evaluated and run again. Both results must
# serialize to exactly the XML stored next to the extractor in
# "<name>.expected.xml".
#
# test_path - path of the extractor file (*.rb)
# detailed  - when true, the extractor's own output is not silenced and the
#             extractor, the exported code, the expected XML and both
#             results are printed
#
# While the test runs the working directory is the extractor's directory;
# the previous directory and $stdout are restored afterwards.
def perform_test(test_path, detailed = false)
  out = $stdout
  $stdout = StringIO.new unless detailed
  cwd = Dir.getwd
  Dir.chdir(File.dirname(test_path))

  out.puts "Test: #{test_path}" if detailed
  out.puts "========== Print Output ==========" if detailed

  begin
    expected_xml = File.read(File.basename(test_path, '.rb') + ".expected.xml")

    scrubyt_result_native = Scrubyt::Extractor.load(File.basename(test_path))

    exported_code = scrubyt_result_native.export({:template => 'lambda'})
    # The exported code is produced by the library itself from the test
    # fixture, hence the eval.
    scrubyt_result_exported = Scrubyt::Extractor.define(&eval(exported_code))
  ensure
    if detailed
      out.puts "========== Native Extractor =========="
      out.puts IO.read(File.basename(test_path))
      out.puts "========== Exported Extractor =========="
      out.puts exported_code
      out.puts "========== Expected =========="
      out.puts expected_xml
      # The results are nil when a step above failed; printing must not
      # raise here, or it would replace the error that is being reported.
      out.puts "========== Result (native) =========="
      out.puts scrubyt_result_native && scrubyt_result_native.to_xml
      out.puts "========== Result (exported) =========="
      out.puts scrubyt_result_exported && scrubyt_result_exported.to_xml
    end
  end

  assert_equal expected_xml, scrubyt_result_native.to_xml
  assert_equal expected_xml, scrubyt_result_exported.to_xml
ensure
  Dir.chdir(cwd) if cwd
  $stdout = out
end
44
+
45
# Direct invocation with a path argument: run only that blackbox test in
# detailed mode, then quit before the test case class below is defined.
run_single_test = ($0 == __FILE__ && ARGV[0])
if run_single_test
  include Test::Unit::Assertions
  perform_test(ARGV[0], true)
  exit
end
50
+
51
# Defines one test method per extractor file found (recursively) under the
# blackbox_tests directory next to this file. The files are processed in
# sorted order and each method is named after the file's path.
class BlackboxTest < Test::Unit::TestCase
  fixture_glob = File.join(File.dirname(__FILE__), 'blackbox_tests', '**', '*.rb')

  Dir.glob(fixture_glob).sort.each do |test_path|
    define_method("test_#{test_path.gsub('/', '_')}") { perform_test(test_path) }
  end
end
@@ -0,0 +1,6 @@
1
# Blackbox fixture (test/blackbox_tests/basic/multi_root.rb per the file list above).
# Fetches three_divs.html from this file's directory, then calls the DSL
# methods `entry` and `data` with the examples '1' and '4' - presumably two
# root-level patterns; confirm against the extractor DSL.
+ lambda do
2
+ fetch(File.join(File.dirname(__FILE__), "three_divs.html"))
3
+
4
+ entry '1'
5
+ data '4'
6
+ end
@@ -0,0 +1,5 @@
1
# Blackbox fixture (test/blackbox_tests/basic/simple.rb per the file list above).
# Fetches three_divs.html from this file's directory, then calls the DSL
# method `entry` with the example '1'.
+ lambda do
2
+ fetch(File.join(File.dirname(__FILE__), "three_divs.html"))
3
+
4
+ entry '1'
5
+ end
@@ -0,0 +1,9 @@
1
# Blackbox fixture (test/blackbox_tests/detail_page/one_detail_page.rb per the file list above).
# Fetches main_page_1.html from this file's directory. `main` is given the
# example 'Main 1' and a nested block in which `xyz_detail` wraps `detail`
# with the example 'Detail 1' - presumably a detail-page pattern; confirm
# against the extractor DSL.
+ lambda do
2
+ fetch(File.join(File.dirname(__FILE__), "main_page_1.html"))
3
+
4
+ main 'Main 1' do
5
+ xyz_detail do
6
+ detail 'Detail 1'
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,9 @@
1
# Blackbox fixture (test/blackbox_tests/detail_page/two_detail_pages.rb per the file list above).
# Same calls as the one-detail-page fixture, but fetches main_page_2.html
# from this file's directory.
+ lambda do
2
+ fetch(File.join(File.dirname(__FILE__), "main_page_2.html"))
3
+
4
+ main 'Main 1' do
5
+ xyz_detail do
6
+ detail 'Detail 1'
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,7 @@
1
# Blackbox fixture (test/blackbox_tests/next_page/next_page_link.rb per the file list above).
# Fetches page_1.html from this file's directory, calls `entry` with the
# example '1' and `next_page` with the example 'Next' - presumably the text
# of the link leading to the following page; confirm against the extractor DSL.
+ lambda do
2
+ fetch(File.join(File.dirname(__FILE__), "page_1.html"))
3
+
4
+ entry '1'
5
+
6
+ next_page 'Next'
7
+ end
@@ -0,0 +1,7 @@
1
# Blackbox fixture (test/blackbox_tests/next_page/page_list_links.rb per the file list above).
# Fetches page_1.html from this file's directory, calls `entry` with the
# example '1' and `page_list` with the example 'Page 2' - presumably the text
# of a link in a list of page links; confirm against the extractor DSL.
+ lambda do
2
+ fetch(File.join(File.dirname(__FILE__), "page_1.html"))
3
+
4
+ entry '1'
5
+
6
+ page_list 'Page 2'
7
+ end