scrubyt 0.1.0 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +34 -0
- data/COPYING +340 -0
- data/README +34 -5
- data/Rakefile +6 -5
- data/lib/scrubyt.rb +1 -0
- data/lib/scrubyt/constraint.rb +12 -24
- data/lib/scrubyt/constraint_adder.rb +3 -17
- data/lib/scrubyt/export.rb +33 -17
- data/lib/scrubyt/extractor.rb +74 -23
- data/lib/scrubyt/filter.rb +52 -37
- data/lib/scrubyt/pattern.rb +74 -30
- data/lib/scrubyt/post_processor.rb +58 -0
- data/lib/scrubyt/result.rb +2 -2
- data/lib/scrubyt/result_dumper.rb +6 -0
- data/lib/scrubyt/xpathutils.rb +52 -15
- data/test/unittests/constraint_test.rb +0 -3
- data/test/unittests/extractor_test.rb +11 -13
- data/test/unittests/xpathutils_test.rb +31 -31
- metadata +8 -5
data/lib/scrubyt.rb
CHANGED
data/lib/scrubyt/constraint.rb
CHANGED
@@ -36,12 +36,11 @@ module Scrubyt
|
|
36
36
|
#2b) Do it on the XML level - most probably this solution will be implemented
|
37
37
|
|
38
38
|
# Different constraint types
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
CONSTRAINT_TYPE_ENSURE_ABSENCE_OF_ANCESTOR_NODE = 5
|
39
|
+
CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_PATTERN = 0
|
40
|
+
CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_ATTRIBUTE = 1
|
41
|
+
CONSTRAINT_TYPE_ENSURE_ABSENCE_OF_ATTRIBUTE = 2
|
42
|
+
CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_ANCESTOR_NODE = 3
|
43
|
+
CONSTRAINT_TYPE_ENSURE_ABSENCE_OF_ANCESTOR_NODE = 4
|
45
44
|
|
46
45
|
|
47
46
|
attr_reader :type, :target, :parent_filter
|
@@ -52,22 +51,12 @@ module Scrubyt
|
|
52
51
|
#(child pattern, or child pattern of a child pattern, etc.) denoted by "ancestor"
|
53
52
|
#'Has an ancestor pattern' means that the ancestor pattern actually extracts something
|
54
53
|
#(just by looking at the wrapper model, the ancestor pattern is always present)
|
55
|
-
#
|
56
|
-
|
57
|
-
|
54
|
+
#Note that for this type of constraint there is no 'ensure_absence' version, since
|
55
|
+
#I could not think of a use case for that
|
56
|
+
def self.add_ensure_presence_of_pattern(parent_filter, ancestor)
|
57
|
+
Constraint.new(parent_filter, ancestor, CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_PATTERN)
|
58
58
|
end
|
59
59
|
|
60
|
-
#Add 'ensure presence of ancestor pattern' constraint
|
61
|
-
|
62
|
-
#If this type of constraint is added to a pattern, it must NOT have an ancestor pattern
|
63
|
-
#(child pattern, or child pattern of a child pattern, etc.) denoted by "ancestor"
|
64
|
-
#'Has an ancestor pattern' means that the ancestor pattern actually extracts something
|
65
|
-
#(just by looking at the wrapper model, the ancestor pattern is always present)
|
66
|
-
#ON result level!!!
|
67
|
-
def self.add_ensure_absence_of_ancestor_pattern(parent_filter, ancestor)
|
68
|
-
Constraint.new(parent_filter, ancestor, CONSTRAINT_TYPE_ENSURE_ABSENCE_OF_ANCESTOR_PATTERN)
|
69
|
-
end
|
70
|
-
|
71
60
|
#Add 'ensure absence of attribute' constraint
|
72
61
|
|
73
62
|
#If this type of constraint is added to a pattern, the HTML node it targets
|
@@ -127,10 +116,9 @@ module Scrubyt
|
|
127
116
|
#content of the pattern
|
128
117
|
def check(result)
|
129
118
|
case @type
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
puts "CONSTRAINT_TYPE_ENSURE_ABSENCE_OF_ANCESTOR_PATTERN"
|
119
|
+
#checked after evaluation, so here always return true
|
120
|
+
when CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_PATTERN
|
121
|
+
return true
|
134
122
|
when CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_ATTRIBUTE
|
135
123
|
attribute_present(result)
|
136
124
|
when CONSTRAINT_TYPE_ENSURE_ABSENCE_OF_ATTRIBUTE
|
@@ -11,15 +11,8 @@ module Scrubyt
|
|
11
11
|
#functions with their documentation in Scrubyt::Constraint.rb
|
12
12
|
class ConstraintAdder
|
13
13
|
|
14
|
-
def self.
|
15
|
-
|
16
|
-
pattern.filters[0].ensure_presence_of_ancestor_pattern(ancestor_node_name)
|
17
|
-
pattern #To make chaining possible
|
18
|
-
end
|
19
|
-
|
20
|
-
def self.ensure_absence_of_ancestor_pattern(pattern, ancestor_node_name)
|
21
|
-
data = self.prepare_ensure_ancestor_pattern(pattern, sym_root, sym_ancestor)
|
22
|
-
pattern.filters[0].ensure_absence_of_ancestor_pattern(ancestor_node_name)
|
14
|
+
def self.ensure_presence_of_pattern(pattern, ancestor_node_name)
|
15
|
+
pattern.filters[0].ensure_presence_of_pattern(ancestor_node_name)
|
23
16
|
pattern #To make chaining possible
|
24
17
|
end
|
25
18
|
|
@@ -74,13 +67,6 @@ private
|
|
74
67
|
end
|
75
68
|
end
|
76
69
|
return attribute_pairs
|
77
|
-
end
|
78
|
-
|
79
|
-
def self.prepare_ensure_ancestor_pattern(pattern, root, ancestor)
|
80
|
-
context_pattern = find_by_name(pattern.root_pattern, root)
|
81
|
-
target_pattern = find_by_name(pattern.root_pattern, ancestor)
|
82
|
-
return [context_pattern, target_pattern]
|
83
|
-
end
|
84
|
-
|
70
|
+
end #end of method prepare_attributes
|
85
71
|
end #end of class ConstraintAddere
|
86
72
|
end #end of module Scrubyt
|
data/lib/scrubyt/export.rb
CHANGED
@@ -80,7 +80,7 @@ module Scrubyt
|
|
80
80
|
|
81
81
|
private
|
82
82
|
def self.export_header(output_file)
|
83
|
-
@result += "require '
|
83
|
+
@result += "require 'rubygems'\nrequire 'scrubyt'\n\n"
|
84
84
|
end
|
85
85
|
|
86
86
|
def self.cleanup_result
|
@@ -142,19 +142,21 @@ private
|
|
142
142
|
@name_to_xpath_map = {}
|
143
143
|
create_name_to_xpath_map(pattern)
|
144
144
|
#Replace the examples which are quoted with " and '
|
145
|
-
@name_to_xpath_map.each do |name,
|
146
|
-
replace_example_with_xpath(name,
|
147
|
-
replace_example_with_xpath(name,
|
145
|
+
@name_to_xpath_map.each do |name, xpaths|
|
146
|
+
replace_example_with_xpath(name, xpaths, %q{"})
|
147
|
+
replace_example_with_xpath(name, xpaths, %q{'})
|
148
148
|
end
|
149
149
|
#Finally, add XPaths to pattern which had no example at the beginning (the XPath was
|
150
150
|
#generated from the child patterns)
|
151
|
-
@name_to_xpath_map.each do |name,
|
152
|
-
|
153
|
-
|
154
|
-
@full_definition.
|
155
|
-
|
156
|
-
|
157
|
-
|
151
|
+
@name_to_xpath_map.each do |name, xpaths|
|
152
|
+
xpaths.each do |xpath|
|
153
|
+
comma = @full_definition.scan(Regexp.new("P.#{name}(.+)$"))[0][0].sub('do'){}.strip == '' ? '' : ','
|
154
|
+
if (@full_definition.scan(Regexp.new("P.#{name}(.+)$"))[0][0]).include?('{')
|
155
|
+
@full_definition.sub!("P.#{name}") {"P.#{name}('#{xpath}')"}
|
156
|
+
else
|
157
|
+
@full_definition.sub!("P.#{name}") {"P.#{name} \"#{xpath}\"#{comma}"}
|
158
|
+
end
|
159
|
+
end
|
158
160
|
end
|
159
161
|
@result += @full_definition
|
160
162
|
end
|
@@ -169,18 +171,32 @@ private
|
|
169
171
|
|
170
172
|
|
171
173
|
def self.create_name_to_xpath_map(pattern)
|
172
|
-
@name_to_xpath_map[pattern.name] =
|
174
|
+
@name_to_xpath_map[pattern.name] = []
|
175
|
+
pattern.filters.each do |filter|
|
176
|
+
@name_to_xpath_map[pattern.name] << filter.xpath if pattern.filters[0].xpath != nil
|
177
|
+
end
|
173
178
|
pattern.children.each {|child| create_name_to_xpath_map child}
|
174
179
|
end
|
175
180
|
|
176
|
-
def self.replace_example_with_xpath(name,
|
177
|
-
|
178
|
-
|
179
|
-
|
181
|
+
def self.replace_example_with_xpath(name, xpaths, left_delimiter, right_delimiter=left_delimiter)
|
182
|
+
return if name=='root'
|
183
|
+
full_line = @full_definition.scan(Regexp.new("P.#{name}(.+)$"))[0][0]
|
184
|
+
examples = full_line.split(",")
|
185
|
+
examples.reject! {|exa| exa.strip!; exa[0..0] != %q{"} && exa[0..0] != %q{'} }
|
186
|
+
all_xpaths = ""
|
187
|
+
examples.each do |e|
|
188
|
+
index = examples.index(e)
|
189
|
+
xpath = xpaths[index]
|
190
|
+
return if xpath == nil
|
191
|
+
all_xpaths += ", " if index > 0
|
192
|
+
all_xpaths += '"' + xpath + '"'
|
193
|
+
end
|
194
|
+
replacing_xpath = full_line.include?('{') ? "P.#{name}('#{all_xpaths}')" :
|
195
|
+
"P.#{name} #{all_xpaths}"
|
180
196
|
@full_definition.sub!(/P\.#{name}\s+#{left_delimiter}(.*)#{right_delimiter}/) do
|
181
197
|
@name_to_xpath_map.delete("#{name}")
|
182
198
|
replacing_xpath
|
183
|
-
end
|
199
|
+
end
|
184
200
|
end
|
185
201
|
|
186
202
|
end
|
data/lib/scrubyt/extractor.rb
CHANGED
@@ -4,6 +4,7 @@ require 'rubygems'
|
|
4
4
|
require 'mechanize'
|
5
5
|
require 'hpricot'
|
6
6
|
require 'pp'
|
7
|
+
require 'set'
|
7
8
|
|
8
9
|
module Scrubyt
|
9
10
|
##
|
@@ -43,6 +44,8 @@ module Scrubyt
|
|
43
44
|
else
|
44
45
|
evaluate_wrapper(root_pattern)
|
45
46
|
end
|
47
|
+
ensure_all_postconditions(root_pattern)
|
48
|
+
PostProcessor.remove_multiple_filter_duplicates(root_pattern)
|
46
49
|
#Return the root pattern
|
47
50
|
root_pattern
|
48
51
|
end
|
@@ -104,39 +107,35 @@ module Scrubyt
|
|
104
107
|
|
105
108
|
##
|
106
109
|
#Action to fetch a document (either a file or a http address)
|
107
|
-
#
|
110
|
+
#
|
108
111
|
#*parameters*
|
109
112
|
#
|
110
113
|
#_doc_url_ - the url or file name to fetch
|
111
114
|
def self.fetch(doc_url, mechanize_doc=nil)
|
112
|
-
puts "fetching: #{doc_url}"
|
113
115
|
if (mechanize_doc == nil)
|
114
116
|
@@current_doc_url = doc_url
|
115
117
|
@@current_doc_protocol = ((doc_url =~ /^http/ || doc_url =~ /^www/) ? :http : :file)
|
116
118
|
if @@base_dir == nil
|
117
119
|
@@base_dir = doc_url.scan(/.+\//)[0] if @@current_doc_protocol == :file
|
118
|
-
else
|
120
|
+
else
|
119
121
|
@@current_doc_url = ((@@base_dir + doc_url) if doc_url !~ /#{@@base_dir}/)
|
120
122
|
end
|
121
123
|
|
122
|
-
if @@host_name
|
123
|
-
if
|
124
|
-
@@
|
125
|
-
@@
|
126
|
-
end
|
127
|
-
else
|
128
|
-
@@current_doc_url = (@@host_name + doc_url) if doc_url !~ /#{@@host_name}/
|
124
|
+
if @@host_name != nil
|
125
|
+
if doc_url !~ /#{@@host_name}/
|
126
|
+
@@current_doc_url = (@@host_name + doc_url)
|
127
|
+
@@current_doc_url.gsub!(/([^:])\/\//) {"#{$1}/"}
|
128
|
+
end
|
129
129
|
end
|
130
|
-
|
130
|
+
puts "[ACTION] fetching document: #{@@current_doc_url}"
|
131
131
|
@@mechanize_doc = @@agent.get(@@current_doc_url) if @@current_doc_protocol == :http
|
132
132
|
else
|
133
133
|
@@current_doc_url = doc_url
|
134
134
|
@@mechanize_doc = mechanize_doc
|
135
|
+
@@host_name = 'http://' + @@mechanize_doc.uri.to_s.scan(/http:\/\/(.+\/)+/).flatten[0]
|
136
|
+
@@host_name = doc_url if @@host_name == nil
|
135
137
|
end
|
136
|
-
@@hpricot_doc =
|
137
|
-
out = open('kamaty.html', 'w')
|
138
|
-
out.write @@hpricot_doc.to_s
|
139
|
-
out.close
|
138
|
+
@@hpricot_doc = Hpricot(open(@@current_doc_url))#.to_original_html
|
140
139
|
end
|
141
140
|
|
142
141
|
##
|
@@ -149,7 +148,7 @@ module Scrubyt
|
|
149
148
|
#
|
150
149
|
#_query_string_ - the string that should be entered into the textfield
|
151
150
|
def self.fill_textfield(textfield_name, query_string)
|
152
|
-
puts
|
151
|
+
puts "[ACTION] typing #{query_string} into the textfield named '#{textfield_name}'"
|
153
152
|
textfield = (@@hpricot_doc/"input[@name=#{textfield_name}]").map()[0]
|
154
153
|
formname = Scrubyt::XPathUtils.traverse_up_until_name(textfield, 'form').attributes['name']
|
155
154
|
@@current_form = @@mechanize_doc.forms.with.name(formname).first
|
@@ -158,16 +157,16 @@ module Scrubyt
|
|
158
157
|
|
159
158
|
#Submit the last form;
|
160
159
|
def self.submit
|
161
|
-
puts '
|
160
|
+
puts '[ACTION] submitting form...'
|
162
161
|
result_page = @@agent.submit(@@current_form)#, @@current_form.buttons.first)
|
163
162
|
@@current_doc_url = result_page.uri.to_s
|
164
163
|
fetch(@@current_doc_url, result_page)
|
165
164
|
end
|
166
165
|
|
167
|
-
def self.click_link(link_text)
|
168
|
-
puts
|
169
|
-
puts /^#{Regexp.escape(link_text)}$/
|
170
|
-
p /^#{Regexp.escape(link_text)}$/
|
166
|
+
def self.click_link(link_text)
|
167
|
+
puts "[ACTION] clicking link: #{link_text}"
|
168
|
+
#puts /^#{Regexp.escape(link_text)}$/
|
169
|
+
#p /^#{Regexp.escape(link_text)}$/
|
171
170
|
link = @@mechanize_doc.links.text(/^#{Regexp.escape(link_text)}$/)
|
172
171
|
result_page = @@agent.click(link)
|
173
172
|
@@current_doc_url = result_page.uri.to_s
|
@@ -178,10 +177,62 @@ module Scrubyt
|
|
178
177
|
#############
|
179
178
|
|
180
179
|
private
|
180
|
+
def self.ensure_all_postconditions(pattern)
|
181
|
+
ensure_postconditions(pattern)
|
182
|
+
pattern.children.each {|child| ensure_all_postconditions(child)}
|
183
|
+
end
|
184
|
+
|
185
|
+
def self.ensure_postconditions(pattern)
|
186
|
+
#holds the names of those child patterns which have to be present as children of the input parameter
|
187
|
+
epop_names = pattern.get_constraints.select {|c| c.type == Scrubyt::Constraint::CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_PATTERN}.map {|c| c.target}
|
188
|
+
return if epop_names.empty?
|
189
|
+
#all_parent_values holds instances extracted by pattern
|
190
|
+
all_parent_values = []
|
191
|
+
pattern.result.childmap.each { |h| all_parent_values << h.values }
|
192
|
+
all_parent_values.flatten!
|
193
|
+
#indices of result instances (of pattern) we are going to remove
|
194
|
+
results_to_remove = Set.new
|
195
|
+
pattern.children.each do |child_pattern|
|
196
|
+
#all_child_values holds instances extracted by child_pattern
|
197
|
+
all_child_values = []
|
198
|
+
child_pattern.result.childmap.each { |h| all_child_values << h.values }
|
199
|
+
all_child_values.flatten!
|
200
|
+
|
201
|
+
#populate results_to_remove
|
202
|
+
i = 0
|
203
|
+
all_parent_values.each do |parent_value|
|
204
|
+
#Hey! Not just the direct children but all the ancestors
|
205
|
+
@found_ancestor = false
|
206
|
+
check_ancestors(parent_value, all_child_values)
|
207
|
+
|
208
|
+
results_to_remove << i if (!@found_ancestor && (epop_names.include? child_pattern.name))
|
209
|
+
i += 1
|
210
|
+
end
|
211
|
+
end
|
212
|
+
#based on results_to_remove, populate the array 'rejected' which holds the actual instances
|
213
|
+
#(and not indices, as in the case of results_to_remove!). In other words, we are mapping
|
214
|
+
#results_to_remove indices to their actual instances
|
215
|
+
rejected = []
|
216
|
+
i = -1
|
217
|
+
pattern.result.childmap.each do |h|
|
218
|
+
h.each { |k,v| rejected = v.reject {|e| i += 1; !results_to_remove.include? i } }
|
219
|
+
end
|
220
|
+
|
221
|
+
#Correct the statistics
|
222
|
+
pattern.get_instance_count[pattern.name] -= rejected.size
|
223
|
+
|
224
|
+
#Finally, do the actual delete!
|
225
|
+
pattern.result.childmap.each { |h| h.each { |k,v| rejected.each { |r| v.delete(r)} } }
|
226
|
+
end
|
227
|
+
|
228
|
+
def self.check_ancestors(parent_value, all_child_values)
|
229
|
+
parent_value.children.each { |child| @found_ancestor = true if all_child_values.include? child }
|
230
|
+
parent_value.children.each { |child| check_ancestors(child, all_child_values) if child.is_a? Hpricot::Elem }
|
231
|
+
end
|
232
|
+
|
181
233
|
def self.evaluate_wrapper(pattern)
|
182
234
|
pattern.evaluate
|
183
235
|
pattern.children.each { |child| evaluate_wrapper child }
|
184
|
-
end
|
185
|
-
|
236
|
+
end #end of method evaluate_wrapper
|
186
237
|
end #end of class Extractor
|
187
238
|
end #end of module Scrubyt
|
data/lib/scrubyt/filter.rb
CHANGED
@@ -2,7 +2,7 @@ module Scrubyt
|
|
2
2
|
##
|
3
3
|
#=<tt>Filter out relevant pieces from the parent pattern</tt>
|
4
4
|
#
|
5
|
-
#A Scrubyt
|
5
|
+
#A Scrubyt extractor is almost like a waterfall: water is pouring from the top until
|
6
6
|
#it reaches the bottom. The biggest difference is that instead of water, a HTML
|
7
7
|
#document travels through the space.
|
8
8
|
#
|
@@ -15,12 +15,12 @@ module Scrubyt
|
|
15
15
|
#The working of a filter will be explained most easily by the help of an example.
|
16
16
|
#Let's consider that we would like to extract information from a webshop; Concretely
|
17
17
|
#we are interested in the name of the items and the URL pointing to the image of the
|
18
|
-
#item
|
18
|
+
#item.
|
19
19
|
#
|
20
|
-
#To accomplish this
|
20
|
+
#To accomplish this, first we select the items with the pattern item (a pattern is
|
21
21
|
#a logical grouping of filters; see Pattern documentation). Then our new
|
22
|
-
#context is the result extracted by the item pattern; For every pattern, further
|
23
|
-
#extract the name and the image of the item; and finally,
|
22
|
+
#context is the result extracted by the 'item' pattern; For every 'item' pattern, further
|
23
|
+
#extract the name and the image of the item; and finally, extract the href attribute
|
24
24
|
#of the image. Let's see an illustration:
|
25
25
|
#
|
26
26
|
# root --> This pattern is called a 'root pattern', It is invisible to you
|
@@ -46,15 +46,18 @@ module Scrubyt
|
|
46
46
|
#Regexp example, like /\d+@*\d+[a-z]/
|
47
47
|
EXAMPLE_TYPE_REGEXP = 4
|
48
48
|
|
49
|
-
attr_accessor :example_type, :parent_pattern, :temp_sink,
|
49
|
+
attr_accessor :example_type, :parent_pattern, :temp_sink,
|
50
|
+
:constraints, :xpath, :regexp, :example, :source, :sink
|
50
51
|
|
51
|
-
def initialize(parent_pattern, *args)
|
52
|
+
def initialize(parent_pattern, example=nil, *args)
|
52
53
|
@parent_pattern = parent_pattern
|
53
54
|
#If the example type is not explicitly defined in the pattern definition,
|
54
55
|
#try to determine it automatically from the example
|
55
|
-
@example_type = (args[0] == nil ? Filter.determine_example_type(
|
56
|
+
@example_type = (args[0] == nil ? Filter.determine_example_type(example) :
|
56
57
|
args[0][:example_type])
|
57
|
-
@
|
58
|
+
@sink = [] #output of a filter
|
59
|
+
@source = [] #input of a filter
|
60
|
+
@example = example
|
58
61
|
@xpath = nil #The xpath to evaluate this filter
|
59
62
|
#temp sinks are used for the initial run when determining the XPaths for examples;
|
60
63
|
@temp_sink = nil
|
@@ -64,14 +67,15 @@ module Scrubyt
|
|
64
67
|
#Evaluate this filter. This method should not be called directly - as the pattern hierarchy
|
65
68
|
#is evaluated, every pattern evaluates its filters and then they are calling this method
|
66
69
|
def evaluate(source)
|
70
|
+
@parent_pattern.root_pattern.already_evaluated_sources ||= {}
|
67
71
|
case @parent_pattern.type
|
68
|
-
when Scrubyt::Pattern::PATTERN_TYPE_TREE
|
72
|
+
when Scrubyt::Pattern::PATTERN_TYPE_TREE
|
69
73
|
result = source/@xpath
|
70
74
|
result.class == Hpricot::Elements ? result.map : [result]
|
71
75
|
when Scrubyt::Pattern::PATTERN_TYPE_ATTRIBUTE
|
72
|
-
[source.attributes[@
|
76
|
+
[source.attributes[@example]]
|
73
77
|
when Scrubyt::Pattern::PATTERN_TYPE_REGEXP
|
74
|
-
source.inner_text.scan(@
|
78
|
+
source.inner_text.scan(@example).flatten
|
75
79
|
end
|
76
80
|
end
|
77
81
|
|
@@ -81,38 +85,49 @@ module Scrubyt
|
|
81
85
|
def generate_XPath_for_example
|
82
86
|
case @example_type
|
83
87
|
when EXAMPLE_TYPE_XPATH
|
84
|
-
@xpath = @
|
88
|
+
@xpath = @example
|
85
89
|
when EXAMPLE_TYPE_STRING
|
86
|
-
@temp_sink = XPathUtils.find_node_from_text( @parent_pattern.root_pattern.source[0], @
|
90
|
+
@temp_sink = XPathUtils.find_node_from_text( @parent_pattern.root_pattern.filters[0].source[0], @example )
|
87
91
|
@xpath = @parent_pattern.generalize ? XPathUtils.generate_XPath(@temp_sink, nil, false) :
|
88
92
|
XPathUtils.generate_XPath(@temp_sink, nil, true)
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
93
|
+
puts @xpath
|
94
|
+
when EXAMPLE_TYPE_CHILDREN
|
95
|
+
current_example_index = 0
|
96
|
+
loop do
|
97
|
+
all_child_temp_sinks = []
|
98
|
+
@parent_pattern.children.each do |child_pattern|
|
99
|
+
all_child_temp_sinks << child_pattern.filters[current_example_index].temp_sink
|
94
100
|
end
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
101
|
+
|
102
|
+
result = all_child_temp_sinks.pop
|
103
|
+
if all_child_temp_sinks.empty?
|
104
|
+
result = result.parent
|
105
|
+
else
|
106
|
+
all_child_temp_sinks.each do |child_sink|
|
107
|
+
result = XPathUtils.lowest_common_ancestor(result, child_sink)
|
108
|
+
end
|
102
109
|
end
|
110
|
+
xpath = @parent_pattern.generalize ? XPathUtils.generate_XPath(result, nil, false) :
|
111
|
+
XPathUtils.generate_XPath(result, nil, true)
|
112
|
+
if @parent_pattern.filters.size < current_example_index + 1
|
113
|
+
@parent_pattern.filters << Scrubyt::Filter.new(@parent_pattern)
|
114
|
+
end
|
115
|
+
@parent_pattern.filters[current_example_index].xpath = xpath
|
116
|
+
@parent_pattern.filters[current_example_index].temp_sink = result
|
117
|
+
@parent_pattern.children.each do |child_pattern|
|
118
|
+
child_pattern.filters[current_example_index].xpath =
|
119
|
+
child_pattern.generalize ? XPathUtils.generate_generalized_relative_XPath(child_pattern.filters[current_example_index].temp_sink, result) :
|
120
|
+
XPathUtils.generate_relative_XPath(child_pattern.filters[current_example_index].temp_sink, result)
|
121
|
+
end
|
122
|
+
if @parent_pattern.children[0].examples == nil
|
123
|
+
break if @parent_pattern.children[0].filters.size == current_example_index+1
|
124
|
+
else
|
125
|
+
break if @parent_pattern.children[0].examples.size == current_example_index+1
|
126
|
+
end
|
127
|
+
current_example_index += 1
|
103
128
|
end
|
104
|
-
@temp_sink = result
|
105
|
-
@xpath = @parent_pattern.generalize ? XPathUtils.generate_XPath(@temp_sink, nil, false) :
|
106
|
-
XPathUtils.generate_XPath(@temp_sink, nil, true)
|
107
|
-
@parent_pattern.children.each do |child_pattern|
|
108
|
-
child_pattern.filters.each do |filter|
|
109
|
-
filter.xpath =
|
110
|
-
child_pattern.generalize ? XPathUtils.generate_generalized_relative_XPath(filter.temp_sink, result) :
|
111
|
-
XPathUtils.generate_relative_XPath(filter.temp_sink, result)
|
112
|
-
end
|
113
|
-
end
|
114
129
|
when EXAMPLE_TYPE_IMAGE
|
115
|
-
@temp_sink = XPathUtils.find_image(@parent_pattern.root_pattern.source[0], @
|
130
|
+
@temp_sink = XPathUtils.find_image(@parent_pattern.root_pattern.filters[0].source[0], @example)
|
116
131
|
@xpath = XPathUtils.generate_XPath(@temp_sink, nil, false)
|
117
132
|
end
|
118
133
|
end
|