scrubyt 0.1.0 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +34 -0
- data/COPYING +340 -0
- data/README +34 -5
- data/Rakefile +6 -5
- data/lib/scrubyt.rb +1 -0
- data/lib/scrubyt/constraint.rb +12 -24
- data/lib/scrubyt/constraint_adder.rb +3 -17
- data/lib/scrubyt/export.rb +33 -17
- data/lib/scrubyt/extractor.rb +74 -23
- data/lib/scrubyt/filter.rb +52 -37
- data/lib/scrubyt/pattern.rb +74 -30
- data/lib/scrubyt/post_processor.rb +58 -0
- data/lib/scrubyt/result.rb +2 -2
- data/lib/scrubyt/result_dumper.rb +6 -0
- data/lib/scrubyt/xpathutils.rb +52 -15
- data/test/unittests/constraint_test.rb +0 -3
- data/test/unittests/extractor_test.rb +11 -13
- data/test/unittests/xpathutils_test.rb +31 -31
- metadata +8 -5
data/lib/scrubyt.rb
CHANGED
data/lib/scrubyt/constraint.rb
CHANGED
@@ -36,12 +36,11 @@ module Scrubyt
|
|
36
36
|
#2b) Do it on the XML level - most probably this solution will be implemented
|
37
37
|
|
38
38
|
# Different constraint types
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
CONSTRAINT_TYPE_ENSURE_ABSENCE_OF_ANCESTOR_NODE = 5
|
39
|
+
CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_PATTERN = 0
|
40
|
+
CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_ATTRIBUTE = 1
|
41
|
+
CONSTRAINT_TYPE_ENSURE_ABSENCE_OF_ATTRIBUTE = 2
|
42
|
+
CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_ANCESTOR_NODE = 3
|
43
|
+
CONSTRAINT_TYPE_ENSURE_ABSENCE_OF_ANCESTOR_NODE = 4
|
45
44
|
|
46
45
|
|
47
46
|
attr_reader :type, :target, :parent_filter
|
@@ -52,22 +51,12 @@ module Scrubyt
|
|
52
51
|
#(child pattern, or child pattern of a child pattern, etc.) denoted by "ancestor"
|
53
52
|
#'Has an ancestor pattern' means that the ancestor pattern actually extracts something
|
54
53
|
#(just by looking at the wrapper model, the ancestor pattern is always present)
|
55
|
-
#
|
56
|
-
|
57
|
-
|
54
|
+
#Note that for this type of constraint there is no 'ensure_absence' version, since
|
55
|
+
#I could not think of a use case for that
|
56
|
+
def self.add_ensure_presence_of_pattern(parent_filter, ancestor)
|
57
|
+
Constraint.new(parent_filter, ancestor, CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_PATTERN)
|
58
58
|
end
|
59
59
|
|
60
|
-
#Add 'ensure presence of ancestor pattern' constraint
|
61
|
-
|
62
|
-
#If this type of constraint is added to a pattern, it must NOT have an ancestor pattern
|
63
|
-
#(child pattern, or child pattern of a child pattern, etc.) denoted by "ancestor"
|
64
|
-
#'Has an ancestor pattern' means that the ancestor pattern actually extracts something
|
65
|
-
#(just by looking at the wrapper model, the ancestor pattern is always present)
|
66
|
-
#ON result level!!!
|
67
|
-
def self.add_ensure_absence_of_ancestor_pattern(parent_filter, ancestor)
|
68
|
-
Constraint.new(parent_filter, ancestor, CONSTRAINT_TYPE_ENSURE_ABSENCE_OF_ANCESTOR_PATTERN)
|
69
|
-
end
|
70
|
-
|
71
60
|
#Add 'ensure absence of attribute' constraint
|
72
61
|
|
73
62
|
#If this type of constraint is added to a pattern, the HTML node it targets
|
@@ -127,10 +116,9 @@ module Scrubyt
|
|
127
116
|
#content of the pattern
|
128
117
|
def check(result)
|
129
118
|
case @type
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
puts "CONSTRAINT_TYPE_ENSURE_ABSENCE_OF_ANCESTOR_PATTERN"
|
119
|
+
#checked after evaluation, so here always return true
|
120
|
+
when CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_PATTERN
|
121
|
+
return true
|
134
122
|
when CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_ATTRIBUTE
|
135
123
|
attribute_present(result)
|
136
124
|
when CONSTRAINT_TYPE_ENSURE_ABSENCE_OF_ATTRIBUTE
|
@@ -11,15 +11,8 @@ module Scrubyt
|
|
11
11
|
#functions with their documentation in Scrubyt::Constraint.rb
|
12
12
|
class ConstraintAdder
|
13
13
|
|
14
|
-
def self.
|
15
|
-
|
16
|
-
pattern.filters[0].ensure_presence_of_ancestor_pattern(ancestor_node_name)
|
17
|
-
pattern #To make chaining possible
|
18
|
-
end
|
19
|
-
|
20
|
-
def self.ensure_absence_of_ancestor_pattern(pattern, ancestor_node_name)
|
21
|
-
data = self.prepare_ensure_ancestor_pattern(pattern, sym_root, sym_ancestor)
|
22
|
-
pattern.filters[0].ensure_absence_of_ancestor_pattern(ancestor_node_name)
|
14
|
+
def self.ensure_presence_of_pattern(pattern, ancestor_node_name)
|
15
|
+
pattern.filters[0].ensure_presence_of_pattern(ancestor_node_name)
|
23
16
|
pattern #To make chaining possible
|
24
17
|
end
|
25
18
|
|
@@ -74,13 +67,6 @@ private
|
|
74
67
|
end
|
75
68
|
end
|
76
69
|
return attribute_pairs
|
77
|
-
end
|
78
|
-
|
79
|
-
def self.prepare_ensure_ancestor_pattern(pattern, root, ancestor)
|
80
|
-
context_pattern = find_by_name(pattern.root_pattern, root)
|
81
|
-
target_pattern = find_by_name(pattern.root_pattern, ancestor)
|
82
|
-
return [context_pattern, target_pattern]
|
83
|
-
end
|
84
|
-
|
70
|
+
end #end of method prepare_attributes
|
85
71
|
end #end of class ConstraintAddere
|
86
72
|
end #end of module Scrubyt
|
data/lib/scrubyt/export.rb
CHANGED
@@ -80,7 +80,7 @@ module Scrubyt
|
|
80
80
|
|
81
81
|
private
|
82
82
|
def self.export_header(output_file)
|
83
|
-
@result += "require '
|
83
|
+
@result += "require 'rubygems'\nrequire 'scrubyt'\n\n"
|
84
84
|
end
|
85
85
|
|
86
86
|
def self.cleanup_result
|
@@ -142,19 +142,21 @@ private
|
|
142
142
|
@name_to_xpath_map = {}
|
143
143
|
create_name_to_xpath_map(pattern)
|
144
144
|
#Replace the examples which are quoted with " and '
|
145
|
-
@name_to_xpath_map.each do |name,
|
146
|
-
replace_example_with_xpath(name,
|
147
|
-
replace_example_with_xpath(name,
|
145
|
+
@name_to_xpath_map.each do |name, xpaths|
|
146
|
+
replace_example_with_xpath(name, xpaths, %q{"})
|
147
|
+
replace_example_with_xpath(name, xpaths, %q{'})
|
148
148
|
end
|
149
149
|
#Finally, add XPaths to pattern which had no example at the beginning (the XPath was
|
150
150
|
#generated from the child patterns)
|
151
|
-
@name_to_xpath_map.each do |name,
|
152
|
-
|
153
|
-
|
154
|
-
@full_definition.
|
155
|
-
|
156
|
-
|
157
|
-
|
151
|
+
@name_to_xpath_map.each do |name, xpaths|
|
152
|
+
xpaths.each do |xpath|
|
153
|
+
comma = @full_definition.scan(Regexp.new("P.#{name}(.+)$"))[0][0].sub('do'){}.strip == '' ? '' : ','
|
154
|
+
if (@full_definition.scan(Regexp.new("P.#{name}(.+)$"))[0][0]).include?('{')
|
155
|
+
@full_definition.sub!("P.#{name}") {"P.#{name}('#{xpath}')"}
|
156
|
+
else
|
157
|
+
@full_definition.sub!("P.#{name}") {"P.#{name} \"#{xpath}\"#{comma}"}
|
158
|
+
end
|
159
|
+
end
|
158
160
|
end
|
159
161
|
@result += @full_definition
|
160
162
|
end
|
@@ -169,18 +171,32 @@ private
|
|
169
171
|
|
170
172
|
|
171
173
|
def self.create_name_to_xpath_map(pattern)
|
172
|
-
@name_to_xpath_map[pattern.name] =
|
174
|
+
@name_to_xpath_map[pattern.name] = []
|
175
|
+
pattern.filters.each do |filter|
|
176
|
+
@name_to_xpath_map[pattern.name] << filter.xpath if pattern.filters[0].xpath != nil
|
177
|
+
end
|
173
178
|
pattern.children.each {|child| create_name_to_xpath_map child}
|
174
179
|
end
|
175
180
|
|
176
|
-
def self.replace_example_with_xpath(name,
|
177
|
-
|
178
|
-
|
179
|
-
|
181
|
+
def self.replace_example_with_xpath(name, xpaths, left_delimiter, right_delimiter=left_delimiter)
|
182
|
+
return if name=='root'
|
183
|
+
full_line = @full_definition.scan(Regexp.new("P.#{name}(.+)$"))[0][0]
|
184
|
+
examples = full_line.split(",")
|
185
|
+
examples.reject! {|exa| exa.strip!; exa[0..0] != %q{"} && exa[0..0] != %q{'} }
|
186
|
+
all_xpaths = ""
|
187
|
+
examples.each do |e|
|
188
|
+
index = examples.index(e)
|
189
|
+
xpath = xpaths[index]
|
190
|
+
return if xpath == nil
|
191
|
+
all_xpaths += ", " if index > 0
|
192
|
+
all_xpaths += '"' + xpath + '"'
|
193
|
+
end
|
194
|
+
replacing_xpath = full_line.include?('{') ? "P.#{name}('#{all_xpaths}')" :
|
195
|
+
"P.#{name} #{all_xpaths}"
|
180
196
|
@full_definition.sub!(/P\.#{name}\s+#{left_delimiter}(.*)#{right_delimiter}/) do
|
181
197
|
@name_to_xpath_map.delete("#{name}")
|
182
198
|
replacing_xpath
|
183
|
-
end
|
199
|
+
end
|
184
200
|
end
|
185
201
|
|
186
202
|
end
|
data/lib/scrubyt/extractor.rb
CHANGED
@@ -4,6 +4,7 @@ require 'rubygems'
|
|
4
4
|
require 'mechanize'
|
5
5
|
require 'hpricot'
|
6
6
|
require 'pp'
|
7
|
+
require 'set'
|
7
8
|
|
8
9
|
module Scrubyt
|
9
10
|
##
|
@@ -43,6 +44,8 @@ module Scrubyt
|
|
43
44
|
else
|
44
45
|
evaluate_wrapper(root_pattern)
|
45
46
|
end
|
47
|
+
ensure_all_postconditions(root_pattern)
|
48
|
+
PostProcessor.remove_multiple_filter_duplicates(root_pattern)
|
46
49
|
#Return the root pattern
|
47
50
|
root_pattern
|
48
51
|
end
|
@@ -104,39 +107,35 @@ module Scrubyt
|
|
104
107
|
|
105
108
|
##
|
106
109
|
#Action to fetch a document (either a file or a http address)
|
107
|
-
#
|
110
|
+
#
|
108
111
|
#*parameters*
|
109
112
|
#
|
110
113
|
#_doc_url_ - the url or file name to fetch
|
111
114
|
def self.fetch(doc_url, mechanize_doc=nil)
|
112
|
-
puts "fetching: #{doc_url}"
|
113
115
|
if (mechanize_doc == nil)
|
114
116
|
@@current_doc_url = doc_url
|
115
117
|
@@current_doc_protocol = ((doc_url =~ /^http/ || doc_url =~ /^www/) ? :http : :file)
|
116
118
|
if @@base_dir == nil
|
117
119
|
@@base_dir = doc_url.scan(/.+\//)[0] if @@current_doc_protocol == :file
|
118
|
-
else
|
120
|
+
else
|
119
121
|
@@current_doc_url = ((@@base_dir + doc_url) if doc_url !~ /#{@@base_dir}/)
|
120
122
|
end
|
121
123
|
|
122
|
-
if @@host_name
|
123
|
-
if
|
124
|
-
@@
|
125
|
-
@@
|
126
|
-
end
|
127
|
-
else
|
128
|
-
@@current_doc_url = (@@host_name + doc_url) if doc_url !~ /#{@@host_name}/
|
124
|
+
if @@host_name != nil
|
125
|
+
if doc_url !~ /#{@@host_name}/
|
126
|
+
@@current_doc_url = (@@host_name + doc_url)
|
127
|
+
@@current_doc_url.gsub!(/([^:])\/\//) {"#{$1}/"}
|
128
|
+
end
|
129
129
|
end
|
130
|
-
|
130
|
+
puts "[ACTION] fetching document: #{@@current_doc_url}"
|
131
131
|
@@mechanize_doc = @@agent.get(@@current_doc_url) if @@current_doc_protocol == :http
|
132
132
|
else
|
133
133
|
@@current_doc_url = doc_url
|
134
134
|
@@mechanize_doc = mechanize_doc
|
135
|
+
@@host_name = 'http://' + @@mechanize_doc.uri.to_s.scan(/http:\/\/(.+\/)+/).flatten[0]
|
136
|
+
@@host_name = doc_url if @@host_name == nil
|
135
137
|
end
|
136
|
-
@@hpricot_doc =
|
137
|
-
out = open('kamaty.html', 'w')
|
138
|
-
out.write @@hpricot_doc.to_s
|
139
|
-
out.close
|
138
|
+
@@hpricot_doc = Hpricot(open(@@current_doc_url))#.to_original_html
|
140
139
|
end
|
141
140
|
|
142
141
|
##
|
@@ -149,7 +148,7 @@ module Scrubyt
|
|
149
148
|
#
|
150
149
|
#_query_string_ - the string that should be entered into the textfield
|
151
150
|
def self.fill_textfield(textfield_name, query_string)
|
152
|
-
puts
|
151
|
+
puts "[ACTION] typing #{query_string} into the textfield named '#{textfield_name}'"
|
153
152
|
textfield = (@@hpricot_doc/"input[@name=#{textfield_name}]").map()[0]
|
154
153
|
formname = Scrubyt::XPathUtils.traverse_up_until_name(textfield, 'form').attributes['name']
|
155
154
|
@@current_form = @@mechanize_doc.forms.with.name(formname).first
|
@@ -158,16 +157,16 @@ module Scrubyt
|
|
158
157
|
|
159
158
|
#Submit the last form;
|
160
159
|
def self.submit
|
161
|
-
puts '
|
160
|
+
puts '[ACTION] submitting form...'
|
162
161
|
result_page = @@agent.submit(@@current_form)#, @@current_form.buttons.first)
|
163
162
|
@@current_doc_url = result_page.uri.to_s
|
164
163
|
fetch(@@current_doc_url, result_page)
|
165
164
|
end
|
166
165
|
|
167
|
-
def self.click_link(link_text)
|
168
|
-
puts
|
169
|
-
puts /^#{Regexp.escape(link_text)}$/
|
170
|
-
p /^#{Regexp.escape(link_text)}$/
|
166
|
+
def self.click_link(link_text)
|
167
|
+
puts "[ACTION] clicking link: #{link_text}"
|
168
|
+
#puts /^#{Regexp.escape(link_text)}$/
|
169
|
+
#p /^#{Regexp.escape(link_text)}$/
|
171
170
|
link = @@mechanize_doc.links.text(/^#{Regexp.escape(link_text)}$/)
|
172
171
|
result_page = @@agent.click(link)
|
173
172
|
@@current_doc_url = result_page.uri.to_s
|
@@ -178,10 +177,62 @@ module Scrubyt
|
|
178
177
|
#############
|
179
178
|
|
180
179
|
private
|
180
|
+
def self.ensure_all_postconditions(pattern)
|
181
|
+
ensure_postconditions(pattern)
|
182
|
+
pattern.children.each {|child| ensure_all_postconditions(child)}
|
183
|
+
end
|
184
|
+
|
185
|
+
def self.ensure_postconditions(pattern)
|
186
|
+
#holds the names of those child patterns which have to be present as children of the input parameter
|
187
|
+
epop_names = pattern.get_constraints.select {|c| c.type == Scrubyt::Constraint::CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_PATTERN}.map {|c| c.target}
|
188
|
+
return if epop_names.empty?
|
189
|
+
#all_parent_values holds instances extracted by pattern
|
190
|
+
all_parent_values = []
|
191
|
+
pattern.result.childmap.each { |h| all_parent_values << h.values }
|
192
|
+
all_parent_values.flatten!
|
193
|
+
#indices of result instances (of pattern) we are going to remove
|
194
|
+
results_to_remove = Set.new
|
195
|
+
pattern.children.each do |child_pattern|
|
196
|
+
#all_child_values holds instances extracted by child_pattern
|
197
|
+
all_child_values = []
|
198
|
+
child_pattern.result.childmap.each { |h| all_child_values << h.values }
|
199
|
+
all_child_values.flatten!
|
200
|
+
|
201
|
+
#populate results_to_remove
|
202
|
+
i = 0
|
203
|
+
all_parent_values.each do |parent_value|
|
204
|
+
#Hey! Not just the direct children but all the descendants
|
205
|
+
@found_ancestor = false
|
206
|
+
check_ancestors(parent_value, all_child_values)
|
207
|
+
|
208
|
+
results_to_remove << i if (!@found_ancestor && (epop_names.include? child_pattern.name))
|
209
|
+
i += 1
|
210
|
+
end
|
211
|
+
end
|
212
|
+
#based on results_to_remove, populate the array 'rejected' which holds the actual instances
|
213
|
+
#(and not indices, as in the case of results_to_remove!). In other words, we are mapping
|
214
|
+
#results_to_remove indices to their actual instances
|
215
|
+
rejected = []
|
216
|
+
i = -1
|
217
|
+
pattern.result.childmap.each do |h|
|
218
|
+
h.each { |k,v| rejected = v.reject {|e| i += 1; !results_to_remove.include? i } }
|
219
|
+
end
|
220
|
+
|
221
|
+
#Correct the statistics
|
222
|
+
pattern.get_instance_count[pattern.name] -= rejected.size
|
223
|
+
|
224
|
+
#Finally, do the actual delete!
|
225
|
+
pattern.result.childmap.each { |h| h.each { |k,v| rejected.each { |r| v.delete(r)} } }
|
226
|
+
end
|
227
|
+
|
228
|
+
def self.check_ancestors(parent_value, all_child_values)
|
229
|
+
parent_value.children.each { |child| @found_ancestor = true if all_child_values.include? child }
|
230
|
+
parent_value.children.each { |child| check_ancestors(child, all_child_values) if child.is_a? Hpricot::Elem }
|
231
|
+
end
|
232
|
+
|
181
233
|
def self.evaluate_wrapper(pattern)
|
182
234
|
pattern.evaluate
|
183
235
|
pattern.children.each { |child| evaluate_wrapper child }
|
184
|
-
end
|
185
|
-
|
236
|
+
end #end of method evaluate_wrapper
|
186
237
|
end #end of class Extractor
|
187
238
|
end #end of module Scrubyt
|
data/lib/scrubyt/filter.rb
CHANGED
@@ -2,7 +2,7 @@ module Scrubyt
|
|
2
2
|
##
|
3
3
|
#=<tt>Filter out relevant pieces from the parent pattern</tt>
|
4
4
|
#
|
5
|
-
#A Scrubyt
|
5
|
+
#A Scrubyt extractor is almost like a waterfall: water is pouring from the top until
|
6
6
|
#it reaches the bottom. The biggest difference is that instead of water, a HTML
|
7
7
|
#document travels through the space.
|
8
8
|
#
|
@@ -15,12 +15,12 @@ module Scrubyt
|
|
15
15
|
#The working of a filter will be explained most easily by the help of an example.
|
16
16
|
#Let's consider that we would like to extract information from a webshop; Concretely
|
17
17
|
#we are interested in the name of the items and the URL pointing to the image of the
|
18
|
-
#item
|
18
|
+
#item.
|
19
19
|
#
|
20
|
-
#To accomplish this
|
20
|
+
#To accomplish this, first we select the items with the pattern item (a pattern is
|
21
21
|
#a logical grouping of filters; see Pattern documentation). Then our new
|
22
|
-
#context is the result extracted by the item pattern; For every pattern, further
|
23
|
-
#extract the name and the image of the item; and finally,
|
22
|
+
#context is the result extracted by the 'item' pattern; For every 'item' pattern, further
|
23
|
+
#extract the name and the image of the item; and finally, extract the href attribute
|
24
24
|
#of the image. Let's see an illustration:
|
25
25
|
#
|
26
26
|
# root --> This pattern is called a 'root pattern', It is invisible to you
|
@@ -46,15 +46,18 @@ module Scrubyt
|
|
46
46
|
#Regexp example, like /\d+@*\d+[a-z]/
|
47
47
|
EXAMPLE_TYPE_REGEXP = 4
|
48
48
|
|
49
|
-
attr_accessor :example_type, :parent_pattern, :temp_sink,
|
49
|
+
attr_accessor :example_type, :parent_pattern, :temp_sink,
|
50
|
+
:constraints, :xpath, :regexp, :example, :source, :sink
|
50
51
|
|
51
|
-
def initialize(parent_pattern, *args)
|
52
|
+
def initialize(parent_pattern, example=nil, *args)
|
52
53
|
@parent_pattern = parent_pattern
|
53
54
|
#If the example type is not explicitly defined in the pattern definition,
|
54
55
|
#try to determine it automatically from the example
|
55
|
-
@example_type = (args[0] == nil ? Filter.determine_example_type(
|
56
|
+
@example_type = (args[0] == nil ? Filter.determine_example_type(example) :
|
56
57
|
args[0][:example_type])
|
57
|
-
@
|
58
|
+
@sink = [] #output of a filter
|
59
|
+
@source = [] #input of a filter
|
60
|
+
@example = example
|
58
61
|
@xpath = nil #The xpath to evaluate this filter
|
59
62
|
#temp sinks are used for the initial run when determining the XPaths for examples;
|
60
63
|
@temp_sink = nil
|
@@ -64,14 +67,15 @@ module Scrubyt
|
|
64
67
|
#Evaluate this filter. This method should not be called directly - as the pattern hierarchy
|
65
68
|
#is evaluated, every pattern evaluates its filters and then they are calling this method
|
66
69
|
def evaluate(source)
|
70
|
+
@parent_pattern.root_pattern.already_evaluated_sources ||= {}
|
67
71
|
case @parent_pattern.type
|
68
|
-
when Scrubyt::Pattern::PATTERN_TYPE_TREE
|
72
|
+
when Scrubyt::Pattern::PATTERN_TYPE_TREE
|
69
73
|
result = source/@xpath
|
70
74
|
result.class == Hpricot::Elements ? result.map : [result]
|
71
75
|
when Scrubyt::Pattern::PATTERN_TYPE_ATTRIBUTE
|
72
|
-
[source.attributes[@
|
76
|
+
[source.attributes[@example]]
|
73
77
|
when Scrubyt::Pattern::PATTERN_TYPE_REGEXP
|
74
|
-
source.inner_text.scan(@
|
78
|
+
source.inner_text.scan(@example).flatten
|
75
79
|
end
|
76
80
|
end
|
77
81
|
|
@@ -81,38 +85,49 @@ module Scrubyt
|
|
81
85
|
def generate_XPath_for_example
|
82
86
|
case @example_type
|
83
87
|
when EXAMPLE_TYPE_XPATH
|
84
|
-
@xpath = @
|
88
|
+
@xpath = @example
|
85
89
|
when EXAMPLE_TYPE_STRING
|
86
|
-
@temp_sink = XPathUtils.find_node_from_text( @parent_pattern.root_pattern.source[0], @
|
90
|
+
@temp_sink = XPathUtils.find_node_from_text( @parent_pattern.root_pattern.filters[0].source[0], @example )
|
87
91
|
@xpath = @parent_pattern.generalize ? XPathUtils.generate_XPath(@temp_sink, nil, false) :
|
88
92
|
XPathUtils.generate_XPath(@temp_sink, nil, true)
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
93
|
+
puts @xpath
|
94
|
+
when EXAMPLE_TYPE_CHILDREN
|
95
|
+
current_example_index = 0
|
96
|
+
loop do
|
97
|
+
all_child_temp_sinks = []
|
98
|
+
@parent_pattern.children.each do |child_pattern|
|
99
|
+
all_child_temp_sinks << child_pattern.filters[current_example_index].temp_sink
|
94
100
|
end
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
101
|
+
|
102
|
+
result = all_child_temp_sinks.pop
|
103
|
+
if all_child_temp_sinks.empty?
|
104
|
+
result = result.parent
|
105
|
+
else
|
106
|
+
all_child_temp_sinks.each do |child_sink|
|
107
|
+
result = XPathUtils.lowest_common_ancestor(result, child_sink)
|
108
|
+
end
|
102
109
|
end
|
110
|
+
xpath = @parent_pattern.generalize ? XPathUtils.generate_XPath(result, nil, false) :
|
111
|
+
XPathUtils.generate_XPath(result, nil, true)
|
112
|
+
if @parent_pattern.filters.size < current_example_index + 1
|
113
|
+
@parent_pattern.filters << Scrubyt::Filter.new(@parent_pattern)
|
114
|
+
end
|
115
|
+
@parent_pattern.filters[current_example_index].xpath = xpath
|
116
|
+
@parent_pattern.filters[current_example_index].temp_sink = result
|
117
|
+
@parent_pattern.children.each do |child_pattern|
|
118
|
+
child_pattern.filters[current_example_index].xpath =
|
119
|
+
child_pattern.generalize ? XPathUtils.generate_generalized_relative_XPath(child_pattern.filters[current_example_index].temp_sink, result) :
|
120
|
+
XPathUtils.generate_relative_XPath(child_pattern.filters[current_example_index].temp_sink, result)
|
121
|
+
end
|
122
|
+
if @parent_pattern.children[0].examples == nil
|
123
|
+
break if @parent_pattern.children[0].filters.size == current_example_index+1
|
124
|
+
else
|
125
|
+
break if @parent_pattern.children[0].examples.size == current_example_index+1
|
126
|
+
end
|
127
|
+
current_example_index += 1
|
103
128
|
end
|
104
|
-
@temp_sink = result
|
105
|
-
@xpath = @parent_pattern.generalize ? XPathUtils.generate_XPath(@temp_sink, nil, false) :
|
106
|
-
XPathUtils.generate_XPath(@temp_sink, nil, true)
|
107
|
-
@parent_pattern.children.each do |child_pattern|
|
108
|
-
child_pattern.filters.each do |filter|
|
109
|
-
filter.xpath =
|
110
|
-
child_pattern.generalize ? XPathUtils.generate_generalized_relative_XPath(filter.temp_sink, result) :
|
111
|
-
XPathUtils.generate_relative_XPath(filter.temp_sink, result)
|
112
|
-
end
|
113
|
-
end
|
114
129
|
when EXAMPLE_TYPE_IMAGE
|
115
|
-
@temp_sink = XPathUtils.find_image(@parent_pattern.root_pattern.source[0], @
|
130
|
+
@temp_sink = XPathUtils.find_image(@parent_pattern.root_pattern.filters[0].source[0], @example)
|
116
131
|
@xpath = XPathUtils.generate_XPath(@temp_sink, nil, false)
|
117
132
|
end
|
118
133
|
end
|