scrubyt 0.2.6 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. data/CHANGELOG +59 -12
  2. data/Rakefile +2 -2
  3. data/lib/scrubyt.rb +24 -6
  4. data/lib/scrubyt/core/navigation/fetch_action.rb +91 -56
  5. data/lib/scrubyt/core/navigation/navigation_actions.rb +32 -22
  6. data/lib/scrubyt/core/scraping/constraint.rb +53 -57
  7. data/lib/scrubyt/core/scraping/constraint_adder.rb +15 -38
  8. data/lib/scrubyt/core/scraping/filters/attribute_filter.rb +17 -0
  9. data/lib/scrubyt/core/scraping/filters/base_filter.rb +111 -0
  10. data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +14 -0
  11. data/lib/scrubyt/core/scraping/filters/download_filter.rb +49 -0
  12. data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +7 -0
  13. data/lib/scrubyt/core/scraping/filters/regexp_filter.rb +17 -0
  14. data/lib/scrubyt/core/scraping/filters/tree_filter.rb +121 -0
  15. data/lib/scrubyt/core/scraping/pattern.rb +292 -157
  16. data/lib/scrubyt/core/scraping/result_indexer.rb +51 -47
  17. data/lib/scrubyt/core/shared/evaluation_context.rb +3 -42
  18. data/lib/scrubyt/core/shared/extractor.rb +122 -163
  19. data/lib/scrubyt/output/export.rb +59 -174
  20. data/lib/scrubyt/output/post_processor.rb +4 -3
  21. data/lib/scrubyt/output/result.rb +8 -9
  22. data/lib/scrubyt/output/result_dumper.rb +81 -42
  23. data/lib/scrubyt/utils/compound_example_lookup.rb +11 -11
  24. data/lib/scrubyt/utils/ruby_extensions.rb +113 -0
  25. data/lib/scrubyt/utils/shared_utils.rb +39 -26
  26. data/lib/scrubyt/utils/simple_example_lookup.rb +6 -6
  27. data/lib/scrubyt/utils/xpathutils.rb +31 -30
  28. data/test/unittests/constraint_test.rb +11 -7
  29. data/test/unittests/extractor_test.rb +6 -6
  30. data/test/unittests/filter_test.rb +66 -66
  31. metadata +22 -15
  32. data/lib/scrubyt/core/scraping/filter.rb +0 -201
@@ -1,5 +1,3 @@
1
- #require File.join(File.dirname(__FILE__), 'pattern.rb')
2
-
3
1
  module Scrubyt
4
2
  # =<tt>exporting previously defined extractors</tt>
5
3
  class Export
@@ -15,7 +13,7 @@ module Scrubyt
15
13
  #
16
14
  #*parameters*
17
15
  #
18
- #_pattern_ - the root pattern of the extractor. This is the variable 'something' in
16
+ #_root_pattern_ - the root pattern of the extractor. This is the variable 'something' in
19
17
  #such a call:
20
18
  #
21
19
  # something = Scrubyt::Extractor.define ...
@@ -63,194 +61,81 @@ module Scrubyt
63
61
  #This snippet will export the extractor to a file named 'my_super_camera_extractor.rb'.
64
62
  #After running 'my_super_camera_extractor.rb', the result will be dumped to the file
65
63
  #'/home/peter/stuff/result.xml'.
66
- def self.export(input_file, pattern, output_file_name, extractor_result_file_name)
67
- @result = ""
68
- contents = open(input_file).read
69
- wrapper_name = contents.scan(/\s+(.+)\s+=.*Extractor\.define.*/)
70
- output_file = output_file_name == nil ? open("#{wrapper_name}_extractor_export.rb", 'w') :
71
- open(output_file_name, 'w')
72
- export_header(output_file)
73
- export_subextractors(contents, pattern, output_file)
74
- export_extractor(contents, pattern, output_file)
75
- export_footer(output_file, wrapper_name, extractor_result_file_name)
76
- cleanup_result
77
- output_file.write(@result)
64
+ def self.export(root_pattern, wrapper_name, output_file_name=nil, extractor_result_file_name=nil)
65
+ sexp = [:block]
66
+ sexp << export_header(wrapper_name)
67
+ sexp << export_extractor(root_pattern, wrapper_name)
68
+ sexp << export_footer(wrapper_name, extractor_result_file_name)
69
+
70
+ result = RubyToRuby.new.process(sexp)
71
+ result.gsub! '"' + root_pattern.source_file + '"', '__FILE__'
72
+
73
+ output_file_name ||= "#{wrapper_name}_extractor_export.rb"
74
+ output_file = open(output_file_name, 'w')
75
+ output_file.write(result)
78
76
  output_file.close
79
- @result
77
+ result
80
78
  end
81
79
 
82
80
  private
83
- def self.export_header(output_file)
84
- @result += "require 'rubygems'\nrequire 'scrubyt'\n\n"
85
- end
86
-
87
- def self.cleanup_result
88
- @result.gsub!('P.') {}
89
- CompoundExample::DESCRIPTORS.each {|d|
90
- @result.gsub!(/,\s*:#{d.to_s}.+?'.+?'/) {}
91
- }
92
- end
93
-
94
-
95
- def self.export_subextractors(contents, pattern, output_file)
96
- all_subextractor_code = contents.scan(/.+=\s+lambda.+Extractor\.define/m)
97
- return if all_subextractor_code.empty?
98
- all_subextractor_code = all_subextractor_code[0].split("\n")
99
- pure_subextractor_code = []
100
- meaningful_code = false
101
- all_subextractor_code.each do |sec|
102
- meaningful_code = true if sec =~ /lambda/
103
- meaningful_code = false if sec =~ /Extractor.define/
104
- pure_subextractor_code << sec if meaningful_code
105
- end
106
- add_P pure_subextractor_code
107
- substitute_examples_with_XPaths(pattern,pure_subextractor_code)
81
+ def self.create_sexp(code)
82
+ (ParseTree.new.parse_tree_for_string(code))[0]
108
83
  end
109
-
110
- #OK, I have to admit: this function is powered by woodo magic. A lots of woodoo magic.
111
- #Piles of tons of heaps of woodoo magic :-)
112
- #
113
- #The only reason I can expect it to work is that it passes all the tests of the extractors
114
- #I have created so far. However at the same time I know how to create one easily which
115
- #would break the exporting, so don't experiment with this too much...
116
- #
117
- #The other solutions include:
118
- #- serialization (yaml, pstore etc) but that would mess the code terribly up - so
119
- #therefore I did not chose this solution.
120
- #- defining the block as string - however, this introduces ugly %q{}s etc - all in all,
121
- #this is still a more viable solution that serialization IMHO
122
- #- a lot of other tricks - however, all of these introduce a lot of noise which I don't
123
- #like.
124
- #
125
- #Conclusion: If there will be no terrible, unrepairable, uncontrollable etc. problems
126
- #with this approach, it will be replaced (probably with constructing the extractor as
127
- #a string). However, until that point, it will stay.
128
- def self.export_extractor(contents, pattern, output_file)
129
- first_line = contents.scan(/.*Extractor\.define.*/)
130
- #During wrapper construction, we count the number of blocks; add one occurrence of
131
- #end (to close the block of the extractor definition)
132
- count = pattern.evaluation_context.block_count + 1
133
- #Construct the extractor definition matching regexp based on the number of ends
134
- definition = contents.scan(/Extractor\.define(?:.*?(?:\}|\s+end)){#{count.to_s}}/m)
135
- #Since the regexp matching the extractor definition was multiline, get the first
136
- #line separately and patch it in!
137
- rows = definition[0].split("\n")
138
- add_P(rows)
139
- rows[0] = first_line
140
- substitute_examples_with_XPaths(pattern,rows)
141
- end
142
-
143
- def self.substitute_examples_with_XPaths(pattern,rows)
144
- #@full_definition holds the original definition (at this point, later on it will be
145
- #gsub!bed and all)
146
- @full_definition = rows.join("\n")
147
- #This hash contains all the examples that need to be replaced with their XPath
148
- #counterparts;"P.#{name}"
149
- #We are relying on the convention that if an example is definied, it is always
150
- #the first parameter and it is always a string
151
- @name_to_xpath_map = {}
152
- create_name_to_xpath_map(pattern)
153
- #Replace the examples which are quoted with " and '
154
- @name_to_xpath_map.each do |name, xpaths|
155
- replace_example_with_xpath(name, xpaths, %q{"})
156
- replace_example_with_xpath(name, xpaths, %q{'})
157
- end
158
- #Finally, add XPaths to pattern which had no example at the beginning (the XPath was
159
- #generated from the child patterns)
160
- @name_to_xpath_map.each do |name, xpaths|
161
- xpaths.reverse.each do |xpath|
162
- next if !@full_definition.include? "P.#{name}"
163
- comma = @full_definition.scan(Regexp.new("P.#{name}(.+)$"))[0][0].sub('do'){}.strip == '' ? '' : ','
164
- if (@full_definition.scan(Regexp.new("P.#{name}(.+)$"))[0][0]).include?('{')
165
- @full_definition.sub!("P.#{name}") {"P.#{name}('#{xpath}')"}
166
- else
167
- @full_definition.sub!("P.#{name}") {"P.#{name} \"#{xpath}\"#{comma}"}
168
- end
169
- end
170
- end
171
- @result += @full_definition
84
+
85
+ def self.export_header(wrapper_name)
86
+ create_sexp "require 'rubygems'; require 'scrubyt'"
172
87
  end
173
88
 
174
- def self.export_footer(output_file, wrapper_name, extractor_result_file_name)
89
+ def self.export_footer(wrapper_name, extractor_result_file_name)
175
90
  if extractor_result_file_name
176
- @result += "\n\n#{wrapper_name}.to_xml.write(open('result_of_exported_extractor.xml', 'w'), 1)"
91
+ create_sexp "#{wrapper_name}.to_xml.write(open('result_of_exported_extractor.xml', 'w'), 1)"
177
92
  else
178
- @result += "\n\n#{wrapper_name}.to_xml.write($stdout, 1)"
93
+ create_sexp "#{wrapper_name}.to_xml.write($stdout, 1)"
179
94
  end
180
95
  end
181
96
 
182
- def self.add_P(rows)
183
- #Lord of the hacks :-) Originally, when we have used the class P as a pattern definer,
184
- #patterns could be matched very easily from the extractor definition (because they begun
185
- #with 'P.'). Now that P has been removed, mimick it!
186
- rows.each do |row|
187
- #Do not prepend P. to comments and empty lines
188
- next if (row.strip =~ /^#/ || row.strip == '')
189
- #Do not prepend P. to any of the reserved keywords
190
- jump_to_next = false
191
- NavigationActions::KEYWORDS.each { |keyword| jump_to_next = true if row.strip =~ /^#{keyword}/ }
192
- jump_to_next = true if row =~ /lambda/
193
- next if jump_to_next
194
- #Prepend P.s - the hairy stuff with eval et al is there to preserve the whitespace
195
- row.sub!(/\s+(.*)/) { "#{' ' * eval((row.size - row.lstrip.size ).to_s)} P.#{$1}" }
196
- #Don't forget also the stuff in parentheses!
197
- row.gsub!(/\{\s+/) {"{P."}
198
- end
199
- end
200
-
201
-
202
- def self.create_name_to_xpath_map(pattern)
203
- puts " Cereating mapping for: #{pattern.name}"
204
- @name_to_xpath_map[pattern.name] = []
205
- pattern.filters.each do |filter|
206
- @name_to_xpath_map[pattern.name] << filter.xpath if pattern.filters[0].xpath != nil
207
- end
208
- pattern.children.each {|child| create_name_to_xpath_map child}
209
- if pattern.type == Scrubyt::Pattern::PATTERN_TYPE_DETAIL
210
- puts pattern.name
211
- puts "-------"
212
- puts pattern.evaluation_context.extractor.get_detail_pattern_relations.each {|k,v|
213
- if k.include? pattern
214
- v.parent.children.each do |child|
215
- create_name_to_xpath_map child
216
- end
97
+ def self.export_extractor(root_pattern, wrapper_name)
98
+ # filter actions before and after pattern
99
+ pre_pattern_sexp = []
100
+ post_pattern_sexp = []
101
+ pattern_skipped = false
102
+ actions = ['next_page', *NavigationActions::KEYWORDS]
103
+
104
+ root_pattern.source_proc.to_sexp[3][1..-1].each do |sexp|
105
+ get_call = lambda { |sexp|
106
+ if sexp[0] == :fcall
107
+ return sexp[1].to_s
108
+ elsif sexp[0] == :iter || sexp[0] == :call
109
+ return get_call.call(sexp[1])
110
+ else
111
+ return nil
217
112
  end
218
113
  }
219
-
220
- #pattern.evaluation_context.extractor.get_detail_pattern_relations[pattern].parent.children.each do |child|
221
- # create_name_to_xpath_map child
222
- #end
114
+ call = get_call.call(sexp)
115
+ if(call.nil? || actions.index(call) != nil)
116
+ if !pattern_skipped
117
+ pre_pattern_sexp.push(sexp)
118
+ else
119
+ post_pattern_sexp.push(sexp)
120
+ end
121
+ else
122
+ raise "Second pattern tree found while exporting." if pattern_skipped
123
+ pattern_skipped = true
124
+ end
223
125
  end
126
+
127
+ # build extractor content
128
+ inner_block = [:block]
129
+ inner_block.push([:block, *pre_pattern_sexp])
130
+ inner_block.push([:block, export_pattern(root_pattern)])
131
+ inner_block.push([:block, *post_pattern_sexp])
132
+
133
+ # build extractor
134
+ [:block, [:lasgn, wrapper_name, [:iter, [:call, [:colon2, [:const, :Scrubyt], :Extractor], :define], nil, inner_block]]]
224
135
  end
225
136
 
226
- def self.replace_example_with_xpath(name, xpaths, left_delimiter, right_delimiter=left_delimiter)
227
- return if name=='root'
228
- return if !@full_definition.include? "P.#{name}"
229
- parens = @full_definition.scan(/P.#{name}\s*\((.+?)\)/)
230
- if parens.empty?
231
- full_line = @full_definition.scan(/P.#{name}\W(.+)$/)[0][0]
232
- else
233
- full_line = parens[0][0]
234
- end
235
- examples = full_line.split(",")
236
- examples.reject! {|exa| exa.strip!; exa[0..0] != %q{"} && exa[0..0] != %q{'} }
237
- all_xpaths = ""
238
- examples.each do |e|
239
- index = examples.index(e)
240
- xpath = xpaths[index]
241
- return if xpath == nil
242
- all_xpaths += ", " if index > 0
243
- all_xpaths += '"' + xpath + '"'
244
- end
245
- replacing_xpath = full_line.include?('{') ? "P.#{name}('#{all_xpaths}')" :
246
- "P.#{name} #{all_xpaths}"
247
- optional_paren_escaped = parens.empty? ? '' : '\('
248
- optional_paren = parens.empty? ? '' : '('
249
- @full_definition.sub!(/P\.#{name}\s*#{optional_paren_escaped}#{left_delimiter}(.*)#{right_delimiter}/) do
250
- @name_to_xpath_map.delete("#{name}")
251
- optional_paren + replacing_xpath
252
- end
137
+ def self.export_pattern(root_pattern)
138
+ root_pattern.children[0].to_sexp
253
139
  end
254
-
255
140
  end
256
141
  end
@@ -56,7 +56,7 @@ require 'set'
56
56
  private
57
57
  def self.ensure_presence_of_pattern(pattern)
58
58
  #holds the name of those child patterns which have to be present as children of the input parameter
59
- epop_names = pattern.get_constraints.select {|c| c.type == Scrubyt::Constraint::CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_PATTERN}.map {|c| c.target}
59
+ epop_names = pattern.constraints.select {|c| c.type == Scrubyt::Constraint::CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_PATTERN}.map {|c| c.target}
60
60
  return if epop_names.empty?
61
61
  #all_parent_values holds instances extracted by pattern
62
62
  all_parent_values = []
@@ -95,8 +95,9 @@ private
95
95
  end
96
96
 
97
97
  def self.check_ancestors(parent_value, all_child_values)
98
- parent_value.children.each { |child| @found_ancestor = true if all_child_values.include? child }
99
- parent_value.children.each { |child| check_ancestors(child, all_child_values) if child.is_a? Hpricot::Elem }
98
+ parent_value.children.each { |child| @found_ancestor = true if all_child_values.include? child } if
99
+ parent_value.is_a? Hpricot::Elem
100
+ parent_value.children.each { |child| check_ancestors(child, all_child_values) if child.is_a? Hpricot::Elem } if parent_value.is_a? Hpricot::Elem
100
101
  end
101
102
 
102
103
  def self.remove_multiple_filter_duplicates_intern(pattern)
@@ -1,24 +1,23 @@
1
- module Scrubyt
1
+ module Scrubyt
2
2
  ##
3
3
  #=<tt>Represents the results of a pattern</tt>
4
4
  class Result
5
5
  attr_reader :childmap, :instances
6
-
6
+
7
7
  def initialize
8
8
  @childmap ||= []
9
9
  end
10
-
10
+
11
11
  def add_result(source, result)
12
12
  @childmap.each do |hash|
13
13
  if hash.keys[0] == source
14
- return if hash[source] == nil
15
14
  hash[source] << result if !hash[source].include? result
16
15
  return
17
16
  end
18
17
  end
19
- @childmap << {source => [result]}
18
+ @childmap << {source => [result]}
20
19
  end
21
-
20
+
22
21
  def lookup(last_result)
23
22
  @childmap.each do |hashes|
24
23
  hashes.each { |key, value| return value if (key == last_result) }
@@ -33,12 +32,12 @@ end#end of module Scrubyt
33
32
  # root
34
33
  # source: nil
35
34
  # childmap: [ {doc1 => [doc1]}, {doc2 => [doc2]} ]
36
-
35
+
37
36
  #table
38
37
  # source: doc1
39
38
  # childmap [ {doc1 => [table[1]s1, table[2]s1, table[3]s1]}, {doc2 => [table[1]s2, table[2]s2, table[3]s2]} ]
40
-
39
+
41
40
  #row
42
41
  # source: table1s1, table2s1, table3s1
43
42
  # childmap: [ {table[1]s1 => [row1s1, row2s1]}, {table[2]s1 => [row3s1, row3s1, row5s1]},
44
- # {table[1]s2 => [row1s2, row2s2]}, {table[2]s2 => [row3s2, row3s2, row5s2]}]
43
+ # {table[1]s2 => [row1s2, row2s2]}, {table[2]s2 => [row3s2, row3s2, row5s2]}]
@@ -12,36 +12,69 @@ module Scrubyt
12
12
  root = REXML::Element.new('root')
13
13
  doc.add_element(root)
14
14
  all_extracted_docs = pattern.last_result
15
- all_extracted_docs.each do |lr|
15
+ [all_extracted_docs].flatten.each do |lr|
16
16
  pattern.last_result = lr
17
- to_xml_recursive(pattern, root)
17
+ to_xml_recursive(pattern, root)
18
18
  end
19
19
  remove_empty_leaves(doc)
20
20
  @@last_doc = doc
21
21
  end
22
-
22
+
23
23
  def self.remove_empty_leaves(node)
24
24
  node.remove if node.elements.empty? && node.text == nil
25
25
  node.elements.each {|child| remove_empty_leaves child }
26
26
  end
27
-
27
+
28
28
  ##
29
29
  #Output the text of the pattern; If this pattern is a tree, collect the text from its
30
30
  #result instance node; otherwise rely on the last_result
31
+ #TODO: throw this away!!!
31
32
  def self.to_text(pattern)
32
- last_result = pattern.last_result
33
- result = ""
34
- if pattern.type == Scrubyt::Pattern::PATTERN_TYPE_TREE
35
- last_result.traverse_text { |t| result += t.to_s }
36
- else
37
- result = last_result
38
- end
39
- result
33
+ last_result = pattern.last_result
34
+ result = ""
35
+ if pattern.type == :tree
36
+ last_result.traverse_text { |t| result += t.to_s }
37
+ else
38
+ result = last_result
39
+ end
40
+ result
41
+ end
42
+
43
+ def self.to_csv(pattern)
44
+ result = []
45
+ flat_csv_inner = lambda {|e, parts|
46
+ content = e.text || ''
47
+ parts << content if ((e.is_a? REXML::Element) && content != '')
48
+ e.children.each {|c| flat_hash_inner.call(c, parts) if c.is_a? REXML::Element }
49
+ parts
50
+ }
51
+ to_xml(pattern).root.elements['/root'].each {|e| result << flat_csv_inner.call(e, []) }
52
+ (result.map! {|a| a.join(',')}).join("\n")
53
+ end
54
+
55
+ def self.to_hash(pattern)
56
+ result = []
57
+ flat_hash_inner = lambda {|e, parts|
58
+ content = e.text || ''
59
+ if ((e.is_a? REXML::Element) && content != '')
60
+ if parts[e.local_name]
61
+ parts[e.local_name] = parts[e.local_name] + "," + content
62
+ else
63
+ parts[e.local_name] = content
64
+ end
65
+ end
66
+ e.children.each {|c| flat_hash_inner.call(c, parts) if c.is_a? REXML::Element }
67
+ parts
68
+ }
69
+ to_xml(pattern).root.elements['/root'].each {|e| result << flat_hash_inner.call(e, {}) }
70
+ result
40
71
  end
41
-
72
+
73
+
74
+
42
75
  ##
43
- #Print some simple statistics on the extracted results, like the count of extracted
44
- #instances by each pattern
76
+ #Print some simple statistics on the extracted results, like the count of extracted
77
+ #instances by each pattern
45
78
  def self.print_statistics(pattern)
46
79
  puts "\n" * 2
47
80
  print_statistics_recursive(pattern,0)
@@ -54,20 +87,34 @@ private
54
87
  childresults = child.result.lookup(child.parent.last_result)
55
88
  #Output text for leaf nodes only; Maybe add possibility to customize this later
56
89
  if (childresults == nil)
90
+ ##TODO: is this needed for anything? I guess not! Drop it!!!!!!
91
+ #Update: it seems the blackbox tests are not passing because of this (?) so temporarily adding it back
92
+ ##=begin
57
93
  res = ""
58
- child.parent.last_result.traverse_text { |t| res += t.to_s }
59
- if (child.parent.size == 0)
60
- element.text = (res.gsub('&nbsp;'){' '}).strip unless element.parent.is_a? REXML::Document
94
+ if child.parent.last_result.is_a? String
95
+ res = child.parent.last_result
96
+ else
97
+ child.parent.last_result.traverse_text { |t| res += t.to_s }
98
+ end
99
+ if (child.parent.respond_to?(:size) && child.parent.size == 0) #TODO: respond_to should not be used here, it's just a quick workaround
100
+ element.text = SharedUtils.unescape_entities(res).strip unless element.parent.is_a? REXML::Document
61
101
  end
62
102
  next
103
+ ##=end
63
104
  end
105
+
64
106
  generate_children(child, childresults, element)
65
107
  end
66
108
  end
67
-
109
+
68
110
  def self.generate_children(child, childresults, element)
111
+ if childresults == nil
112
+ child_node = REXML::Element.new(child.name)
113
+ child_node.text = child.default
114
+ element.add_element(child_node)
115
+ else
69
116
  childresults.size.times do |num|
70
- child.last_result = childresults[num]
117
+ child.last_result = childresults[num]
71
118
  res = ""
72
119
  if child.last_result.instance_of? String
73
120
  res = child.last_result
@@ -78,37 +125,29 @@ private
78
125
  child.last_result.children.each { |c| element.add_element c }
79
126
  end
80
127
  end
81
- child_node = REXML::Element.new(child.name)
82
- child_node.text = (res.gsub('&nbsp;'){' '}).strip if write_text_criteria_met(child)
83
-
84
- element.add_element(child_node) if child.type != Scrubyt::Pattern::PATTERN_TYPE_DETAIL
128
+ child_node = REXML::Element.new(child.name)
129
+ child_node.text = SharedUtils.unescape_entities(res).strip if child.write_text
130
+ element.add_element(child_node) if (child.type != :detail_page && child_node.text != '')
85
131
  to_xml_recursive(child, child_node)
86
- end
87
- end
88
-
89
- def self.write_text_criteria_met(pattern)
90
- if (pattern.write_text == nil)
91
- return pattern.children.size == 0
92
- else
93
- pattern.write_text
132
+ end
94
133
  end
95
134
  end
96
-
135
+
97
136
  def self.print_statistics_recursive(pattern, depth)
98
- if pattern.name != 'root'
99
- if pattern.type == Scrubyt::Pattern::PATTERN_TYPE_DETAIL
137
+ if pattern.name != 'root'
138
+ if pattern.type == :detail_page
100
139
  pattern.evaluation_context.extractor.get_detail_pattern_relations[pattern].parent.children.each do |child|
101
140
  print_statistics_recursive(child, depth)
102
- end
141
+ end
103
142
  else
104
- count = REXML::XPath.match(@@last_doc, "//#{pattern.name}").size
143
+ count = REXML::XPath.match(@@last_doc, "//#{pattern.name}").size
105
144
  puts((' ' * "#{depth}".to_i) + "#{pattern.name} extracted #{count} instances.")
106
145
  end
107
146
  end
108
-
147
+
109
148
  pattern.children.each do |child|
110
149
  print_statistics_recursive(child, depth + 4)
111
- end
112
- end#end of method print_statistics_recursive
113
- end #end of class ResultDumper
114
- end #end of module Scrubyt
150
+ end
151
+ end#end of method print_statistics_recursive
152
+ end #end of class ResultDumper
153
+ end #end of module Scrubyt