scrubyt 0.2.8 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. data/CHANGELOG +32 -2
  2. data/Rakefile +25 -20
  3. data/lib/scrubyt.rb +24 -5
  4. data/lib/scrubyt/core/navigation/fetch_action.rb +76 -42
  5. data/lib/scrubyt/core/navigation/navigation_actions.rb +24 -6
  6. data/lib/scrubyt/core/scraping/filters/base_filter.rb +5 -5
  7. data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +2 -2
  8. data/lib/scrubyt/core/scraping/filters/download_filter.rb +2 -1
  9. data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +7 -2
  10. data/lib/scrubyt/core/scraping/filters/tree_filter.rb +37 -12
  11. data/lib/scrubyt/core/scraping/pattern.rb +82 -90
  12. data/lib/scrubyt/core/scraping/pre_filter_document.rb +2 -1
  13. data/lib/scrubyt/core/shared/evaluation_context.rb +14 -37
  14. data/lib/scrubyt/core/shared/extractor.rb +55 -54
  15. data/lib/scrubyt/logging.rb +16 -0
  16. data/lib/scrubyt/output/export.rb +1 -1
  17. data/lib/scrubyt/output/post_processor.rb +6 -5
  18. data/lib/scrubyt/output/result.rb +1 -0
  19. data/lib/scrubyt/output/result_dumper.rb +4 -3
  20. data/lib/scrubyt/output/result_node.rb +73 -0
  21. data/lib/scrubyt/output/scrubyt_result.rb +28 -0
  22. data/lib/scrubyt/utils/ruby_extensions.rb +8 -0
  23. data/lib/scrubyt/utils/simple_example_lookup.rb +14 -1
  24. data/lib/scrubyt/utils/xpathutils.rb +11 -0
  25. metadata +7 -12
  26. data/test/unittests/constraint_test.rb +0 -107
  27. data/test/unittests/extractor_test.rb +0 -91
  28. data/test/unittests/filter_test.rb +0 -79
  29. data/test/unittests/input/constraint_test.html +0 -55
  30. data/test/unittests/input/test.html +0 -39
  31. data/test/unittests/pattern_test.rb +0 -27
  32. data/test/unittests/simple_example_lookup_test.rb +0 -68
  33. data/test/unittests/xpathutils_test.rb +0 -152
@@ -43,13 +43,14 @@ module Scrubyt
43
43
  EXAMPLE_TYPE_IMAGE = 2
44
44
  #No example - the actual XPath is determined from the children XPaths (their LCA)
45
45
  EXAMPLE_TYPE_CHILDREN = 3
46
+
46
47
  #Regexp example, like /\d+@*\d+[a-z]/
47
48
  EXAMPLE_TYPE_REGEXP = 4
48
49
  #Compound example, like :contains => 'goodies'
49
50
  EXAMPLE_TYPE_COMPOUND = 5
50
51
 
51
52
  attr_accessor(:example_type, :parent_pattern, :temp_sink,
52
- :constraints, :xpath, :regexp, :example, :source, :sink)
53
+ :constraints, :xpath, :regexp, :example, :final_result)
53
54
 
54
55
  def self.create(parent_pattern, example=nil)
55
56
 
@@ -63,6 +64,7 @@ module Scrubyt
63
64
 
64
65
  #Dispatcher method to add constraints; of course, as with any method_missing, this method
65
66
  #should not be called directly
67
+
66
68
  #TODO still used?
67
69
  def method_missing(method_name, *args, &block)
68
70
  case method_name.to_s
@@ -82,8 +84,6 @@ module Scrubyt
82
84
  def initialize(parent_pattern, example)
83
85
  @example_type = BaseFilter.determine_example_type(example)
84
86
  @parent_pattern = parent_pattern
85
- @sink = [] #output of a filter
86
- @source = [] #input of a filter
87
87
  @example = example
88
88
  @xpath = nil #The xpath to evaluate this filter
89
89
  @constraints = [] #list of constraints
@@ -98,9 +98,9 @@ module Scrubyt
98
98
  case example
99
99
  when nil
100
100
  EXAMPLE_TYPE_CHILDREN
101
- when /\.(jpg|png|gif|jpeg)$/
101
+ when /\.(jpg|png|gif|jpeg)(\[\d+\])?$/
102
102
  EXAMPLE_TYPE_IMAGE
103
- when /^\/{1,2}[a-z]+[0-9]?(\[[0-9]+\])?(\/{1,2}[a-z()]+[0-9]?(\[[0-9]+\])?)*$/
103
+ when /^\/{1,2}[a-z]+[0-9]?(\[[0-9]+\])?(\/{1,2}[a-z()]+[0-9]?(\[[0-9]+\])?)*(\[@.+=.+\])?(\/@.+)?$/
104
104
  (example.include? '/' || example.include?('[')) ? EXAMPLE_TYPE_XPATH : EXAMPLE_TYPE_STRING
105
105
  else
106
106
  EXAMPLE_TYPE_STRING
@@ -3,9 +3,9 @@ module Scrubyt
3
3
 
4
4
  def evaluate(source)
5
5
  if source.is_a? String
6
- result = @parent_pattern.evaluation_context.extractor.evaluate_subextractor(source, @parent_pattern, @parent_pattern.resolve)
6
+ @parent_pattern.evaluation_context.extractor.evaluate_subextractor(source, @parent_pattern, @parent_pattern.resolve)
7
7
  else
8
- result = @parent_pattern.evaluation_context.extractor.evaluate_subextractor(
8
+ @parent_pattern.evaluation_context.extractor.evaluate_subextractor(
9
9
  XPathUtils.find_nearest_node_with_attribute(source, 'href').attributes['href'],
10
10
  @parent_pattern, @parent_pattern.resolve)
11
11
  end
@@ -20,7 +20,8 @@ private
20
20
  return '' if source.size < 4
21
21
  file_name = source.scan(/.+\/(.*)/)[0][0]
22
22
  Net::HTTP.start(base_url) { |http|
23
- resp = http.get(source)
23
+ puts "downloading: #{source.scan(/\s*(.+)/)[0][0]}"
24
+ resp = http.get(source.scan(/\s*(.+)/)[0][0])
24
25
  outfile = DownloadFilter.find_nonexisting_file_name(File.join(@example, file_name))
25
26
  FileUtils.mkdir_p @example
26
27
  open(outfile, 'wb') {|f| f.write(resp.body) }
@@ -1,7 +1,12 @@
1
1
  module Scrubyt
2
- class HTMLSubTreeFilter < BaseFilter
3
-
2
+ class HtmlSubtreeFilter < BaseFilter
3
+
4
4
  def evaluate(source)
5
+ source.inner_html
6
+ end
7
+
8
+ def to_sexp
9
+ nil
5
10
  end #end of method
6
11
  end #End of class TreeFilter
7
12
  end #End of module Scrubyt
@@ -2,9 +2,22 @@ module Scrubyt
2
2
  class TreeFilter < BaseFilter
3
3
 
4
4
  def evaluate(source)
5
+ return [@final_result] if @final_result
6
+ #Crude hack! Drop it after it will be supported in Hpricot
7
+ if @xpath =~ /.+\/@.+$/
8
+ @example = @xpath
9
+ @xpath = @xpath.scan(/^(.+?)\/@/)[0][0]
10
+ end
5
11
  result = source/@xpath
6
- #puts "Evaluating #{@parent_pattern.name} with #{@xpath}"
7
- xpath_results = if result.class == Hpricot::Elements then result.map else [result] end
12
+
13
+ Scrubyt.log :ACTION, "Evaluating #{@parent_pattern.name} with #{@xpath}"
14
+
15
+ xpath_results = Hpricot::Elements === result ? result : [result]
16
+
17
+ if @example =~ /.+\/@.+$/
18
+ result_attribute = @example.scan(/.+\/@(.+?)$/)[0][0]
19
+ xpath_results.map! {|r| r.attributes[result_attribute] }
20
+ end
8
21
  if @regexp == nil
9
22
  xpath_results
10
23
  else
@@ -22,6 +35,8 @@ module Scrubyt
22
35
  def generate_regexp_for_example
23
36
  return if @example_type != EXAMPLE_TYPE_STRING
24
37
  return if @temp_sink.nil?
38
+ return if @temp_sink.is_a? String
39
+ return if @example =~ /.+\[.+\]$/
25
40
 
26
41
  text = SharedUtils.prepare_text_for_comparison(@temp_sink.inner_text)
27
42
  match_range = @temp_sink.match_data.begin(0)..@temp_sink.match_data.end(0)
@@ -49,10 +64,14 @@ module Scrubyt
49
64
  when EXAMPLE_TYPE_XPATH
50
65
  @xpath = @example
51
66
  when EXAMPLE_TYPE_STRING
52
- @temp_sink = SimpleExampleLookup.find_node_from_text(@parent_pattern.evaluation_context.root_pattern.filters[0].source[0],
67
+ @temp_sink = SimpleExampleLookup.find_node_from_text(@parent_pattern.evaluation_context.extractor.get_hpricot_doc,
53
68
  @example,
54
69
  next_page_example)
55
70
  return if @temp_sink == nil
71
+ if @temp_sink.is_a? String
72
+ @final_result = @temp_sink
73
+ return
74
+ end
56
75
 
57
76
  mark_changing_ranges = lambda { |element, range|
58
77
  element.instance_eval do
@@ -63,14 +82,14 @@ module Scrubyt
63
82
  end
64
83
  }
65
84
  mark_changing_ranges.call(@temp_sink, @temp_sink.match_data.begin(0)..@temp_sink.match_data.end(0))
66
-
67
- @xpath = XPathUtils.generate_XPath(@temp_sink, nil, !@parent_pattern.generalize)
85
+ write_indices = next_page_example ? true : !@parent_pattern.generalize
86
+ @xpath = XPathUtils.generate_XPath(@temp_sink, nil, write_indices)
68
87
  when EXAMPLE_TYPE_CHILDREN
69
88
  current_example_index = 0
70
89
  loop do
71
90
  all_child_temp_sinks = []
72
91
  @parent_pattern.children.each do |child_pattern|
73
- all_child_temp_sinks << child_pattern.filters[current_example_index].temp_sink
92
+ all_child_temp_sinks << child_pattern.filters[current_example_index].temp_sink if child_pattern.filters[current_example_index].temp_sink
74
93
  end
75
94
  result = all_child_temp_sinks.pop
76
95
  if all_child_temp_sinks.empty?
@@ -81,7 +100,7 @@ module Scrubyt
81
100
  end
82
101
  end
83
102
  xpath = @parent_pattern.generalize ? XPathUtils.generate_XPath(result, nil, false) :
84
- XPathUtils.generate_XPath(result, nil, true)
103
+ XPathUtils.generate_XPath(result, nil, true)
85
104
  if @parent_pattern.filters.size < current_example_index + 1
86
105
  @parent_pattern.filters << Scrubyt::BaseFilter.create(@parent_pattern)
87
106
  end
@@ -97,24 +116,30 @@ module Scrubyt
97
116
  current_example_index += 1
98
117
  end
99
118
  when EXAMPLE_TYPE_IMAGE
100
- #@temp_sink = XPathUtils.find_image(@parent_pattern.evaluation_context.root_pattern.filters[0].source[0], @example)\
101
119
  @temp_sink = XPathUtils.find_image(@parent_pattern.evaluation_context.extractor.get_hpricot_doc, @example)
102
- @xpath = XPathUtils.generate_XPath(@temp_sink, nil, false)
120
+ @xpath = XPathUtils.generate_XPath(@temp_sink, nil, true)
103
121
  when EXAMPLE_TYPE_COMPOUND
104
- @temp_sink = CompoundExampleLookup.find_node_from_compund_example(@parent_pattern.evaluation_context.root_pattern.filters[0].source[0],
122
+ @temp_sink = CompoundExampleLookup.find_node_from_compund_example(@parent_pattern.evaluation_context.extractor.get_hpricot_doc,
105
123
  @example,
106
124
  next_page_example)
107
125
  @xpath = @parent_pattern.generalize ? XPathUtils.generate_XPath(@temp_sink, nil, false) :
108
- XPathUtils.generate_XPath(@temp_sink, nil, true)
126
+ XPathUtils.generate_XPath(@temp_sink, nil, true)
109
127
  end
110
128
  end
111
129
 
112
130
  def generate_relative_XPath(parent_xpath)
131
+ parent_xpath = XPathUtils.to_full_XPath(@parent_pattern.evaluation_context.extractor.get_hpricot_doc,
132
+ parent_xpath,
133
+ @parent_pattern.parent.generalize) if parent_xpath =~ /(\[@.+=.+\])$/
113
134
  @xpath = XPathUtils.generate_relative_XPath_from_XPaths(parent_xpath, @xpath) if (@xpath =~ /^\/html/) #TODO: should not rely on <html> being the root node
114
135
  end
115
136
 
116
137
  def to_sexp
117
- [:str, @xpath]
138
+ if @example =~ /.+\[@.+\]$/
139
+ [:str, "#{@xpath}/@#{@example.scan(/\[@(.+?)\]/)[0][0]}"]
140
+ else
141
+ [:str, @xpath]
142
+ end
118
143
  end
119
144
 
120
145
  end #End of class TreeFilter
@@ -33,7 +33,7 @@ module Scrubyt
33
33
  # # write out the HTML subtree beginning at the matched element
34
34
  # PATTERN_TYPE_HTML_SUBTREE = :PATTERN_TYPE_HTML_SUBTREE
35
35
 
36
- VALID_PATTERN_TYPES = [:root, :tree, :attribute, :regexp, :detail_page, :download, :html_subtree]
36
+ VALID_PATTERN_TYPES = [:tree, :attribute, :regexp, :detail_page, :download, :html_subtree]
37
37
 
38
38
  #The pattern can be either a model pattern (in this case it is
39
39
  #written to the output) or a temp pattern (in this case it is skipped)
@@ -49,10 +49,11 @@ module Scrubyt
49
49
  VALID_OUTPUT_TYPES = [:model, :temp]
50
50
 
51
51
  #These options can be set upon wrapper creation
52
- VALID_OPTIONS = [:generalize, :type, :output_type, :write_text, :references, :limit, :default, :resolve] + Scrubyt::CompoundExample::DESCRIPTORS
52
+ PATTERN_OPTIONS = [:generalize, :type, :output_type, :references, :limit, :default, :resolve]
53
+ VALID_OPTIONS = PATTERN_OPTIONS + Scrubyt::CompoundExample::DESCRIPTORS + Scrubyt::ResultNode::OUTPUT_OPTIONS
53
54
 
54
55
  attr_accessor(:name, :options, :children, :constraints, :filters, :parent,
55
- :last_result, :result, :evaluation_context,
56
+ :last_result, :evaluation_context,
56
57
  :indices_to_extract, :referenced_extractor, :referenced_pattern,
57
58
  :source_file, :source_proc, :modifier_calls)
58
59
 
@@ -60,7 +61,7 @@ module Scrubyt
60
61
 
61
62
  option_reader(:type => :tree, :output_type => :model, :generalize => false,
62
63
  :write_text => lambda { @children.size == 0 }, :limit => nil,
63
- :default => nil, :resolve => :full)
64
+ :default => nil, :resolve => :full)
64
65
 
65
66
  def initialize(name, args=[], evaluation_context=nil, parent=nil, &block)
66
67
  #init attributes
@@ -71,7 +72,6 @@ module Scrubyt
71
72
  @children = []
72
73
  @filters = []
73
74
  @constraints = []
74
- @result = Result.new
75
75
  @modifier_calls = []
76
76
 
77
77
  #grab any examples that are defined
@@ -93,9 +93,12 @@ module Scrubyt
93
93
  end
94
94
  end
95
95
 
96
- #by default, generalize direct children of the root pattern, but only in the case if
96
+ #by default, generalize the root pattern, but only in the case if
97
97
  #@generalize was not set up explicitly
98
- @options[:generalize] = true if parent && parent.type == :root && @options[:generalize].nil?
98
+ if @options[:generalize].nil?
99
+ @options[:generalize] = true if parent.nil?
100
+ @options[:generalize] = false if filters[0].example =~ /.+\[[a-zA-Z].+\]$/
101
+ end
99
102
 
100
103
  #parse child patterns if available
101
104
  parse_child_patterns(&block) if ( !block.nil? && type != :detail_page )
@@ -104,7 +107,7 @@ module Scrubyt
104
107
  if type == :tree
105
108
  #generate xpaths and regexps
106
109
  @filters.each do |filter|
107
- filter.generate_XPath_for_example(false)
110
+ filter.generate_XPath_for_example(false) unless @name == 'next_page'
108
111
  filter.generate_regexp_for_example
109
112
  end
110
113
  #when the xpaths of this pattern have been created, its children can make their xpaths relative
@@ -154,9 +157,6 @@ module Scrubyt
154
157
  #an ancestor of a detail pattern , and store this in a hash if yes (to be able to
155
158
  #traverse the pattern structure on detail pages as well).
156
159
  def check_if_detail_page(block)
157
- #return if !@options[:references]
158
- #@options[:type] = :detail_page
159
- #@referenced_extractor = @options[:references]
160
160
  if @name =~ /.+_detail/
161
161
  @options[:type] = :detail_page
162
162
  @referenced_extractor = block
@@ -168,6 +168,10 @@ module Scrubyt
168
168
  @children.inject(false) { |is_parent_of_leaf, child| is_parent_of_leaf || child.children.empty? }
169
169
  end
170
170
 
171
+ def filter_count
172
+ @filters.size
173
+ end
174
+
171
175
  def parse_child_patterns(&block)
172
176
  context = Object.new
173
177
  context.instance_eval do
@@ -177,7 +181,8 @@ module Scrubyt
177
181
  def method_missing(method_name, *args, &block)
178
182
  if method_name.to_s[0..0] == '_'
179
183
  #add hash option
180
- key = :"#{method_name.to_s[1..-1]}"
184
+ key = method_name.to_s[1..-1].to_sym
185
+ check_option(key)
181
186
  args.each do |arg|
182
187
  current_value = @current.options[key]
183
188
  if current_value.nil?
@@ -216,8 +221,6 @@ module Scrubyt
216
221
  when 'select_indices'
217
222
  @result_indexer = Scrubyt::ResultIndexer.new(*args)
218
223
  return self
219
- when /^to_/
220
- return Scrubyt::ResultDumper.send(method_name.to_s, self)
221
224
  when /^ensure_/
222
225
  @constraints << Scrubyt::ConstraintAdder.send(method_name, *args)
223
226
  return self #To make chaining possible
@@ -228,80 +231,72 @@ module Scrubyt
228
231
  raise NoMethodError.new(method_name.to_s, method_name.to_s, args)
229
232
  end
230
233
 
231
- #Companion function to the previous one (Pattern::method_missing). It makes
232
- #inspecting results, like
233
- #
234
- # camera_data.item[1].item_name[0]
235
- #
236
- #possible. The method Pattern::method missing handles the 'item', 'item_name' etc.
237
- #parts, while the indexing ([1], [0]) is handled by this function.
238
- #If you would like to select a different document than the first one (which is
239
- #the default), you should use the form:
240
- #
241
- # camera_data[1].item[1].item_name[0]
242
- def [](index)
243
- if @name == 'root'
244
- @evaluation_context.document_index = index
245
- else
246
- @parent.last_result = @parent.last_result[@evaluation_context.document_index] if @parent.last_result.is_a? Array
247
- return nil if (@result.lookup(@parent.last_result)) == nil
248
- @last_result = @result.lookup(@parent.last_result)[index]
234
+ def evaluate(source, filter_indices)
235
+ if type == :detail_page # DIRTY!
236
+ return @filters[0].evaluate(source)
249
237
  end
250
- self
251
- end
252
238
 
253
- ##
254
- #If export is called on the root pattern, it exports the whole extractor wher it is
255
- #defined; See export.rb for further details on the parameters
256
- def export(arg1, output_file_name=nil, extractor_result_file_name=nil)
257
- # require 'scrubyt/output/export_old'; Scrubyt::ExportOld.export(arg1, self, output_file_name, extractor_result_file_name) ; return
258
- if File.exists? arg1
259
- old_export(arg1, output_file_name, extractor_result_file_name)
260
- else
261
- new_export(arg1, output_file_name, extractor_result_file_name)
239
+ #we apply all filters if filter_indices is nil
240
+ indices_to_evaluate = filter_indices.nil? ? 0...@filters.size : filter_indices
241
+ #stores the results of all filters
242
+ all_filter_results = []
243
+ #remembers which filters have retured a certain result
244
+ indices_mapping = {}
245
+ #evaluate filters and collect filter results
246
+ indices_to_evaluate.each do |filter_index|
247
+ filter = @filters[filter_index]
248
+ filter_results = filter.evaluate(source)
249
+ filter_results.each do |result|
250
+ #add result to list if not already there
251
+ all_filter_results << result if all_filter_results.index(result).nil?
252
+ #add the current filter's index to the mapping
253
+ (indices_mapping[result] ||= []) << filter_index
254
+ end
262
255
  end
263
- end
264
-
265
- def old_export(input_file, output_file_name=nil, extractor_result_file_name=nil)
266
- contents = open(input_file).read
267
- wrapper_name = contents.scan(/\s+(.+)\s+=.*Extractor\.define.*/)[0][0]
268
- Scrubyt::Export.export(self, wrapper_name, output_file_name, extractor_result_file_name)
269
- end
270
-
271
- def new_export(wrapper_name, output_file_name=nil, extractor_result_file_name=nil)
272
- Scrubyt::Export.export(self, wrapper_name, output_file_name, extractor_result_file_name)
273
- end
274
256
 
275
- ##
276
- #Evaluate the pattern. This means evaluating all the filters and adding
277
- #their extracted instances to the array of results of this pattern
278
- def evaluate(parent_filters)
279
- if type != :root #TODO: should be removed, but there is more refactoring of filter handling needed to do so
280
- all_filter_results = []
281
- @filters.each do |filter|
282
- filter_index = @filters.index(filter)
283
- filter_index = 0 if parent_filters.size <= filter_index
284
- filter.source = parent_filters[filter_index].sink
285
- filter.source.each do |source|
286
- results = filter.evaluate(source)
287
- next if results == nil
288
- #apply constraints
289
- if @constraints.size > 0
290
- results = results.select do |result|
291
- @constraints.inject(true) { |accepted, constraint| accepted && constraint.check(result) }
292
- end
293
- end
294
- #apply indexer
295
- results = @result_indexer.select_indices_to_extract(results) if !@result_indexer.nil?
296
- add_result(filter, source, results)
257
+ #apply constraints
258
+ if @constraints.size > 0
259
+ all_filter_results = all_filter_results.select do |result|
260
+ @constraints.inject(true) { |accepted, constraint| accepted && constraint.check(result) }
261
+ end
262
+ end
263
+ #apply indexer
264
+ all_filter_results = @result_indexer.select_indices_to_extract(all_filter_results) if !@result_indexer.nil?
265
+
266
+ #create result nodes and evaluate children
267
+ result_nodes = []
268
+ all_filter_results.each do |result|
269
+ #create result node
270
+ node = ResultNode.new(@name, result, @options)
271
+ node.generated_by_leaf = (@children.size == 0)
272
+ #evaluate children
273
+ @children.each do |child|
274
+ raise if self.filter_count != 1 && child.filter_count != self.filter_count
275
+ if self.filter_count == 1
276
+ #evaluate all child filters
277
+ node.push(*child.evaluate(result, nil))
278
+ else
279
+ #evaluate appropriate child filters
280
+ node.push(*child.evaluate(result, indices_mapping[result]))
297
281
  end
298
282
  end
283
+ #apply child constraints (ensure_presence_of_pattern)
284
+ required_child_names = @constraints.select {|c| c.type == Scrubyt::Constraint::CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_PATTERN }.map {|c| c.target}
285
+ unless required_child_names.empty?
286
+ check = lambda { |node_to_check|
287
+ required_child_names.delete node_to_check.name
288
+ node_to_check.each { |child| check.call child }
289
+ }
290
+ check.call node
291
+ end
292
+ next unless required_child_names.empty?
293
+ #add the current result node to the list
294
+ result_nodes << node
299
295
  end
300
-
301
- #evaluate children
302
- @children.each { |child| child.evaluate(@filters) }
303
-
304
- #do postprocessing
296
+ if result_nodes.empty?
297
+ result_nodes << ResultNode.new(@name,@options[:default],@options) if @options[:default]
298
+ end
299
+ result_nodes
305
300
  end
306
301
 
307
302
  def to_sexp
@@ -316,7 +311,7 @@ module Scrubyt
316
311
  if type == :detail_page
317
312
  #add detail page extractor
318
313
  detail_root = @evaluation_context.extractor.get_detail_extractor(self)
319
- sexp = [:iter, sexp, nil, [:block, *detail_root.children.to_sexp_array ]]
314
+ sexp = [:iter, sexp, nil, [:block, detail_root.to_sexp]]
320
315
  else
321
316
  #add child block if the pattern has children
322
317
  sexp = [:iter, sexp, nil, [:block, *@children.to_sexp_array ]] if !@children.empty?
@@ -336,11 +331,15 @@ module Scrubyt
336
331
  #merge provided hash
337
332
  @options.merge!(hash)
338
333
  #check if valid
339
- hash.each { |key, value| raise "Unknown pattern option: #{key.to_s}" if VALID_OPTIONS.index(key.to_sym).nil? }
334
+ hash.each { |key, value| check_option(key.to_sym) }
340
335
  raise "Invalid pattern type: #{type.to_s}" if VALID_PATTERN_TYPES.index(type.to_sym).nil?
341
336
  raise "Invalid output type: #{output_type.to_s}" if VALID_OUTPUT_TYPES.index(output_type.to_sym).nil?
342
337
  end
343
338
 
339
+ def check_option(option)
340
+ raise "Unknown pattern option: #{option.to_s}" if VALID_OPTIONS.index(option).nil?
341
+ end
342
+
344
343
  def look_for_examples(args)
345
344
  if (args[0].is_a? String)
346
345
  examples = args.select {|e| e.is_a? String}
@@ -370,12 +369,5 @@ module Scrubyt
370
369
  examples
371
370
  end
372
371
 
373
- def add_result(filter, source, results)
374
- results.each do |res|
375
- filter.sink << res
376
- @result.add_result(source, res)
377
- end
378
- end
379
-
380
372
  end #end of class Pattern
381
373
  end #end of module Scrubyt