andyverprauskus-scrubyt 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. data/CHANGELOG +355 -0
  2. data/COPYING +340 -0
  3. data/README.rdoc +121 -0
  4. data/Rakefile +101 -0
  5. data/lib/scrubyt.rb +53 -0
  6. data/lib/scrubyt/core/navigation/agents/firewatir.rb +318 -0
  7. data/lib/scrubyt/core/navigation/agents/mechanize.rb +312 -0
  8. data/lib/scrubyt/core/navigation/fetch_action.rb +63 -0
  9. data/lib/scrubyt/core/navigation/navigation_actions.rb +107 -0
  10. data/lib/scrubyt/core/scraping/compound_example.rb +30 -0
  11. data/lib/scrubyt/core/scraping/constraint.rb +169 -0
  12. data/lib/scrubyt/core/scraping/constraint_adder.rb +49 -0
  13. data/lib/scrubyt/core/scraping/filters/attribute_filter.rb +14 -0
  14. data/lib/scrubyt/core/scraping/filters/base_filter.rb +112 -0
  15. data/lib/scrubyt/core/scraping/filters/constant_filter.rb +9 -0
  16. data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +37 -0
  17. data/lib/scrubyt/core/scraping/filters/download_filter.rb +64 -0
  18. data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +9 -0
  19. data/lib/scrubyt/core/scraping/filters/regexp_filter.rb +13 -0
  20. data/lib/scrubyt/core/scraping/filters/script_filter.rb +11 -0
  21. data/lib/scrubyt/core/scraping/filters/text_filter.rb +34 -0
  22. data/lib/scrubyt/core/scraping/filters/tree_filter.rb +138 -0
  23. data/lib/scrubyt/core/scraping/pattern.rb +359 -0
  24. data/lib/scrubyt/core/scraping/pre_filter_document.rb +14 -0
  25. data/lib/scrubyt/core/scraping/result_indexer.rb +90 -0
  26. data/lib/scrubyt/core/shared/extractor.rb +183 -0
  27. data/lib/scrubyt/logging.rb +154 -0
  28. data/lib/scrubyt/output/post_processor.rb +139 -0
  29. data/lib/scrubyt/output/result.rb +44 -0
  30. data/lib/scrubyt/output/result_dumper.rb +154 -0
  31. data/lib/scrubyt/output/result_node.rb +145 -0
  32. data/lib/scrubyt/output/scrubyt_result.rb +42 -0
  33. data/lib/scrubyt/utils/compound_example_lookup.rb +50 -0
  34. data/lib/scrubyt/utils/ruby_extensions.rb +85 -0
  35. data/lib/scrubyt/utils/shared_utils.rb +58 -0
  36. data/lib/scrubyt/utils/simple_example_lookup.rb +40 -0
  37. data/lib/scrubyt/utils/xpathutils.rb +202 -0
  38. data/test/blackbox_test.rb +60 -0
  39. data/test/blackbox_tests/basic/multi_root.rb +6 -0
  40. data/test/blackbox_tests/basic/simple.rb +5 -0
  41. data/test/blackbox_tests/detail_page/one_detail_page.rb +9 -0
  42. data/test/blackbox_tests/detail_page/two_detail_pages.rb +9 -0
  43. data/test/blackbox_tests/next_page/next_page_link.rb +7 -0
  44. data/test/blackbox_tests/next_page/page_list_links.rb +7 -0
  45. metadata +120 -0
@@ -0,0 +1,14 @@
1
module Scrubyt
  ##
  #=<tt>Apply different functions on the input document</tt>
  #Before the document is passed to Hpricot for parsing, we may need
  #to do different stuff with it which are clumsy/not appropriate/impossible
  #to do once the document is loaded.
  class PreFilterDocument
    ##
    #Replace <br/> tags with newlines and non-breaking spaces with plain spaces.
    #
    #+doc+ is the raw document as a String; it is not modified. A new String
    #is returned in which
    #- every <tt><br></tt>, <tt><br/></tt>, <tt><br /></tt> (any letter case)
    #  becomes "\r\n"
    #- every non-breaking space becomes an ordinary space
    def self.br_to_newline(doc)
      # The result of gsub has to be carried forward; calling it for its
      # side effect does nothing because gsub does not modify the receiver.
      text = doc.gsub(/<br[ \/]*>/i, "\r\n")

      if text.encoding == Encoding::UTF_8
        # Character-level replacement of U+00A0, so multi-byte characters that
        # merely contain the byte 0xA0 are left intact.
        text.tr("\u00A0", ' ')
      else
        # Other encodings: replace the raw byte 0xA0, which is the
        # non-breaking space in Latin-1 style single-byte encodings.
        text.b.tr("\xA0".b, ' ').force_encoding(text.encoding)
      end
    end #end of function br_to_newline
  end #end of class PreFilterDocument
end #end of module Scrubyt
@@ -0,0 +1,90 @@
1
module Scrubyt
  ##
  #=<tt>Selecting results based on indices</tt>
  #
  #If the results is list-like (as opposed to a 'hard' result, like a _price_ or a _title_),
  #probably with a variable count of results (like tags, authors etc.), you may need just
  #specific elements - like the last one, every third one, or at specific indices.
  #In this case you should use the select_indices syntax.
  class ResultIndexer
    attr_reader :indices_to_extract

    #Accepts the same arguments as select_indices (only the first one is used).
    def initialize(*args)
      select_indices(*args)
    end

    ##
    #Perform selection of the desired result instances, based on their indices.
    #
    #+ary+ is filtered *in place* and returned. If no valid index
    #specification was given on construction, +ary+ is returned untouched.
    #
    #Elements are selected by position, so equal (duplicate) elements at
    #different positions are handled independently.
    def select_indices_to_extract(ary)
      return ary if @indices_to_extract.nil?

      wanted = positions_to_keep(ary.size)
      kept = ary.each_with_index.select { |_element, position| wanted.include?(position) }
      ary.replace(kept.map { |element, _position| element })
    end

    private

    #Translate the stored specification (explicit indices and keywords) into
    #a list of zero-based positions for a result list of the given size.
    #Unknown keywords are ignored; positions outside the list simply never match.
    def positions_to_keep(size)
      positions = []
      @indices_to_extract.each do |spec|
        case spec
        when :first         then positions << 0
        when :last          then positions << size - 1
        when :all_but_last  then positions.concat((0..size - 2).to_a)
        when :all_but_first then positions.concat((1..size - 1).to_a)
        # keywords count from one: "even" means the 2nd, 4th, ... result
        when :every_even    then positions.concat((0...size).select { |i| i % 2 == 1 })
        when :every_odd, :every_second
          positions.concat((0...size).select { |i| i % 2 == 0 })
        when :every_third   then positions.concat((0...size).select { |i| i % 3 == 0 })
        when :every_fourth  then positions.concat((0...size).select { |i| i % 4 == 0 })
        when Symbol         then nil # unknown keyword - nothing to select
        else positions << spec
        end
      end
      positions
    end

    #Flatten one level of nested arrays/ranges inside an index list without
    #touching the list passed in by the caller. Plain entries come first,
    #followed by the not-yet-listed entries of the nested collections.
    def flatten_index_list(list)
      nested, plain = list.partition { |e| e.is_a?(Array) || e.is_a?(Range) }
      expanded = nested.flat_map { |e| e.to_a }
      expanded.reject! { |e| e.is_a?(Array) || e.is_a?(Range) }
      plain + (expanded.uniq - plain)
    end

    ##
    #Do not return the whole result set, just specified indices - like
    #first,last, every odd index, indices from [1..3] etc.
    #
    #This method can accept:
    #- a range, like (2..3)
    #- an array of indices, like [1,2,3] (may contain nested arrays, ranges and keywords)
    #- specified set of keywords:
    #  - :first
    #  - :last
    #  - :all_but_first
    #  - :all_but_last
    #  - :every_even
    #  - :every_odd
    #  - :every_second
    #  - :every_third
    #  - :every_fourth
    #  (there can be more of these keywords in one select_indices call)
    #
    #Anything else prints "Invalid index specification" and leaves the
    #indexer without a specification, i.e. nothing will be filtered.
    def select_indices(*args)
      indices_to_grab = args[0]
      case indices_to_grab
      when Range
        @indices_to_extract = indices_to_grab.to_a
      when Array
        @indices_to_extract = flatten_index_list(indices_to_grab)
      when Symbol
        #parse this when we already have the results
        @indices_to_extract = [indices_to_grab]
      else
        puts "Invalid index specification"
      end
    end #end of function select_indices
  end #end of class ResultIndexer
end #end of module Scrubyt
@@ -0,0 +1,183 @@
1
module Scrubyt
  ##
  #=<tt>Driving the whole extraction process</tt>
  #
  #Extractor is a performer class - it gets an extractor definition and carries
  #out the actions and evaluates the wrappers sequentially.
  #
  #Originally also the navigation actions were here, but since the class got too
  #big, they were factored out to an own class, NavigationAction.
  class Extractor
    include FetchAction

    attr_accessor :result, :evaluating_extractor_definition, :mode, :root_patterns, :next_page_pattern

    ##
    #The definition of the extractor is passed through this method.
    #
    #+mode+ may be a Hash; <tt>:agent => :firefox</tt> selects the Firewatir
    #navigation agent, anything else (including a non-Hash mode) selects
    #Mechanize. Returns the +result+ of the freshly built extractor.
    def self.define(mode=nil, &extractor_definition)
      agent = if mode.is_a?(Hash) && mode[:agent] == :firefox
                Navigation::Firewatir
              else
                Navigation::Mechanize
              end
      FetchAction.class_eval { include agent }

      extractor = self.new(mode, extractor_definition)
      extractor.result
    end

    ##
    #Build an extractor from a definition stored in a file.
    #
    #NOTE(review): the file content is passed to +eval+, so it runs with the
    #full privileges of the process - only load definitions you trust.
    def self.load(filename)
      define(&eval(IO.read(filename)))
    end

    ##
    #Evaluates +extractor_definition+ (a block/proc) in a DSL context,
    #runs the extraction and stores the outcome in +result+.
    #Exits the process if the definition declares no root pattern.
    def initialize(mode, extractor_definition)
      @mode = mode
      @root_patterns = []
      @next_page_pattern = nil
      @evaluating_extractor_definition = false
      @next_page_list = []
      @processed_pages = []

      #The second backtrace entry is taken as "file:line..."; keep the file part
      source_file = SharedUtils.get_backtrace[1].split(':')[0]

      Scrubyt.log :MODE, mode == :production ? 'Production' : 'Learning'

      @evaluating_extractor_definition = true
      context = Object.new
      context.extend NavigationActions
      context.instance_eval do
        def extractor=(value)
          @extractor = value
        end

        def next_page(*args)
          @extractor.next_page_pattern = Scrubyt::Pattern.new('next_page', args, @extractor)
        end

        #Every other method called inside the definition declares a root pattern
        def method_missing(method_name, *args, &block)
          root_pattern = Scrubyt::Pattern.new(method_name.to_s, args, @extractor, nil, &block)
          @extractor.root_patterns << root_pattern
          root_pattern
        end
      end
      FetchAction.extractor = self
      context.extractor = self
      context.instance_eval(&extractor_definition)
      @evaluating_extractor_definition = false

      if @root_patterns.empty?
        # TODO: this should be an exception
        Scrubyt.log :ERROR, 'No extractor defined, exiting...'
        exit
      end

      #Once all is set up, evaluate the extractor from the root pattern!
      #(the results are collected in @root_results)
      evaluate_extractor
      FetchAction.close_firefox if @mode.is_a?(Hash) && @mode[:close]

      @result = ScrubytResult.new('root')
      @result.push(*@root_results)
      #NOTE(review): evaluate_extractor resets @root_patterns to [] before
      #returning, so an empty list is stored here - confirm this is intended.
      @result.root_patterns = @root_patterns
      @result.source_file = source_file
      @result.source_proc = extractor_definition

      Scrubyt.log :INFO, 'Extraction finished succesfully!'
    end

    def get_hpricot_doc
      FetchAction.get_hpricot_doc
    end

    def get_current_doc_url
      FetchAction.get_current_doc_url
    end

    def get_detail_pattern_relations
      @detail_pattern_relations
    end

    def get_mode
      @mode
    end

    def get_original_host_name
      @original_host_name
    end

    ##
    #Remember the page referenced by +result_node+ as a page to visit later.
    #
    #For an Hpricot element the href of the nearest node carrying one is used
    #(with <tt>&amp;</tt> unescaped); a String result is used as is. Results
    #that yield no href are ignored instead of queueing +nil+.
    def add_to_next_page_list(result_node)
      target = result_node.result
      href =
        if target.is_a? Hpricot::Elem
          node = XPathUtils.find_nearest_node_with_attribute(target, 'href')
          return if node.nil? || node.attributes['href'].nil?
          node.attributes['href'].gsub('&amp;') {'&'}
        elsif target.is_a? String
          target
        end
      return if href.nil?

      @next_page_list << href #TODO need absolute address here 1/4
    end

    ##
    #Evaluate all root patterns on the current page, then keep following
    #either the next_page pattern or the queued next-page list until no
    #unvisited page is left (or the next_page :limit is reached).
    #Returns the accumulated root results.
    def evaluate_extractor
      @root_results ||= []
      current_page_count = 1
      xpath = nil
      catch :quit_next_page_loop do
        loop do
          url = get_current_doc_url #TODO need absolute address here 2/4
          @processed_pages << url
          @root_patterns.each do |root_pattern|
            @root_results.push(*root_pattern.evaluate(get_hpricot_doc, nil))
          end

          #Look for an url which was not processed yet
          while @processed_pages.include? url #TODO need absolute address here 3/4
            if @next_page_pattern.nil?
              throw :quit_next_page_loop if @next_page_list.empty?
              url = @next_page_list.pop
            else
              throw :quit_next_page_loop if @next_page_pattern.options[:limit] == current_page_count
              throw :quit_next_page_loop unless @next_page_pattern.filters[0].generate_XPath_for_example(true)

              xpath = @next_page_pattern.filters[0].xpath
              node = (get_hpricot_doc / xpath).last
              node = XPathUtils.find_nearest_node_with_attribute(node, 'href')
              throw :quit_next_page_loop if node.nil? || node.attributes['href'].nil?

              url = node.attributes['href'].gsub('&amp;') {'&'} #TODO need absolute address here 4/4
            end
          end

          restore_host_name
          if url == "#"
            #A "#" link cannot be fetched; click the matched node instead
            FetchAction.click_by_xpath_without_evaluate(xpath)
          else
            FetchAction.fetch(url)
          end

          current_page_count += 1
        end
      end
      @root_patterns = []
      @root_results
    end

  end
end
@@ -0,0 +1,154 @@
1
+ #
2
+ # TODO: if multiline messages aren't needed, then remove them.
3
+ #
4
+ # TODO: switch to the conventional Ruby logger interface,
5
+ # or create an adapter to it. If the former, then decide what to
6
+ # do with the unit tests.
7
+ #
8
+
9
module Scrubyt
  # Logging is disabled by default. It can be enabled as follows:
  #
  #   Scrubyt.logger = Scrubyt::Logger.new # logs *all* messages to STDERR
  #
  # Assigning +nil+ disables logging again.
  def self.logger=(logger)
    @logger = logger
  end

  # Simple logger implementation, based on Scrubyt's original logging style.
  # Messages will be sent to STDERR. Logging can be limited to certain message
  # levels by specifying them on initialization, e.g.
  #
  #   Scrubyt::Logger.new(:ACTION, :ERROR) # will only log action/error messages
  #
  class Logger
    # A single-line message, rendered as "[LEVEL] text".
    class Message
      def initialize(level, text)
        @level, @text = level.to_s, text.to_s
      end

      def to_s
        prefix + @text
      end

      protected

      def prefix
        @prefix ||= "[#{@level}] "
      end
    end

    # A message made of several lines. The first line carries the level
    # prefix, the following lines are indented to line up with its text.
    class MultiLineMessage < Message
      # +lines+ is an Array of lines; it is left unmodified.
      def initialize(level, lines)
        first_line, *remaining_lines = lines
        super level, first_line

        @lines = remaining_lines
      end

      def to_s
        [ super, indented_lines ].join("\n")
      end

      private

      def indented_lines
        @lines.map { |line| indented(line) }.join("\n")
      end

      def indented(line)
        "#{' ' * prefix.length}#{line}"
      end
    end

    # +levels+ lists the levels (e.g. :ERROR) to log; none means log everything.
    def initialize(*levels)
      @levels = levels
    end

    # Writes +message+ (a String, or an Array of lines) to the output stream
    # if +level+ is enabled for this logger.
    def log(level, message)
      return unless logging?(level)

      message_class = message.is_a?(Array) ? MultiLineMessage : Message

      output_stream.puts message_class.new(level, message)
    end

    # The stream messages are written to; STDERR unless one was assigned.
    def output_stream
      @output_stream || STDERR
    end

    attr_writer :output_stream

    private

    def logging?(level)
      @levels.empty? || @levels.include?(level)
    end
  end

  # Logs +message+ at +level+ through the configured logger;
  # does nothing while no logger is set.
  def self.log(level, message)
    return if logger.nil?

    logger.log(level, message)
  end

  # The currently configured logger, or +nil+.
  # (A bare +private+ does not apply to singleton methods, so this reader
  # has always been publicly callable; it is kept that way.)
  def self.logger
    @logger
  end
end
100
+
101
+
102
# Self-test, only executed when this file is run directly.
if __FILE__ == $0

  require 'test/unit'

  class ScrubytLoggingTestCase < Test::Unit::TestCase
    # Collects everything that is "printed" so the tests can inspect it.
    class FauxOutputStream < Array
      def puts(object)
        self << object.to_s
      end
    end

    # Installs a logger (built with +logger_args+) writing to a fresh
    # FauxOutputStream, which is kept in @stream.
    def setup_logger_with_faux_output_stream!(*logger_args)
      @stream = FauxOutputStream.new
      logger = Scrubyt::Logger.new(*logger_args)
      logger.output_stream = @stream
      Scrubyt.logger = logger
    end

    def test_that_logging_works_with_nil_logger
      Scrubyt.logger = nil
      assert_nothing_raised { Scrubyt.log(:ERROR, 'message') }
    end

    def test_simple_messages_are_output_correctly
      setup_logger_with_faux_output_stream!

      Scrubyt.log :ACTION, 'i just did something'

      assert_equal 1, @stream.size
      assert_equal '[ACTION] i just did something', @stream.first
    end

    def test_that_multiline_messages_are_output_correctly
      setup_logger_with_faux_output_stream!

      Scrubyt.log :ERROR, ['something bad happened', 'dear oh dear']

      # Continuation lines are indented by the width of the level prefix
      indentation = ' ' * '[ERROR] '.length

      assert_equal 1, @stream.size
      assert_equal "[ERROR] something bad happened\n#{indentation}dear oh dear", @stream.first
    end

    def test_that_loggers_can_be_limited_to_specfied_message_levels
      setup_logger_with_faux_output_stream! :ERROR

      Scrubyt.log :ACTION, 'i just did something'
      Scrubyt.log :ERROR, 'something bad happened'

      assert_equal 1, @stream.size
      assert_equal '[ERROR] something bad happened', @stream.first
    end
  end

end
@@ -0,0 +1,139 @@
1
module Scrubyt

  ########################################## NOT USED ANY MORE ##########################################
  require 'set'
  ##
  #=<tt>Post processing results after the extraction</tt>
  #Some things can not be carried out during evaluation - for example
  #the ensure_presence_of_pattern constraint (since the evaluation is top
  #to bottom, at a given point we don't know yet whether the currently
  #evaluated pattern will have a child pattern or not) or removing unneeded
  #results caused by evaluating multiple filters.
  #
  #The sole purpose of this class is to execute these post-processing tasks.
  #
  #All methods are class methods. The helpers further down were written
  #below a bare +private+, which does not affect <tt>def self.</tt> methods,
  #so they have always been publicly callable and are kept that way.
  class PostProcessor
    ##
    #This is just a convenience method to call all the postprocessing
    #functionality and checks
    def self.apply_post_processing(root_pattern)
      ensure_presence_of_pattern_full(root_pattern)
      first_child = root_pattern.children[0]
      remove_multiple_filter_duplicates(root_pattern) if first_child && first_child.filters.size > 1
      report_if_no_results(root_pattern) if root_pattern.evaluation_context.extractor.get_mode != :production
    end

    ##
    #Apply the ensure_presence_of_pattern constraint on
    #the full extractor
    def self.ensure_presence_of_pattern_full(pattern)
      ensure_presence_of_pattern(pattern)
      pattern.children.each { |child| ensure_presence_of_pattern_full(child) }
    end

    ##
    #Remove unneeded results of a pattern (caused by evaluating multiple filters)
    #See for example the B&N scenario - the book titles are extracted two times
    #for every pattern (since both examples generate the same XPath for them)
    #but since always only one of the results has a price, the other is discarded
    def self.remove_multiple_filter_duplicates(pattern)
      remove_multiple_filter_duplicates_intern(pattern) if pattern.parent_of_leaf
      pattern.children.each { |child| remove_multiple_filter_duplicates(child) }
    end

    ##
    #Issue an error report if the document did not extract anything.
    #Probably this is because the structure of the page changed or
    #because of some rather nasty bug - in any case, something wrong
    #is going on, and we need to inform the user about this!
    def self.report_if_no_results(root_pattern)
      return if root_pattern.children.any? { |child| child.result.childmap.size > 0 }

      Scrubyt.log :WARNING, [
        "The extractor did not find any result instances. Most probably this is wrong.",
        "Check your extractor and if you are sure it should work, report a bug!"
      ]
    end

    ##
    #Enforce the ensure_presence_of_pattern constraints of a single pattern:
    #every result instance of +pattern+ which has no descendant among the
    #results of a required child pattern is removed from the pattern's results.
    def self.ensure_presence_of_pattern(pattern)
      #holds the name of those child patterns which have to be present as children of the input parameter
      epop_constraints = pattern.constraints.select do |c|
        c.type == Scrubyt::Constraint::CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_PATTERN
      end
      epop_names = epop_constraints.map { |c| c.target }
      return if epop_names.empty?

      #all_parent_values holds instances extracted by pattern
      all_parent_values = pattern.result.childmap.map { |h| h.values }.flatten

      #indices (into all_parent_values) of result instances we are going to remove
      results_to_remove = Set.new
      pattern.children.each do |child_pattern|
        next unless epop_names.include? child_pattern.name

        #all_child_values holds instances extracted by child_pattern
        all_child_values = child_pattern.result.childmap.map { |h| h.values }.flatten

        all_parent_values.each_with_index do |parent_value, i|
          #Hey! Not just the direct children but all the descendants
          results_to_remove << i unless check_ancestors(parent_value, all_child_values)
        end
      end

      #based on results_to_remove, populate the array 'rejected' which holds the actual instances
      #(and not indices, as in the case of results_to_remove!). The index keeps running across
      #all the value lists, and the instances of every list are collected (not just the last one's).
      rejected = []
      i = -1
      pattern.result.childmap.each do |h|
        h.each_value do |v|
          rejected.concat(v.select { i += 1; results_to_remove.include? i })
        end
      end

      #Finally, do the actual delete!
      pattern.result.childmap.each { |h| h.each_value { |v| rejected.each { |r| v.delete(r) } } }
    end

    ##
    #Returns true if any descendant (child, grandchild, ...) of +parent_value+
    #is contained in +all_child_values+, false otherwise. Values which are
    #not Hpricot elements have no descendants.
    def self.check_ancestors(parent_value, all_child_values)
      return false unless parent_value.is_a? Hpricot::Elem

      (parent_value.children || []).any? do |child|
        all_child_values.include?(child) ||
          (child.is_a?(Hpricot::Elem) && check_ancestors(child, all_child_values))
      end
    end

    ##
    #Does the actual work of remove_multiple_filter_duplicates for one pattern
    def self.remove_multiple_filter_duplicates_intern(pattern)
      #result instance of pattern => rows of child results belonging to it
      possible_duplicates = {}
      pattern.result.childmap.each do |result_hash|
        result_hash.each_value do |instances|
          instances.each do |instance|
            all_child_results = []
            pattern.children.each do |child|
              found = child.result.lookup(instance)
              all_child_results << found unless found.nil?
            end
            next if all_child_results.size <= 1

            #transpose needs lists of equal length, so the shorter ones are
            #padded with nil (this is done in place on the looked up lists)
            longest_result = all_child_results.map { |e| e.size }.max
            all_child_results.each { |res| res << nil while res.size < longest_result }
            possible_duplicates[instance] = all_child_results.transpose
          end
        end
      end

      #Determine the 'real' duplicates: the last row, if there is more than one
      real_duplicates = {}
      possible_duplicates.each do |instance, rows|
        next if rows.empty? || rows.size == 1
        real_duplicates[instance] = rows.last
      end

      #Finally, remove them!
      pattern.children.each do |child|
        child.result.childmap.each do |result_hash|
          result_hash.each do |key, values|
            next unless real_duplicates.key? key
            real_duplicates[key].each { |e| values.delete e }
          end
        end
      end
    end #end of function
  end #end of class PostProcessor
end #end of module Scrubyt