sutch-scrubyt 0.4.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. data/CHANGELOG +350 -0
  2. data/COPYING +340 -0
  3. data/README +121 -0
  4. data/Rakefile +101 -0
  5. data/lib/scrubyt.rb +45 -0
  6. data/lib/scrubyt/core/navigation/agents/firewatir.rb +253 -0
  7. data/lib/scrubyt/core/navigation/agents/mechanize.rb +289 -0
  8. data/lib/scrubyt/core/navigation/fetch_action.rb +54 -0
  9. data/lib/scrubyt/core/navigation/navigation_actions.rb +95 -0
  10. data/lib/scrubyt/core/scraping/compound_example.rb +30 -0
  11. data/lib/scrubyt/core/scraping/constraint.rb +169 -0
  12. data/lib/scrubyt/core/scraping/constraint_adder.rb +49 -0
  13. data/lib/scrubyt/core/scraping/filters/attribute_filter.rb +14 -0
  14. data/lib/scrubyt/core/scraping/filters/base_filter.rb +112 -0
  15. data/lib/scrubyt/core/scraping/filters/constant_filter.rb +9 -0
  16. data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +37 -0
  17. data/lib/scrubyt/core/scraping/filters/download_filter.rb +64 -0
  18. data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +9 -0
  19. data/lib/scrubyt/core/scraping/filters/regexp_filter.rb +13 -0
  20. data/lib/scrubyt/core/scraping/filters/script_filter.rb +11 -0
  21. data/lib/scrubyt/core/scraping/filters/text_filter.rb +34 -0
  22. data/lib/scrubyt/core/scraping/filters/tree_filter.rb +138 -0
  23. data/lib/scrubyt/core/scraping/pattern.rb +359 -0
  24. data/lib/scrubyt/core/scraping/pre_filter_document.rb +14 -0
  25. data/lib/scrubyt/core/scraping/result_indexer.rb +90 -0
  26. data/lib/scrubyt/core/shared/extractor.rb +168 -0
  27. data/lib/scrubyt/logging.rb +154 -0
  28. data/lib/scrubyt/output/post_processor.rb +139 -0
  29. data/lib/scrubyt/output/result.rb +44 -0
  30. data/lib/scrubyt/output/result_dumper.rb +154 -0
  31. data/lib/scrubyt/output/result_node.rb +140 -0
  32. data/lib/scrubyt/output/scrubyt_result.rb +42 -0
  33. data/lib/scrubyt/utils/compound_example_lookup.rb +50 -0
  34. data/lib/scrubyt/utils/ruby_extensions.rb +85 -0
  35. data/lib/scrubyt/utils/shared_utils.rb +58 -0
  36. data/lib/scrubyt/utils/simple_example_lookup.rb +40 -0
  37. data/lib/scrubyt/utils/xpathutils.rb +202 -0
  38. data/test/blackbox_test.rb +60 -0
  39. data/test/blackbox_tests/basic/multi_root.rb +6 -0
  40. data/test/blackbox_tests/basic/simple.rb +5 -0
  41. data/test/blackbox_tests/detail_page/one_detail_page.rb +9 -0
  42. data/test/blackbox_tests/detail_page/two_detail_pages.rb +9 -0
  43. data/test/blackbox_tests/next_page/next_page_link.rb +7 -0
  44. data/test/blackbox_tests/next_page/page_list_links.rb +7 -0
  45. metadata +117 -0
data/lib/scrubyt/core/scraping/pre_filter_document.rb
@@ -0,0 +1,14 @@
+ module Scrubyt
+   ##
+   #=<tt>Apply different functions on the input document</tt>
+   #Before the document is passed to Hpricot for parsing, we may need
+   #to apply transformations that are clumsy, inappropriate or impossible
+   #to carry out once the document is loaded.
+   class PreFilterDocument
+     #Replace <br/> tags with newlines and turn non-breaking spaces (\240) into plain spaces
+     def self.br_to_newline(doc)
+       doc = doc.gsub(/<br[ \/]*>/i, "\r\n")
+       doc = doc.tr("\240", " ")
+     end #end of function br_to_newline
+   end #end of class PreFilterDocument
+ end #end of module Scrubyt
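A quick illustration of how a pre-filter like the one above is meant to be used before parsing; the sample markup and the Hpricot call are assumptions for illustration, not part of this package:

    require 'hpricot'
    require 'scrubyt'

    html = "First line<br>Second line\240with a non-breaking space"

    # br_to_newline replaces <br> variants with "\r\n" and turns \240 bytes into plain spaces
    cleaned = Scrubyt::PreFilterDocument.br_to_newline(html)
    doc = Hpricot(cleaned)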
data/lib/scrubyt/core/scraping/result_indexer.rb
@@ -0,0 +1,90 @@
+ module Scrubyt
+   ##
+   #=<tt>Selecting results based on indices</tt>
+   #
+   #If the result is list-like (as opposed to a 'hard' result, like a _price_ or a _title_),
+   #probably with a variable count of results (like tags, authors etc.), you may need just
+   #specific elements - like the last one, every third one, or the ones at specific indices.
+   #In this case you should use the select_indices syntax.
+   class ResultIndexer
+     attr_reader :indices_to_extract
+
+     def initialize(*args)
+       select_indices(*args)
+     end
+
+     ##
+     #Perform selection of the desired result instances, based on their indices
+     def select_indices_to_extract(ary)
+       return ary if @indices_to_extract == nil
+       to_keep = []
+       @indices_to_extract.each {|e|
+         if e.is_a? Symbol
+           case e
+           when :first
+             to_keep << 0
+           when :last
+             to_keep << ary.size-1
+           when :all_but_last
+             (0..ary.size-2).each {|i| to_keep << i}
+           when :all_but_first
+             (1..ary.size-1).each {|i| to_keep << i}
+           when :every_even
+             (0..ary.size).each {|i| to_keep << i if (i % 2 == 1)}
+           when :every_odd
+             (0..ary.size).each {|i| to_keep << i if (i % 2 == 0)}
+           when :every_second
+             (0..ary.size).each {|i| to_keep << i if (i % 2 == 0)}
+           when :every_third
+             (0..ary.size).each {|i| to_keep << i if (i % 3 == 0)}
+           when :every_fourth
+             (0..ary.size).each {|i| to_keep << i if (i % 4 == 0)}
+           end
+         end
+       }
+       @indices_to_extract.each {|i| to_keep << i if !i.is_a? Symbol}
+       to_keep.sort!
+       ary.reject! {|e| !to_keep.include? ary.index(e)}
+       ary
+     end
+
+     private
+     ##
+     #Do not return the whole result set, just the specified indices - like
+     #first, last, every odd index, indices from [1..3] etc.
+     #
+     #This method can accept:
+     #- a range, like (2..3)
+     #- an array of indices, like [1,2,3]
+     #- a specified set of keywords:
+     #  - :first
+     #  - :last
+     #  - :every_even
+     #  - :every_odd
+     #  (there can be more of these keywords in one select_indices call)
+     def select_indices(*args)
+       indices_to_grab = args[0]
+       case indices_to_grab.class.to_s
+       when "Range"
+         @indices_to_extract = indices_to_grab.to_a
+       when "Array"
+         nested_arrays = []
+         indices_to_grab.each {|e|
+           if e.is_a? Array
+             nested_arrays << e
+           elsif e.is_a? Range
+             nested_arrays << e.to_a
+           end
+         }
+         @indices_to_extract = indices_to_grab
+         nested_arrays.each {|a| a.each {|e| @indices_to_extract << e if !@indices_to_extract.include? e }}
+         @indices_to_extract.reject! {|e| ((e.is_a? Range) || (e.is_a? Array)) }
+       when "Symbol"
+         #parse this when we already have the results
+         @indices_to_extract = [indices_to_grab]
+       else
+         puts "Invalid index specification"
+       end
+     end #end of function select_indices
+   end #end of class ResultIndexer
+ end #end of module Scrubyt
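Based on the class above, a small standalone sketch of the accepted index specifications; the sample array is a placeholder for an extracted result list:

    require 'scrubyt'

    items = %w[a b c d e]

    # Keyword form: keep only the first and last instances.
    Scrubyt::ResultIndexer.new([:first, :last]).select_indices_to_extract(items.dup)
    #=> ["a", "e"]

    # Range form: keep the instances at indices 1 through 3.
    Scrubyt::ResultIndexer.new(1..3).select_indices_to_extract(items.dup)
    #=> ["b", "c", "d"]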
data/lib/scrubyt/core/shared/extractor.rb
@@ -0,0 +1,168 @@
+ module Scrubyt
+   ##
+   #=<tt>Driving the whole extraction process</tt>
+   #
+   #Extractor is a performer class - it gets an extractor definition and carries
+   #out the actions and evaluates the wrappers sequentially.
+   #
+   #Originally the navigation actions were here as well, but since the class got too
+   #big, they were factored out into their own class, NavigationActions.
+   class Extractor
+     include FetchAction
+
+     attr_accessor :result, :evaluating_extractor_definition, :mode, :root_patterns, :next_page_pattern #, :hpricot_doc, :current_doc_url
+
+     #The definition of the extractor is passed through this method
+     def self.define(mode=nil, &extractor_definition)
+       if mode.is_a?(Hash)
+         if mode[:agent] == :firefox
+           FetchAction.class_eval do
+             include Navigation::Firewatir
+           end
+         else
+           FetchAction.class_eval do
+             include Navigation::Mechanize
+           end
+         end
+       else
+         FetchAction.class_eval do
+           include Navigation::Mechanize
+         end
+       end
+       extractor = self.new(mode, extractor_definition)
+       extractor.result
+     end
+
+     def self.load(filename)
+       define(&eval(IO.read(filename)))
+     end
+
+     def initialize(mode, extractor_definition)
+       @mode = mode
+       @root_patterns = []
+       @next_page_pattern = nil
+       # @hpricot_doc = nil
+       # @hpricot_doc_url = nil
+       @evaluating_extractor_definition = false
+       @next_page_list = []
+       @processed_pages = []
+
+       backtrace = SharedUtils.get_backtrace
+       parts = backtrace[1].split(':')
+       source_file = parts[0]
+
+       Scrubyt.log :MODE, mode == :production ? 'Production' : 'Learning'
+
+       @evaluating_extractor_definition = true
+       context = Object.new
+       context.extend NavigationActions
+       context.instance_eval do
+         def extractor=(value)
+           @extractor = value
+         end
+
+         def next_page(*args)
+           @extractor.next_page_pattern = Scrubyt::Pattern.new('next_page', args, @extractor)
+         end
+
+         def method_missing(method_name, *args, &block)
+           root_pattern = Scrubyt::Pattern.new(method_name.to_s, args, @extractor, nil, &block)
+           @extractor.root_patterns << root_pattern
+           root_pattern
+         end
+       end
+       context.extractor = self
+       context.instance_eval(&extractor_definition)
+       @evaluating_extractor_definition = false
+
+       if @root_patterns.empty?
+         # TODO: this should be an exception
+         Scrubyt.log :ERROR, 'No extractor defined, exiting...'
+         exit
+       end
+
+       #Once all is set up, evaluate the extractor from the root pattern!
+       root_results = evaluate_extractor
+       FetchAction.close_firefox if @mode.is_a?(Hash) && @mode[:close]
+
+       @result = ScrubytResult.new('root')
+       @result.push(*root_results)
+       @result.root_patterns = @root_patterns
+       @result.source_file = source_file
+       @result.source_proc = extractor_definition
+
+       #Return the root pattern
+       Scrubyt.log :INFO, 'Extraction finished successfully!'
+     end
+
+     def get_hpricot_doc
+       FetchAction.get_hpricot_doc
+     end
+
+     def get_current_doc_url
+       FetchAction.get_current_doc_url
+     end
+
+     def get_detail_pattern_relations
+       @detail_pattern_relations
+     end
+
+     def get_mode
+       @mode
+     end
+
+     def get_original_host_name
+       @original_host_name
+     end
+
+     def add_to_next_page_list(result_node)
+       if result_node.result.is_a? Hpricot::Elem
+         node = XPathUtils.find_nearest_node_with_attribute(result_node.result, 'href')
+         return if node == nil || node.attributes['href'] == nil
+         href = node.attributes['href'].gsub('&amp;') {'&'}
+       elsif result_node.result.is_a? String
+         href = result_node.result
+       end
+       url = href #TODO need absolute address here 1/4
+       @next_page_list << url
+     end
+
+     def evaluate_extractor
+       root_results = []
+       current_page_count = 1
+       catch :quit_next_page_loop do
+         loop do
+           url = get_current_doc_url #TODO need absolute address here 2/4
+           @processed_pages << url
+           @root_patterns.each do |root_pattern|
+             root_results.push(*root_pattern.evaluate(get_hpricot_doc, nil))
+           end
+
+           while @processed_pages.include? url #TODO need absolute address here 3/4
+             if !@next_page_pattern.nil?
+               throw :quit_next_page_loop if @next_page_pattern.options[:limit] == current_page_count
+               throw :quit_next_page_loop unless @next_page_pattern.filters[0].generate_XPath_for_example(true)
+               xpath = @next_page_pattern.filters[0].xpath
+               node = (get_hpricot_doc/xpath).map.last
+               node = XPathUtils.find_nearest_node_with_attribute(node, 'href')
+               throw :quit_next_page_loop if node == nil || node.attributes['href'] == nil
+               href = node.attributes['href'].gsub('&amp;') {'&'}
+               throw :quit_next_page_loop if href == nil
+               url = href #TODO need absolute address here 4/4
+             else
+               throw :quit_next_page_loop if @next_page_list.empty?
+               url = @next_page_list.pop
+             end
+           end
+
+           restore_host_name
+           FetchAction.fetch(url)
+
+           current_page_count += 1
+         end
+       end
+       root_results
+     end
+
+   end
+ end
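For context, a hedged sketch of how an extractor definition is typically handed to Extractor.define; the URL, pattern names and example strings are placeholders, and the to_xml call assumes the XML dumper from scrubyt/output is loaded:

    require 'rubygems'
    require 'scrubyt'

    # Passing :agent => :firefox in the mode hash would mix the FireWatir
    # navigation module into FetchAction; by default Mechanize is used.
    data = Scrubyt::Extractor.define do
      fetch 'http://www.example.com/books'

      # Root pattern created via method_missing; child fields are given by example strings.
      book do
        title 'An example book title'
        price '$9.99'
      end

      # Stop after 3 pages (compared against current_page_count in evaluate_extractor).
      next_page 'Next page', :limit => 3
    end

    puts data.to_xml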
data/lib/scrubyt/logging.rb
@@ -0,0 +1,154 @@
+ #
+ # TODO: if multiline messages aren't needed, then remove them.
+ #
+ # TODO: switch to the conventional Ruby logger interface,
+ # or create an adapter to it. If the former, then decide what to
+ # do with the unit tests.
+ #
+
+ module Scrubyt
+   # Logging is disabled by default. It can be enabled as follows:
+   #
+   # Scrubyt.logger = Scrubyt::Logger.new # logs *all* messages to STDERR
+   #
+   def self.logger=(logger)
+     @logger = logger
+   end
+
+   # Simple logger implementation, based on Scrubyt's original logging style.
+   # Messages will be sent to STDERR. Logging can be limited to certain message
+   # levels by specifying them on initialization, e.g.
+   #
+   # Scrubyt::Logger.new(:ACTION, :ERROR) # will only log action/error messages
+   #
+   class Logger
+     class Message
+       def initialize(level, text)
+         @level, @text = level.to_s, text.to_s
+       end
+
+       def to_s
+         prefix + @text
+       end
+
+       protected
+
+       def prefix
+         @prefix ||= "[#{@level}] "
+       end
+     end
+
+     class MultiLineMessage < Message
+       def initialize(level, lines)
+         super level, lines.shift
+
+         @lines = lines
+       end
+
+       def to_s
+         [ super, indented_lines ] * "\n"
+       end
+
+       private
+
+       def indented_lines
+         @lines.inject([]) { |lines, line| lines << indented(line) } * "\n"
+       end
+
+       def indented(line)
+         ' ' * prefix.length + line
+       end
+     end
+
+     def initialize(*levels)
+       @levels = levels
+     end
+
+     def log(level, message)
+       return unless logging?(level)
+
+       message_class = message.is_a?(Array) ? MultiLineMessage : Message
+
+       output_stream.puts message_class.new(level, message)
+     end
+
+     def output_stream
+       @output_stream || STDERR
+     end
+
+     attr_writer :output_stream
+
+     private
+
+     def logging?(level)
+       @levels.empty? || @levels.include?(level)
+     end
+   end
+
+   def self.log(level, message)
+     return if logger.nil?
+
+     logger.log(level, message)
+   end
+
+   private
+
+   def self.logger
+     @logger
+   end
+ end
+
+
+ if __FILE__ == $0 then
+
+   require 'test/unit'
+
+   class ScrubytLoggingTestCase < Test::Unit::TestCase
+     class FauxOutputStream < Array
+       def puts(object)
+         self << object.to_s
+       end
+     end
+
+     def setup_logger_with_faux_output_stream!(*logger_args)
+       @stream = FauxOutputStream.new
+       logger = Scrubyt::Logger.new(*logger_args)
+       logger.output_stream = @stream
+       Scrubyt.logger = logger
+     end
+
+     def test_that_logging_works_with_nil_logger
+       Scrubyt.logger = nil
+       assert_nothing_raised { Scrubyt.log(:ERROR, 'message') }
+     end
+
+     def test_simple_messages_are_output_correctly
+       setup_logger_with_faux_output_stream!
+
+       Scrubyt.log :ACTION, 'i just did something'
+
+       assert_equal 1, @stream.size
+       assert_equal '[ACTION] i just did something', @stream.first
+     end
+
+     def test_that_multiline_messages_are_output_correctly
+       setup_logger_with_faux_output_stream!
+
+       Scrubyt.log :ERROR, ['something bad happened', 'dear oh dear']
+
+       assert_equal 1, @stream.size
+       assert_equal "[ERROR] something bad happened\n        dear oh dear", @stream.first
+     end
+
+     def test_that_loggers_can_be_limited_to_specified_message_levels
+       setup_logger_with_faux_output_stream! :ERROR
+
+       Scrubyt.log :ACTION, 'i just did something'
+       Scrubyt.log :ERROR, 'something bad happened'
+
+       assert_equal 1, @stream.size
+       assert_equal '[ERROR] something bad happened', @stream.first
+     end
+   end
+
+ end
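Following the comments above, a minimal usage sketch of the logger; the message strings are placeholders:

    require 'scrubyt'

    # Logging is off by default; install a logger and limit it to error messages.
    Scrubyt.logger = Scrubyt::Logger.new(:ERROR)

    Scrubyt.log :ERROR,  'something went wrong'            # printed to STDERR as "[ERROR] ..."
    Scrubyt.log :ACTION, 'fetched the next page'           # filtered out by the level list
    Scrubyt.log :ERROR,  ['first line', 'second line']     # rendered as an indented multi-line message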
data/lib/scrubyt/output/post_processor.rb
@@ -0,0 +1,139 @@
+ module Scrubyt
+
+   ########################################## NOT USED ANY MORE ##########################################
+   require 'set'
+   ##
+   #=<tt>Post processing results after the extraction</tt>
+   #Some things cannot be carried out during evaluation - for example
+   #the ensure_presence_of_pattern constraint (since the evaluation is top
+   #to bottom, at a given point we don't know yet whether the currently
+   #evaluated pattern will have a child pattern or not) or removing unneeded
+   #results caused by evaluating multiple filters.
+   #
+   #The sole purpose of this class is to execute these post-processing tasks.
+   class PostProcessor
+     ##
+     #This is just a convenience method to call all the postprocessing
+     #functionality and checks
+     def self.apply_post_processing(root_pattern)
+       ensure_presence_of_pattern_full(root_pattern)
+       remove_multiple_filter_duplicates(root_pattern) if root_pattern.children[0].filters.size > 1
+       report_if_no_results(root_pattern) if root_pattern.evaluation_context.extractor.get_mode != :production
+     end
+
+     ##
+     #Apply the ensure_presence_of_pattern constraint on
+     #the full extractor
+     def self.ensure_presence_of_pattern_full(pattern)
+       ensure_presence_of_pattern(pattern)
+       pattern.children.each {|child| ensure_presence_of_pattern_full(child)}
+     end
+
+     ##
+     #Remove unneeded results of a pattern (caused by evaluating multiple filters)
+     #See for example the B&N scenario - the book titles are extracted twice
+     #for every pattern (since both examples generate the same XPath for them),
+     #but since only one of the results ever has a price, the other is discarded
+     def self.remove_multiple_filter_duplicates(pattern)
+       remove_multiple_filter_duplicates_intern(pattern) if pattern.parent_of_leaf
+       pattern.children.each {|child| remove_multiple_filter_duplicates(child)}
+     end
+
+     ##
+     #Issue an error report if the document did not extract anything.
+     #Probably this is because the structure of the page changed or
+     #because of some rather nasty bug - in any case, something wrong
+     #is going on, and we need to inform the user about this!
+     def self.report_if_no_results(root_pattern)
+       results_found = false
+       root_pattern.children.each {|child| return if (child.result.childmap.size > 0)}
+
+       Scrubyt.log :WARNING, [
+         "The extractor did not find any result instances. Most probably this is wrong.",
+         "Check your extractor and if you are sure it should work, report a bug!"
+       ]
+     end
+
+     private
+     def self.ensure_presence_of_pattern(pattern)
+       #holds the names of those child patterns which have to be present as children of the input parameter
+       epop_names = pattern.constraints.select {|c| c.type == Scrubyt::Constraint::CONSTRAINT_TYPE_ENSURE_PRESENCE_OF_PATTERN}.map {|c| c.target}
+       return if epop_names.empty?
+       #all_parent_values holds instances extracted by pattern
+       all_parent_values = []
+       pattern.result.childmap.each { |h| all_parent_values << h.values }
+       all_parent_values.flatten!
+       #indices of result instances (of pattern) we are going to remove
+       results_to_remove = Set.new
+       pattern.children.each do |child_pattern|
+         #all_child_values holds instances extracted by child_pattern
+         all_child_values = []
+         child_pattern.result.childmap.each { |h| all_child_values << h.values }
+         all_child_values.flatten!
+
+         #populate results_to_remove
+         i = 0
+         all_parent_values.each do |parent_value|
+           #Not just the direct children but all the descendants are checked
+           @found_ancestor = false
+           check_ancestors(parent_value, all_child_values)
+
+           results_to_remove << i if (!@found_ancestor && (epop_names.include? child_pattern.name))
+           i += 1
+         end
+       end
+       #based on results_to_remove, populate the array 'rejected' which holds the actual instances
+       #(and not indices, as in the case of results_to_remove!). In other words, we are mapping
+       #results_to_remove indices to their actual instances
+       rejected = []
+       i = -1
+       pattern.result.childmap.each do |h|
+         h.each { |k,v| rejected = v.reject {|e| i += 1; !results_to_remove.include? i } }
+       end
+
+       #Finally, do the actual delete!
+       pattern.result.childmap.each { |h| h.each { |k,v| rejected.each { |r| v.delete(r)} } }
+     end
+
+     def self.check_ancestors(parent_value, all_child_values)
+       parent_value.children.each { |child| @found_ancestor = true if all_child_values.include? child } if
+         parent_value.is_a? Hpricot::Elem
+       parent_value.children.each { |child| check_ancestors(child, all_child_values) if child.is_a? Hpricot::Elem } if parent_value.is_a? Hpricot::Elem
+     end
+
+     def self.remove_multiple_filter_duplicates_intern(pattern)
+       possible_duplicates = {}
+       longest_result = 0
+       pattern.result.childmap.each { |r|
+         r.each do |k,v|
+           v.each do |x|
+             all_child_results = []
+             pattern.children.each { |child|
+               temp_res = child.result.lookup(x)
+               all_child_results << temp_res if temp_res != nil
+             }
+             next if all_child_results.size <= 1
+             longest_result = all_child_results.map {|e| e.size}.max
+             all_child_results.each { |r| (r.size+1).upto(longest_result) { r << nil } }
+             possible_duplicates[x] = all_child_results.transpose
+           end
+         end
+       }
+       #Determine the 'real' duplicates
+       real_duplicates = {}
+       possible_duplicates.each { |k,v|
+         next if v.size == 1
+         v.each { |r| real_duplicates[k] = r }
+       }
+
+       #Finally, remove them!
+       pattern.children.each { |child|
+         child.result.childmap.each { |r|
+           r.each { |k,v|
+             real_duplicates[k].each {|e| v.delete e} if real_duplicates.keys.include? k
+           }
+         }
+       }
+     end #end of function
+   end #end of class PostProcessor
+ end #end of module Scrubyt
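Although the banner above marks this class as no longer used, its single entry point is apply_post_processing; a minimal invocation sketch, where root_pattern stands for a fully evaluated root Scrubyt::Pattern (hypothetical here):

    # Runs the ensure_presence_of_pattern constraint over the whole pattern tree,
    # prunes duplicate results produced by multiple filters, and logs a warning
    # if nothing was extracted (outside :production mode).
    Scrubyt::PostProcessor.apply_post_processing(root_pattern)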