scrubyt 0.2.6 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. data/CHANGELOG +59 -12
  2. data/Rakefile +2 -2
  3. data/lib/scrubyt.rb +24 -6
  4. data/lib/scrubyt/core/navigation/fetch_action.rb +91 -56
  5. data/lib/scrubyt/core/navigation/navigation_actions.rb +32 -22
  6. data/lib/scrubyt/core/scraping/constraint.rb +53 -57
  7. data/lib/scrubyt/core/scraping/constraint_adder.rb +15 -38
  8. data/lib/scrubyt/core/scraping/filters/attribute_filter.rb +17 -0
  9. data/lib/scrubyt/core/scraping/filters/base_filter.rb +111 -0
  10. data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +14 -0
  11. data/lib/scrubyt/core/scraping/filters/download_filter.rb +49 -0
  12. data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +7 -0
  13. data/lib/scrubyt/core/scraping/filters/regexp_filter.rb +17 -0
  14. data/lib/scrubyt/core/scraping/filters/tree_filter.rb +121 -0
  15. data/lib/scrubyt/core/scraping/pattern.rb +292 -157
  16. data/lib/scrubyt/core/scraping/result_indexer.rb +51 -47
  17. data/lib/scrubyt/core/shared/evaluation_context.rb +3 -42
  18. data/lib/scrubyt/core/shared/extractor.rb +122 -163
  19. data/lib/scrubyt/output/export.rb +59 -174
  20. data/lib/scrubyt/output/post_processor.rb +4 -3
  21. data/lib/scrubyt/output/result.rb +8 -9
  22. data/lib/scrubyt/output/result_dumper.rb +81 -42
  23. data/lib/scrubyt/utils/compound_example_lookup.rb +11 -11
  24. data/lib/scrubyt/utils/ruby_extensions.rb +113 -0
  25. data/lib/scrubyt/utils/shared_utils.rb +39 -26
  26. data/lib/scrubyt/utils/simple_example_lookup.rb +6 -6
  27. data/lib/scrubyt/utils/xpathutils.rb +31 -30
  28. data/test/unittests/constraint_test.rb +11 -7
  29. data/test/unittests/extractor_test.rb +6 -6
  30. data/test/unittests/filter_test.rb +66 -66
  31. metadata +22 -15
  32. data/lib/scrubyt/core/scraping/filter.rb +0 -201
@@ -1,18 +1,18 @@
1
1
  module Scrubyt
2
- ##
3
- #=<tt>Selecting results based on indices</tt>
4
- #
5
- #If the results is list-like (as opposed to a 'hard' result, like a _price_ or a _title_),
6
- #probably with a variable count of results (like tags, authors etc.), you may need just
7
- #specific elements - like the last one, every third one, or at specific indices.
8
- #In this case you should use the select_indices syntax.
2
+ ##
3
+ #=<tt>Selecting results based on indices</tt>
4
+ #
5
+ #If the results is list-like (as opposed to a 'hard' result, like a _price_ or a _title_),
6
+ #probably with a variable count of results (like tags, authors etc.), you may need just
7
+ #specific elements - like the last one, every third one, or at specific indices.
8
+ #In this case you should use the select_indices syntax.
9
9
  class ResultIndexer
10
10
  attr_reader :indices_to_extract
11
-
11
+
12
12
  def initialize(*args)
13
13
  select_indices(*args)
14
14
  end
15
-
15
+
16
16
  ##
17
17
  #Perform selection of the desires result instances, based on their indices
18
18
  def select_indices_to_extract(ary)
@@ -21,24 +21,24 @@ module Scrubyt
21
21
  @indices_to_extract.each {|e|
22
22
  if e.is_a? Symbol
23
23
  case e
24
- when :first
25
- to_keep << 0
26
- when :last
27
- to_keep << ary.size-1
28
- when :all_but_last
29
- (0..ary.size-2).each {|i| to_keep << i}
30
- when :all_but_first
31
- (1..ary.size-1).each {|i| to_keep << i}
32
- when :every_even
33
- (0..ary.size).each {|i| to_keep << i if (i % 2 == 1)}
34
- when :every_odd
35
- (0..ary.size).each {|i| to_keep << i if (i % 2 == 0)}
36
- when :every_second
37
- (0..ary.size).each {|i| to_keep << i if (i % 2 == 0)}
38
- when :every_third
39
- (0..ary.size).each {|i| to_keep << i if (i % 3 == 0)}
40
- when :every_fourth
41
- (0..ary.size).each {|i| to_keep << i if (i % 4 == 0)}
24
+ when :first
25
+ to_keep << 0
26
+ when :last
27
+ to_keep << ary.size-1
28
+ when :all_but_last
29
+ (0..ary.size-2).each {|i| to_keep << i}
30
+ when :all_but_first
31
+ (1..ary.size-1).each {|i| to_keep << i}
32
+ when :every_even
33
+ (0..ary.size).each {|i| to_keep << i if (i % 2 == 1)}
34
+ when :every_odd
35
+ (0..ary.size).each {|i| to_keep << i if (i % 2 == 0)}
36
+ when :every_second
37
+ (0..ary.size).each {|i| to_keep << i if (i % 2 == 0)}
38
+ when :every_third
39
+ (0..ary.size).each {|i| to_keep << i if (i % 3 == 0)}
40
+ when :every_fourth
41
+ (0..ary.size).each {|i| to_keep << i if (i % 4 == 0)}
42
42
  end
43
43
  end
44
44
  }
@@ -48,7 +48,11 @@ module Scrubyt
48
48
  ary
49
49
  end
50
50
 
51
- private
51
+ # def to_sexp
52
+ # [:array, *@indices_to_extract.collect { |index| [:lit, index] }]
53
+ # end
54
+
55
+ private
52
56
  ##
53
57
  #Do not return the whole result set, just specified indices - like
54
58
  #first,last, every odd index, indices from [1..3] etc.
@@ -65,25 +69,25 @@ private
65
69
  def select_indices(*args)
66
70
  indices_to_grab = args[0]
67
71
  case indices_to_grab.class.to_s
68
- when "Range"
69
- @indices_to_extract = indices_to_grab.to_a
70
- when "Array"
71
- nested_arrays = []
72
- indices_to_grab.each {|e|
73
- if e.is_a? Array
74
- nested_arrays << e
75
- elsif e.is_a? Range
76
- nested_arrays << e.to_a
77
- end
78
- }
79
- @indices_to_extract = indices_to_grab
80
- nested_arrays.each {|a| a.each {|e| @indices_to_extract << e if !@indices_to_extract.include? e }}
81
- @indices_to_extract.reject! {|e| ((e.is_a? Range) || (e.is_a? Array)) }
82
- when "Symbol"
83
- #parse this when we already have the results
84
- @indices_to_extract = [indices_to_grab]
85
- else
86
- puts "Invalid index specification"
72
+ when "Range"
73
+ @indices_to_extract = indices_to_grab.to_a
74
+ when "Array"
75
+ nested_arrays = []
76
+ indices_to_grab.each {|e|
77
+ if e.is_a? Array
78
+ nested_arrays << e
79
+ elsif e.is_a? Range
80
+ nested_arrays << e.to_a
81
+ end
82
+ }
83
+ @indices_to_extract = indices_to_grab
84
+ nested_arrays.each {|a| a.each {|e| @indices_to_extract << e if !@indices_to_extract.include? e }}
85
+ @indices_to_extract.reject! {|e| ((e.is_a? Range) || (e.is_a? Array)) }
86
+ when "Symbol"
87
+ #parse this when we already have the results
88
+ @indices_to_extract = [indices_to_grab]
89
+ else
90
+ puts "Invalid index specification"
87
91
  end
88
92
  end #end of function select_indices
89
93
  end #end of class ResultIndexer
@@ -13,15 +13,14 @@ module Scrubyt
13
13
  #two classes need to communicate frequently as well as share different information
14
14
  #and this is accomplished through EvaluationContext.
15
15
  class EvaluationContext
16
- attr_accessor :root_pattern, :document_index, :block_count,
17
- :extractor, :uri_builder
16
+ attr_accessor :root_pattern, :document_index, :extractor, :uri_builder, :evaluating_extractor_definition
18
17
 
19
18
  def initialize
20
19
  @root_pattern = nil
21
20
  @next_page = nil
22
- @block_count = 0
23
21
  @document_index = 0
24
22
  @extractor = nil
23
+ @evaluating_extractor_definition = false
25
24
  end
26
25
 
27
26
  ##
@@ -33,6 +32,7 @@ module Scrubyt
33
32
  uri_builder.generate_next_uri
34
33
  return nil if temp_document == nil
35
34
  clear_sources_and_sinks(@root_pattern)
35
+ FetchAction.restore_host_name
36
36
  @extractor.fetch(temp_document)
37
37
  attach_current_document
38
38
  end
@@ -49,15 +49,6 @@ module Scrubyt
49
49
  @root_pattern.result.add_result(@root_pattern.filters[0].source,
50
50
  @root_pattern.filters[0].sink)
51
51
  end
52
-
53
- ##
54
- #Based on the given examples, calculate the XPaths for the tree patterns
55
- def setup_examples
56
- get_root_pattern(nil)
57
- mark_leaf_parents(@root_pattern)
58
- generate_examples(@root_pattern)
59
- check_for_multipe_examples(@root_pattern)
60
- end
61
52
 
62
53
  ##
63
54
  #After crawling to the new page, the sources and sinks need to be cleaned
@@ -85,35 +76,5 @@ module Scrubyt
85
76
  end
86
77
  @uri_builder = URIBuilder.new(pattern,args)
87
78
  end
88
-
89
- def get_root_pattern(pattern)
90
- if @root_pattern == nil
91
- while (pattern.parent != nil)
92
- get_root_pattern(pattern.parent)
93
- end
94
- @root_pattern = pattern
95
- end
96
- end
97
-
98
- private
99
- def mark_leaf_parents(pattern)
100
- pattern.children.each { |child|
101
- pattern.parent_of_leaf = true if child.children.size == 0
102
- }
103
- pattern.children.each { |child| mark_leaf_parents(child) }
104
- end
105
-
106
- ##
107
- #Check the tree and turn all the XPaths for the examples (but the topmost one)
108
- #into relative ones
109
- def check_for_multipe_examples(pattern)
110
- pattern.children.each {|child_pattern| check_for_multipe_examples(child_pattern) }
111
- pattern.filters.each { |filter| filter.setup_relative_XPaths } if pattern.type == Pattern::PATTERN_TYPE_TREE
112
- end
113
-
114
- def generate_examples(pattern)
115
- pattern.children.each {|child_pattern| generate_examples(child_pattern) }
116
- pattern.filters.each { |filter| filter.generate_XPath_for_example(false) } if pattern.type == Pattern::PATTERN_TYPE_TREE
117
- end #end of function generate_examples
118
79
  end #end of class EvaluationContext
119
80
  end #end of module Scrubyt
@@ -1,20 +1,19 @@
1
- require 'open-uri'
2
- require 'rubygems'
3
- require 'mechanize'
4
- require 'hpricot'
5
-
6
1
  module Scrubyt
7
- ##
8
- #=<tt>Driving the whole extraction process</tt>
9
- #
10
- #Extractor is a performer class - it gets an extractor definition and carries
11
- #out the actions and evaluates the wrappers sequentially.
12
- #
13
- #Originally also the navigation actions were here, but since the class got too
14
- #big, they were factored out to an own class, NavigationAction.
15
- class Extractor
2
+ ##
3
+ #=<tt>Driving the whole extraction process</tt>
4
+ #
5
+ #Extractor is a performer class - it gets an extractor definition and carries
6
+ #out the actions and evaluates the wrappers sequentially.
7
+ #
8
+ #Originally also the navigation actions were here, but since the class got too
9
+ #big, they were factored out to an own class, NavigationAction.
10
+ class Extractor
16
11
  #The definition of the extractor is passed through this method
17
12
  def self.define(mode=nil, &extractor_definition)
13
+ backtrace = SharedUtils.get_backtrace
14
+ parts = backtrace[1].split(':')
15
+ source_file = parts[0]
16
+
18
17
  @@mode = mode
19
18
  #We are keeping the relations between the detail patterns and their root patterns
20
19
  @@detail_extractor_to_pattern_name = {}
@@ -25,17 +24,19 @@ module Scrubyt
25
24
  puts "[MODE] #{mode_name}"
26
25
  NavigationActions.new
27
26
  @@evaluation_context = EvaluationContext.new
28
- #Hack up an artificial root pattern (i.e. do not return the pattern which
27
+ #Hack up an artificial root pattern (i.e. do not return the pattern which
29
28
  #is the root one in the user's definition, but rather the real (invisible)
30
29
  #root pattern
31
- evaluated_extractor = (class_eval(&extractor_definition))
32
- if evaluated_extractor == nil
30
+ @@evaluation_context.evaluating_extractor_definition = true
31
+ class_eval(&extractor_definition)
32
+ @@evaluation_context.evaluating_extractor_definition = false
33
+ root_pattern = @@evaluation_context.root_pattern
34
+ if root_pattern.nil?
33
35
  puts "No extractor defined, exiting..."
34
36
  exit
35
37
  end
36
- root_pattern = evaluated_extractor.parent
37
- #Recursively match data based on examples
38
- @@evaluation_context.setup_examples
38
+ root_pattern.source_file = source_file
39
+ root_pattern.source_proc = extractor_definition
39
40
  #Once all is set up, evaluate the extractor from the root pattern!
40
41
  evaluate_extractor(root_pattern)
41
42
  #Apply all postprocess steps
@@ -45,168 +46,126 @@ module Scrubyt
45
46
  root_pattern
46
47
  end
47
48
 
48
- #Evaluate a subexttractor (i.e. an extractor on a detail page).
49
- #The url passed to this function is automatically loaded.
50
- #The definition of the subextractor is passed as a block
51
- #
52
- #!!!! THIS CODE IS A MESS, IT needs to be refactored ASAP....
53
- def self.evaluate_subextractor(url, parent_pattern)
54
- if @@detail_pattern_relations.keys.include? @@detail_extractor_to_pattern_name[parent_pattern.referenced_extractor]
55
- detail_root = @@detail_pattern_relations[@@detail_extractor_to_pattern_name[parent_pattern.referenced_extractor]].parent
56
- detail_root.result = Result.new
57
- detail_root.last_result = nil
58
- @@original_evaluation_context.push @@evaluation_context
59
- @@evaluation_context = EvaluationContext.new
60
- @@evaluation_context.clear_sources_and_sinks detail_root
61
- FetchAction.restore_host_name
62
- fetch url
63
- @@evaluation_context.extractor = self
64
- @@evaluation_context.root_pattern = detail_root
65
- @@evaluation_context.attach_current_document
66
- evaluate_extractor detail_root
67
- @@evaluation_context = @@original_evaluation_context.pop
68
- detail_root.to_xml
69
- else
70
- @@original_evaluation_context ||= []
71
- FetchAction.restore_host_name
72
- @@original_evaluation_context.push @@evaluation_context
73
- @@evaluation_context = EvaluationContext.new
74
- fetch url
75
- evaluated_extractor = (class_eval(&parent_pattern.referenced_extractor))
76
- root_pattern = evaluated_extractor.parent
77
- @@detail_pattern_relations[@@detail_extractor_to_pattern_name[parent_pattern.referenced_extractor]] = root_pattern.children[0]
78
- @@evaluation_context.setup_examples
79
- evaluate_extractor(root_pattern)
80
- #Apply all postprocess steps
81
- PostProcessor.apply_post_processing(root_pattern)
82
- #Return the root pattern
83
- #puts "Extracted detail page"
84
- @@evaluation_context = @@original_evaluation_context.pop
85
- root_pattern.to_xml
49
+ #Evaluate a subexttractor (i.e. an extractor on a detail page).
50
+ #The url passed to this function is automatically loaded.
51
+ #The definition of the subextractor is passed as a block
52
+ #
53
+ #!!!! THIS CODE IS A MESS, IT needs to be refactored ASAP....
54
+ def self.evaluate_subextractor(url, parent_pattern, resolve)
55
+ if @@detail_pattern_relations.keys.include? @@detail_extractor_to_pattern_name[parent_pattern.referenced_extractor]
56
+ detail_root = @@detail_pattern_relations[@@detail_extractor_to_pattern_name[parent_pattern.referenced_extractor]].parent
57
+ detail_root.result = Result.new
58
+ detail_root.last_result = nil
59
+ FetchAction.store_page
60
+ @@original_evaluation_context.push @@evaluation_context
61
+ @@host_stack.push FetchAction.get_host_name
62
+ @@evaluation_context = EvaluationContext.new
63
+ @@evaluation_context.clear_sources_and_sinks detail_root
64
+ FetchAction.restore_host_name
65
+ fetch url, :resolve => resolve
66
+ @@evaluation_context.extractor = self
67
+ @@evaluation_context.root_pattern = detail_root
68
+ @@evaluation_context.attach_current_document
69
+ evaluate_extractor detail_root
70
+ @@evaluation_context = @@original_evaluation_context.pop
71
+ FetchAction.restore_page
72
+ FetchAction.store_host_name(@@host_stack.pop)
73
+ detail_root.to_xml
74
+ else
75
+ @@original_evaluation_context ||= []
76
+ @@host_stack ||= []
77
+ FetchAction.store_page
78
+ @@original_evaluation_context.push @@evaluation_context
79
+ @@host_stack.push FetchAction.get_host_name
80
+ @@evaluation_context = EvaluationContext.new
81
+ FetchAction.restore_host_name
82
+ fetch url, :resolve => resolve
83
+ evaluated_extractor = (class_eval(&parent_pattern.referenced_extractor))
84
+ root_pattern = evaluated_extractor.parent
85
+ @@detail_pattern_relations[@@detail_extractor_to_pattern_name[parent_pattern.referenced_extractor]] = root_pattern.children[0]
86
+ evaluate_extractor(root_pattern)
87
+ #Apply all postprocess steps
88
+ PostProcessor.apply_post_processing(root_pattern)
89
+ @@evaluation_context = @@original_evaluation_context.pop
90
+ FetchAction.restore_page
91
+ FetchAction.store_host_name(@@host_stack.pop)
92
+ root_pattern.to_xml
93
+ end
86
94
  end
87
- end
88
95
 
89
- #build the current wrapper
90
- def self.method_missing(method_name, *args, &block)
91
- if NavigationActions::KEYWORDS.include? method_name.to_s
92
- NavigationActions.send(method_name, *args)
93
- return
94
- end
95
- pattern = Scrubyt::Pattern.new(method_name.to_s, *args)
96
- check_if_shortcut_pattern(pattern)
97
- check_if_detail_page(pattern, args)
98
- pattern.evaluation_context = @@evaluation_context
99
- if @parent == nil
96
+ #build the current wrapper
97
+ def self.method_missing(method_name, *args, &block)
98
+ if NavigationActions::KEYWORDS.include? method_name.to_s
99
+ NavigationActions.send(method_name, *args)
100
+ return
101
+ end
100
102
  if method_name.to_s == 'next_page'
103
+ pattern = Scrubyt::Pattern.new(method_name.to_s, args, @@evaluation_context)
104
+ pattern.evaluation_context = @@evaluation_context
105
+
101
106
  @@evaluation_context.setup_uri_builder(pattern, args)
102
107
  @@next_patterns[@@last_root_pattern] = @@evaluation_context.uri_builder
103
- p @@last_root_pattern.children[0].name
104
- return @@last_pattern
105
108
  else
109
+ raise "Only one root pattern allowed" if !@@evaluation_context.root_pattern.nil?
106
110
  #Create a root pattern
107
- root_pattern = Scrubyt::Pattern.new('root', :type => :root)
111
+ root_pattern = Scrubyt::Pattern.new('root', [:type => :root], @@evaluation_context)
108
112
  @@last_root_pattern = root_pattern
109
- root_pattern.evaluation_context = @@evaluation_context
110
113
  @@evaluation_context.root_pattern = root_pattern
111
114
  @@evaluation_context.extractor = self
112
- #add the currently active document to the root pattern
115
+ #add the currently active document to the root pattern
113
116
  @@evaluation_context.attach_current_document
114
- @@evaluation_context.root_pattern.add_child_pattern(pattern)
115
- @@evaluation_context.block_count = 0
117
+ pattern = Scrubyt::Pattern.new(method_name.to_s, args, @@evaluation_context, root_pattern, &block)
118
+ root_pattern.children << pattern
119
+ pattern
116
120
  end
117
- else
118
- @parent.add_child_pattern(pattern) if @parent != nil
119
121
  end
120
- if block_given?
121
- @@evaluation_context.block_count = @@evaluation_context.block_count + 1
122
- @stack ||=[]
123
- @parent = pattern
124
- @stack.push @parent
125
- class_eval(&block)
126
- @stack.pop
127
- @parent = @stack.last
128
- end
129
- @@last_pattern = pattern
130
- end
131
-
132
- #Shortcut patterns, as their name says, are a shortcut for creating patterns
133
- #from predefined rules; for example:
134
- #
135
- # detail_url
136
- #
137
- # is equivalent to
138
- #
139
- # detail_url 'href', type => :attribute
140
- #
141
- #i.e. the system figures out on it's own that because of the postfix, the
142
- #example should be looked up (but it should never override the user input!)
143
- #another example (will be available later):
144
- #
145
- # every_img
146
- #
147
- # is equivivalent to
148
- #
149
- # every_img '//img'
150
- #
151
- def self.check_if_shortcut_pattern(pattern)
152
- case pattern.name
153
- when /.+_url/
154
- #make sure that we are not overriding the user's settings
155
- if !pattern.examples
156
- pattern.filters[0].example = 'href'
157
- pattern.type = Scrubyt::Pattern::PATTERN_TYPE_ATTRIBUTE
158
- end
122
+
123
+ def self.add_detail_extractor_to_pattern_name(referenced_extractor, pattern)
124
+ @@detail_extractor_to_pattern_name[referenced_extractor] ||= [] << pattern
125
+ end
126
+
127
+ def self.get_detail_extractor(parent_pattern)
128
+ @@detail_pattern_relations[@@detail_extractor_to_pattern_name[parent_pattern.referenced_extractor]].parent
129
+ end
130
+
131
+ def self.get_hpricot_doc
132
+ NavigationActions.get_hpricot_doc
133
+ end
134
+
135
+ def self.get_current_doc_url
136
+ NavigationActions.get_current_doc_url
137
+ end
138
+
139
+ def self.get_detail_pattern_relations
140
+ @@detail_pattern_relations
141
+ end
142
+
143
+ def self.get_host_name
144
+ NavigationActions.get_host_name
145
+ end
146
+
147
+ def self.get_mode
148
+ @@mode
149
+ end
150
+
151
+ def self.get_original_host_name
152
+ @@original_host_name
159
153
  end
160
- end
161
-
162
- #Check whether the currently created pattern is a detail pattern (i.e. it refrences
163
- #a subextractor). Also check if the currently created pattern is
164
- #an ancestor of a detail pattern , and store this in a hash if yes (to be able to
165
- #traverse the pattern structure on detail pages as well).
166
- def self.check_if_detail_page(pattern, args)
167
- return if args.size == 0
168
- return if !args[0].is_a? Hash
169
- return if !args[0][:references]
170
- referenced_extractor = args[0][:references]
171
- pattern.type = Scrubyt::Pattern::PATTERN_TYPE_DETAIL
172
- pattern.referenced_extractor = referenced_extractor
173
- @@detail_extractor_to_pattern_name[referenced_extractor] ||= []
174
- @@detail_extractor_to_pattern_name[referenced_extractor] = @@detail_extractor_to_pattern_name[referenced_extractor] << pattern
175
- end
176
-
177
- def self.get_hpricot_doc
178
- NavigationActions.get_hpricot_doc
179
- end
180
-
181
- def self.get_current_doc_url
182
- NavigationActions.get_current_doc_url
183
- end
184
-
185
- def self.get_detail_pattern_relations
186
- @@detail_pattern_relations
187
- end
188
-
189
- def self.get_mode
190
- @@mode
191
- end
192
-
193
- private
154
+
155
+ private
156
+
194
157
  def self.evaluate_extractor(root_pattern)
195
158
  if @@next_patterns[root_pattern]
196
159
  current_page_count = 1
197
160
  loop do
198
- really_evaluate_extractor(root_pattern)
199
- break if (@@next_patterns[root_pattern].limit == current_page_count || @@evaluation_context.crawl_to_new_page(root_pattern, @@next_patterns[root_pattern]) == nil)
161
+ root_pattern.evaluate(nil)
162
+ break if (@@next_patterns[root_pattern].limit == current_page_count || !@@evaluation_context.crawl_to_new_page(root_pattern, @@next_patterns[root_pattern]))
200
163
  current_page_count += 1 if @@next_patterns[root_pattern].limit != nil
201
164
  end
202
- else
203
- really_evaluate_extractor(root_pattern)
165
+ else
166
+ root_pattern.evaluate(nil)
204
167
  end
205
- end
206
-
207
- def self.really_evaluate_extractor(pattern)
208
- pattern.evaluate
209
- pattern.children.each { |child| really_evaluate_extractor child }
210
- end #end of method evaluate_wrapper
168
+ end
169
+
211
170
  end #end of class Extractor
212
- end #end of module Scrubyt
171
+ end #end of module Scrubyt