ariel 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. data/README +49 -83
  2. data/bin/ariel +29 -20
  3. data/examples/google_calculator/structure.rb +2 -2
  4. data/examples/google_calculator/structure.yaml +13 -15
  5. data/examples/raa/labeled/highline.html +5 -4
  6. data/examples/raa/labeled/mongrel.html +9 -8
  7. data/examples/raa/structure.rb +4 -2
  8. data/examples/raa/structure.yaml +94 -78
  9. data/lib/ariel.rb +71 -33
  10. data/lib/ariel/{candidate_selector.rb → candidate_refiner.rb} +39 -38
  11. data/lib/ariel/label_utils.rb +46 -18
  12. data/lib/ariel/labeled_document_loader.rb +77 -0
  13. data/lib/ariel/learner.rb +60 -38
  14. data/lib/ariel/log.rb +67 -0
  15. data/lib/ariel/node.rb +52 -0
  16. data/lib/ariel/node/extracted.rb +90 -0
  17. data/lib/ariel/node/structure.rb +91 -0
  18. data/lib/ariel/rule.rb +114 -32
  19. data/lib/ariel/rule_set.rb +34 -15
  20. data/lib/ariel/token.rb +9 -3
  21. data/lib/ariel/token_stream.rb +32 -17
  22. data/lib/ariel/wildcards.rb +19 -15
  23. data/test/fixtures.rb +45 -3
  24. data/test/specs/candidate_refiner_spec.rb +48 -0
  25. data/test/specs/label_utils_spec.rb +97 -0
  26. data/test/specs/learner_spec.rb +39 -0
  27. data/test/specs/node_extracted_spec.rb +90 -0
  28. data/test/specs/node_spec.rb +76 -0
  29. data/test/specs/node_structure_spec.rb +74 -0
  30. data/test/specs/rule_set_spec.rb +85 -0
  31. data/test/specs/rule_spec.rb +110 -0
  32. data/test/specs/token_stream_spec.rb +100 -7
  33. metadata +21 -28
  34. data/lib/ariel/example_document_loader.rb +0 -59
  35. data/lib/ariel/extracted_node.rb +0 -20
  36. data/lib/ariel/node_like.rb +0 -26
  37. data/lib/ariel/structure_node.rb +0 -75
  38. data/test/ariel_test_case.rb +0 -15
  39. data/test/test_candidate_selector.rb +0 -58
  40. data/test/test_example_document_loader.rb +0 -7
  41. data/test/test_label_utils.rb +0 -15
  42. data/test/test_learner.rb +0 -38
  43. data/test/test_rule.rb +0 -38
  44. data/test/test_structure_node.rb +0 -81
  45. data/test/test_token.rb +0 -16
  46. data/test/test_token_stream.rb +0 -82
  47. data/test/test_wildcards.rb +0 -18
data/lib/ariel/rule_set.rb
@@ -1,34 +1,53 @@
 module Ariel
 
-  # A RuleSet acts as a container for a StructureNode's start and end rules.
+  # A RuleSet acts as a container for a Node::Structure's start and end rules.
   # These are stored as an ordered array and are applied in turn until there is
   # a successful match. A RuleSet takes responsibility for applying start and
-  # end rules to extract an ExtractedNode.
+  # end rules to extract a Node::Extracted.
   class RuleSet
     def initialize(start_rules, end_rules)
       @start_rules=start_rules
       @end_rules=end_rules
     end
 
+    # Returns an array of the extracted tokenstreams. An empty array is returned
+    # if the rules cannot be applied.
+    # TODO: Think more about the way list iteration rules are applied
     def apply_to(tokenstream)
-      start_idx=nil
-      end_idx=nil
+      start_idxs=nil
+      end_idxs=nil
       @start_rules.each do |rule|
-        start_idx=rule.apply_to tokenstream
-        break if start_idx
+        start_idxs=rule.apply_to tokenstream
+        break if !start_idxs.empty?
       end
       @end_rules.each do |rule|
-        end_idx=rule.apply_to tokenstream
-        break if end_idx
+        end_idxs=rule.apply_to tokenstream
+        end_idxs.reverse! # So the start_idxs and end_idxs match up
+        break if !end_idxs.empty?
       end
-      if start_idx && end_idx
-        debug "RuleSet matched with start_idx=#{start_idx} and end_idx=#{end_idx}"
-        return nil if end_idx < start_idx
-        return tokenstream.slice_by_token_index(start_idx, end_idx)
-      else
-        debug "No valid match was found"
-        return nil
+      result=[]
+      unless start_idxs.empty? && end_idxs.empty?
+        # Following expression deals with the case where the first start rule
+        # matches after the first end rule, indicating that all tokens up to the
+        # end rule match should be a list item
+        if start_idxs.first > end_idxs.first
+          start_idxs.insert(0, 0)
+        end
+        if end_idxs.last < start_idxs.last
+          end_idxs << (tokenstream.size - 1)
+        end
+        Log.debug "RuleSet matched with start_idxs=#{start_idxs.inspect} and end_idxs=#{end_idxs.inspect}"
+        start_idxs.zip(end_idxs) do |start_idx, end_idx|
+          if start_idx && end_idx
+            next if start_idx > end_idx
+            result << tokenstream.slice_by_token_index(start_idx, end_idx)
+            yield result.last if block_given?
+          else
+            break
+          end
+        end
       end
+      return result
     end
   end
 end
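
A minimal sketch of the new list-aware contract (the landmarks below are illustrative, and Rule#apply_to is assumed to return an array of match indices in 0.1.0):

    stream = Ariel::TokenStream.new
    stream.tokenize "<li>first</li><li>second</li>"

    ruleset = Ariel::RuleSet.new [Ariel::Rule.new([["<li>"]], :forward, true)],
                                 [Ariel::Rule.new([["</li>"]], :back, true)]

    # 0.0.1 returned a single slice or nil; 0.1.0 returns an array with one
    # tokenstream per list item, and yields each slice if a block is given.
    ruleset.apply_to(stream) {|slice| puts slice.text}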
data/lib/ariel/token.rb
@@ -36,9 +36,10 @@ module Ariel
       @start_loc <=> t.start_loc
     end
 
-    # Accepts either a string or symbol representing a wildcard in
-    # Wildcards#list. Returns true if the whole Token is consumed by the wildcard or the
-    # string is equal to Token#text, and false if the match fails. Raises an
+    # Accepts either a string, a symbol representing a wildcard in
+    # Wildcards#list, or an arbitrary regex. Returns true if the
+    # whole Token is consumed by the wildcard or the string is equal
+    # to Token#text, and false if the match fails. Raises an
     # error if the passed symbol is not a member of Wildcards#list.
     def matches?(landmark)
       if landmark.kind_of? Symbol or landmark.kind_of? Regexp
@@ -64,5 +65,10 @@ module Ariel
     def matching_wildcards
       return Wildcards.matching(self.text)
     end
+
+    # Redefined for caching purposes. This proved to be too slow.
+    # def hash
+    #   [@text, @start_loc, @end_loc, @label_tag].hash
+    # end
   end
 end
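
The widened Token#matches? contract in a short, hedged sketch (the sample text is invented; tokens are taken from a tokenized stream rather than built directly):

    stream = Ariel::TokenStream.new
    stream.tokenize "Price: 42"
    token = stream.tokens.last       # the "42" token

    token.matches?("42")             # => true, literal text match
    token.matches?(:numeric)         # => true, wildcard from Wildcards#list
    token.matches?(/\d+/)            # => true, arbitrary regexes are new in 0.1.0
    token.matches?(:punctuation)     # => false, whole token is not punctuation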
data/lib/ariel/token_stream.rb
@@ -16,19 +16,21 @@ module Ariel
   class TokenStream
     include Enumerable
     attr_accessor :tokens, :cur_pos, :label_index, :original_text
-
-    def initialize()
-      @tokens=[]
-      @cur_pos=0
-      @original_text = ""
-      @token_regexen = [
+
+    TOKEN_REGEXEN = [
       Wildcards.list[:html_tag], # Match html tags that don't have attributes
       /\d+/,                     # Match any numbers, probably good to make a split
      /\b\w+\b/,                 # Pick up words, will split at punctuation
       /\S/                       # Grab any characters left over that aren't whitespace
     ]
-    @label_tag_regexen = [LabelUtils.any_label_regex]
+    LABEL_TAG_REGEXEN = [LabelUtils.any_label_regex]
+
+    def initialize()
+      @tokens=[]
+      @cur_pos=0
+      @original_text = ""
       @reversed=false
+      @contains_label_tags=false
     end
 
     # The tokenizer operates on a string by splitting it at every point it
@@ -37,7 +39,7 @@ module Ariel
     # offsets. The same is then done with the next regular expression on each of
     # these split strings, and new tokens are created with the correct offset in
     # the original text. Any characters left unmatched by any of the regular
-    # expressions in @token_regexen are discarded. This approach allows a
+    # expressions in TokenStream::TOKEN_REGEXEN are discarded. This approach allows a
     # hierarchy of regular expressions to work simply and easily. A simple
     # regular expression to match html tags might operate first, and then later
     # expressions that pick up runs of word characters can operate on what's
@@ -45,16 +47,25 @@ module Ariel
     # tokenizer will first remove and discard any occurences of label_tags (as
     # defined by the Regex set in LabelUtils) before matching and adding tokens.
     # Any label_tag tokens will be marked as such upon creation.
-    def tokenize(input, contains_labels=false)
+    def tokenize(input, contains_label_tags=false)
       string_array=[[input, 0]]
       @original_text = input
-      @original_text_contains_labels=contains_labels
-      @label_tag_regexen.each {|regex| split_string_array_by_regex(string_array, regex, false)} if contains_labels
-      @token_regexen.each {|regex| split_string_array_by_regex(string_array, regex)}
+      @contains_label_tags=contains_label_tags
+      LABEL_TAG_REGEXEN.each {|regex| split_string_array_by_regex(string_array, regex, false)} if contains_label_tags
+      TOKEN_REGEXEN.each {|regex| split_string_array_by_regex(string_array, regex)}
       @tokens.sort!
       @tokens.size
     end
 
+    # Note, token.cache_hash!=token.reverse.reverse.cache_hash.
+    def cache_hash
+      [@tokens, @reversed].hash
+    end
+
+    def contains_label_tags?
+      @contains_label_tags
+    end
+
     # Goes through all stored Token instances, removing them if
     # Token#is_label_tag? Called after a labeled document has been extracted to
     # a tree ready for the rule learning process.
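
A hedged sketch of the renamed tokenizer flag and the new helpers (the sample input is invented for illustration):

    stream = Ariel::TokenStream.new
    # The second argument was contains_labels in 0.0.1; it is now
    # contains_label_tags, and the regex lists are class-level constants.
    stream.tokenize "Title: <l:title>Example</l:title>", true
    stream.contains_label_tags?  # => true
    stream.size                  # => token count, added in 0.1.0
    stream.cache_hash            # hashes [@tokens, @reversed] for rule caching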
@@ -100,7 +111,7 @@
         raise ArgumentError, "Given string position does not match the start of any token"
       else
         @label_index = token_pos
-        debug "Token ##{label_index} - \"#{@tokens[label_index].text}\" labeled."
+        Log.debug "Token ##{label_index} - \"#{@tokens[label_index].text}\" labeled."
         return @label_index
       end
     end
@@ -111,14 +122,14 @@ module Ariel
     # examples). See also TokenStream#raw_text
     def text(l_index=0, r_index=-1)
       out=raw_text(l_index, r_index)
-      if @original_text_contains_labels
+      if contains_label_tags?
         LabelUtils.clean_string(out)
       else
         out
       end
     end
 
-    # Returns all text represented by the instance's stored tokens it will not
+    # Returns all text represented by the instance's stored tokens. It will not
     # strip label tags even if the stream is marked to contain them. However,
     # you should not expect to get the raw_text once any label_tags have been
     # filtered (TokenStream#remove_label_tags).
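
The text/raw_text distinction in one small example (return values are assumed from the comments above, not verified against the release):

    stream = Ariel::TokenStream.new
    stream.tokenize "Title: <l:title>Example</l:title>", true

    stream.text      # label tags cleaned with LabelUtils.clean_string
    stream.raw_text  # label tags left exactly as they appear in the source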
@@ -141,7 +152,7 @@ module Ariel
       end
     end
 
-    # Return to the beginning of the TokenStream.
+    # Return to the beginning of the TokenStream. Returns self.
     def rewind
       @cur_pos=0
       self
@@ -166,7 +177,6 @@ module Ariel
       if label_index
         @label_index = reverse_pos(@label_index)
       end
-      @cur_pos = reverse_pos(@cur_pos)
       @reversed=!@reversed
       return self
     end
@@ -176,6 +186,11 @@ module Ariel
     def reversed?
       @reversed
     end
+
+    # Returns the number of tokens in the TokenStream
+    def size
+      @tokens.size
+    end
 
     # Takes a list of Strings and Symbols as its arguments representing text to be matched in
     # individual tokens and Wildcards. For a match to be a
data/lib/ariel/wildcards.rb
@@ -1,8 +1,7 @@
 module Ariel
   # Contains all wildcards to be used in rule generation.
   class Wildcards
-    private_class_method :new
-    @@list = {
+    @list = {
       :anything=>/.+/,
       :numeric=>/\d+/,
       :alpha_numeric=>/\w+/,
@@ -12,22 +11,27 @@ module Ariel
       :html_tag=>/<\/?\w+>|<\w+\s+\/>/,
       :punctuation=>/[[:punct:]]+/
     }
-    # Returns the hash of wildcard name (symbol) and regular expression pairs.
-    def self.list
-      @@list
-    end
 
-    # Given a string, will return an array of symbols from Wildcards::list that
-    # match it.
-    def self.matching(string)
-      matches=[]
-      @@list.each do |name, regex|
-        if string[regex]==string
-          yield name if block_given?
-          matches << name
+    class << self
+      private :new
+      # Returns the hash of wildcard name (symbol) and regular expression pairs.
+      def list
+        @list
+      end
+
+      # Given a string, will return an array of symbols from Wildcards::list that
+      # match it.
+      def matching(string)
+        matches=[]
+        @list.each do |name, regex|
+          if string[regex]==string
+            yield name if block_given?
+            matches << name
+          end
         end
+        matches
       end
-      matches
+
     end
   end
 end
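
A hedged usage sketch of the reworked class-level interface (the example string is invented; the exact match list depends on the full wildcard table):

    Ariel::Wildcards.list[:numeric]   # => /\d+/
    # Symbols whose regex consumes the whole string,
    # e.g. [:anything, :numeric, :alpha_numeric]
    Ariel::Wildcards.matching("42")
    Ariel::Wildcards.matching("42") {|name| puts name}  # block form still yields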
data/test/fixtures.rb
@@ -4,7 +4,7 @@ Title: <l:title>The test of the Century</l:title>
 <l:content><b>Excerpt</b>: <i><l:excerpt>A look back at what could be considered the greatest ever test.</l:excerpt></i>
 <l:body>There was once a test designed to assess whether apply_extraction_tree_on worked.</l:body></l:content>
 EOS
-  @@labeled_document_structure = Ariel::StructureNode.new do |r|
+  @@labeled_document_structure = Ariel::Node::Structure.new do |r|
     r.item :title
     r.item :content do |c|
       c.item :excerpt
@@ -24,12 +24,39 @@ Title: <l:title>Another example</l:title>
 <l:body>I love to write examples, you love to read them, ruby loves to process them.
 In conclusion, we're has happy as can be.</l:body>
 <l:comment_list>Comments:
-<l:comment>Title:<l:title>Great example</l:title>
+<ol>
+<li><l:comment>Title:<l:title>Great example</l:title>
 <l:author>Adoring fan</l:author>
 <l:body>Always love reading your examples, keep up the great work.</l:body>
-</l:comment></l:comment_list>
+</l:comment></li>
+<li><l:comment>Title: <l:title>Some advice</l:title>
+<l:author>Wise old man</l:author>
+<l:body>Keep your friends close and your enemies closer.</l:body>
+</l:comment></li></l:comment_list>
 EOS
 
+  @@labeled_document_with_list_structure = Ariel::Node::Structure.new do |r|
+    r.item :title
+    r.item :body
+    r.item :comment_list do |c|
+      c.list_item :comment do |d|
+        d.item :author
+        d.item :body
+      end
+    end
+  end
+
+  title_ruleset=Ariel::RuleSet.new [Ariel::Rule.new([[":"]], :forward)], [Ariel::Rule.new([["love", "I"]], :back)]
+  body_ruleset=Ariel::RuleSet.new [Ariel::Rule.new([["example"]], :forward)], [Ariel::Rule.new([["Comments"]], :back)]
+  c_list_ruleset=Ariel::RuleSet.new [Ariel::Rule.new([["be", "."]], :forward)], [Ariel::Rule.new([], :back)]
+  comment_ruleset=Ariel::RuleSet.new [Ariel::Rule.new([["<li>"]], :forward, true)], [Ariel::Rule.new([["</li>"]], :back, true)]
+
+  s=@@labeled_document_with_list_structure
+  s.title.ruleset=title_ruleset
+  s.body.ruleset=body_ruleset
+  s.comment_list.ruleset=c_list_ruleset
+  s.comment_list.comment.ruleset=comment_ruleset
+
   @@labeled_addresses=Array.new(4) {Ariel::TokenStream.new}
   @@labeled_addresses[0].tokenize("513 Pico <b>Venice</b>, Phone: 1-<b>800</b>-555-1515")
   @@labeled_addresses[0].set_label_at 36
@@ -40,4 +67,19 @@ EOS
   @@labeled_addresses[3].tokenize("403 La Tijera, <b> Watts </b>, Phone: (310) 798-0008")
   @@labeled_addresses[3].set_label_at 39
 
+  # This example is from the STALKER paper; it suggests that SkipTo('<p><i>')
+  # would extract the start of the list, and the rules SkipTo '<i>' and SkipTo
+  # '</i>' would locate the start and end of each list item. If the first found
+  # end_loc is before the first start_loc, it should be assumed all tokens from
+  # 0...end_loc are one item.
+  @@unlabeled_restaurant_example=<<EOS
+<p> Name: <b> Yala </b><p> Cuisine: Thai <p><i>
+4000 Colfax, Phoenix, AZ 85258 (602) 508-1570
+</i><br><i>
+523 Vernon, Las Vegas, NV 89104 (702) 578-2293
+</i><br><i>
+403 Pico, LA, CA 90007 (213) 798-0008
+</i>
+EOS
+
 end
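
How the list handling in the new RuleSet#apply_to plays out on that restaurant fixture, as a hedged sketch (rules transcribed from the STALKER comment; exact token indices are not claimed):

    stream = Ariel::TokenStream.new
    stream.tokenize @@unlabeled_restaurant_example

    ruleset = Ariel::RuleSet.new [Ariel::Rule.new([["<i>"]], :forward, true)],
                                 [Ariel::Rule.new([["</i>"]], :back, true)]

    # One slice per <i>...</i> address. Had the first end match landed before
    # the first start match, apply_to would prepend index 0 so the leading
    # tokens form their own item.
    addresses = ruleset.apply_to(stream)
    addresses.map {|address| address.text.strip}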
data/test/specs/candidate_refiner_spec.rb
@@ -0,0 +1,48 @@
+require 'ariel'
+require 'fixtures'
+
+include Fixtures
+
+context "Refining non exhaustive rule candidates" do
+  setup do
+    @candidates=[]
+    @candidates << Ariel::Rule.new([[:anything]], :forward)
+    @candidates << Ariel::Rule.new([[:numeric], [:numeric], [:numeric]], :forward) # late
+    @candidates << Ariel::Rule.new([["("]], :forward)
+    @candidates << Ariel::Rule.new([[:numeric, :alpha_numeric]], :forward)
+    @refiner=Ariel::CandidateRefiner.new(@candidates, @@labeled_addresses)
+  end
+
+  specify "refine_by_match_type should not change the list of candidates if all rules match one of the given types" do
+    @refiner.refine_by_match_type :fail, :early, :late, :perfect
+    @refiner.candidates.should_equal @candidates
+  end
+
+  specify "refine_by_match_type should remove all candidates that don't match the given type from the candidates list" do
+    @refiner.refine_by_match_type :late
+    @refiner.candidates.size.should_equal 1
+    @candidates[1].should_equal @refiner.candidates[0]
+  end
+
+  specify "refine_by_fewer_wildcards should leave only those rules with the lowest number of wildcards" do
+    @refiner.refine_by_fewer_wildcards
+    @refiner.candidates.size.should_equal 1
+    @refiner.candidates[0].should_equal @candidates[2]
+  end
+
+  specify "refine_by_label_proximity should leave only those candidates that match closest to the label" do
+    @refiner.refine_by_label_proximity
+    @refiner.candidates.size.should_equal 1
+    @refiner.candidates[0].should_equal @candidates[2]
+  end
+
+  specify "refine_by_longer_end_landmarks should leave only those candidates with the longest end landmark" do
+    @refiner.refine_by_longer_end_landmarks
+    @refiner.candidates.size.should_equal 1
+    @refiner.candidates[0].should_equal @candidates[3]
+  end
+
+  specify "random_from_remaining should return a random candidate from those remaining in the candidate list" do
+    @candidates.should_include(@refiner.random_from_remaining)
+  end
+end
data/test/specs/label_utils_spec.rb
@@ -0,0 +1,97 @@
+require 'ariel'
+require 'fixtures'
+include Fixtures
+
+context "Querying LabelUtils for label tag locating Regular Expressions" do
+  specify "label_regex should return an array of two Regexp to locate a start tag or an end tag with the given tag contents" do
+    s_regex, e_regex = Ariel::LabelUtils.label_regex('example')
+    s_tag="<l:example>"
+    e_tag="</l:example>"
+    s_tag.should_match s_regex
+    e_tag.should_not_match s_regex
+    s_tag.should_not_match e_regex
+    e_tag.should_match e_regex
+    "<l:fail>".should_not_match s_regex
+  end
+
+  specify "label_regex should by default return a pair of labels that will match any valid label tags" do
+    s_regex, e_regex = Ariel::LabelUtils.label_regex
+    "<l:randomexample>".should_match s_regex
+    "</l:unrandomexample>".should_match e_regex
+    "<l:foo>".should_not_match e_regex
+  end
+
+  specify "any_label_regex should return a regex that will match any valid open or closing label tags" do
+    regex=Ariel::LabelUtils.any_label_regex
+    regex.should_be_a_kind_of Regexp
+    %w[<l:foo> <l:bar> </l:foo> </l:bar>].each {|tag| tag.should_match regex}
+    %w[<l:foo <l/trunk> </l:** <a> </b>].each {|tag| tag.should_not_match regex}
+  end
+end
+
+context "Extracting a labeled region from a node" do
+  setup do
+    @tokenstream_with_label_tags = Ariel::TokenStream.new
+    @tokenstream_with_label_tags.tokenize @@labeled_document, true
+    @parent_extracted_node=Ariel::Node::Extracted.new(:root, @tokenstream_with_label_tags, @@labeled_document_structure)
+    @title_result=Ariel::LabelUtils.extract_labeled_region(@@labeled_document_structure.title, @parent_extracted_node)
+  end
+
+  specify "extract_labeled_region should return an array containing the region corresponding to the given structure node as a Node::Extracted" do
+    @title_result.should_be_a_kind_of Array
+    @title_result[0].should_be_an_instance_of Ariel::Node::Extracted
+    @title_result.size.should_equal 1
+    @title_result[0].tokenstream.tokens.should_equal @tokenstream_with_label_tags.tokens[3..7]
+  end
+
+  specify "Should return an empty array if the match fails" do
+    Ariel::LabelUtils.extract_labeled_region(Ariel::Node::Structure.new(:non_existent), @parent_extracted_node).should_equal []
+  end
+
+  specify "Extracted node should have the correct node_name" do
+    @title_result[0].node_name.should_equal :title
+  end
+
+  specify "Extracted node should be added as a child to the parent extracted node" do
+    @title_result.should_equal @parent_extracted_node.children.values
+  end
+end
+
+context "Extracting labeled list items from a node" do
+  setup do
+    @structure=@@labeled_document_with_list_structure
+    @tokenstream=Ariel::TokenStream.new
+    @tokenstream.tokenize @@labeled_document_with_list, true
+    @tokenstream = @tokenstream.slice_by_token_index 39, 95
+    @parent_extracted_node=Ariel::Node::Extracted.new(:comment_list, @tokenstream, @@labeled_document_with_list_structure.comment_list)
+    @result = Ariel::LabelUtils.extract_labeled_region(@structure.comment_list.comment, @parent_extracted_node)
+  end
+
+  specify "Should return an array containing each list_item" do
+    @result.size.should_equal 2
+    @result.each {|extracted_node| extracted_node.should_be_an_instance_of Ariel::Node::Extracted}
+    @tokenstream.tokens[5..28].should_equal @result[0].tokenstream.tokens
+    @tokenstream.tokens[33..54].should_equal @result[1].tokenstream.tokens
+  end
+
+  specify "Should name each list item itemname_num" do
+    @result[0].node_name.should_equal :comment_0
+    @result[1].node_name.should_equal :comment_1
+  end
+
+  specify "Should add each list_item as a child of the parent extracted node" do
+    children=@parent_extracted_node.children.values
+    children.size.should_equal 2
+    children.each {|child| @result.should_include child}
+  end
+
+  specify "Should return an empty array if no list items are extracted" do
+    stream=Ariel::TokenStream.new
+    stream.tokenize "No labels here", true
+    @parent_extracted_node.tokenstream=stream
+    result = Ariel::LabelUtils.extract_labeled_region(@structure.comment_list.comment, @parent_extracted_node)
+    result.should_equal []
+  end
+end