ariel 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +49 -83
- data/bin/ariel +29 -20
- data/examples/google_calculator/structure.rb +2 -2
- data/examples/google_calculator/structure.yaml +13 -15
- data/examples/raa/labeled/highline.html +5 -4
- data/examples/raa/labeled/mongrel.html +9 -8
- data/examples/raa/structure.rb +4 -2
- data/examples/raa/structure.yaml +94 -78
- data/lib/ariel.rb +71 -33
- data/lib/ariel/{candidate_selector.rb → candidate_refiner.rb} +39 -38
- data/lib/ariel/label_utils.rb +46 -18
- data/lib/ariel/labeled_document_loader.rb +77 -0
- data/lib/ariel/learner.rb +60 -38
- data/lib/ariel/log.rb +67 -0
- data/lib/ariel/node.rb +52 -0
- data/lib/ariel/node/extracted.rb +90 -0
- data/lib/ariel/node/structure.rb +91 -0
- data/lib/ariel/rule.rb +114 -32
- data/lib/ariel/rule_set.rb +34 -15
- data/lib/ariel/token.rb +9 -3
- data/lib/ariel/token_stream.rb +32 -17
- data/lib/ariel/wildcards.rb +19 -15
- data/test/fixtures.rb +45 -3
- data/test/specs/candidate_refiner_spec.rb +48 -0
- data/test/specs/label_utils_spec.rb +97 -0
- data/test/specs/learner_spec.rb +39 -0
- data/test/specs/node_extracted_spec.rb +90 -0
- data/test/specs/node_spec.rb +76 -0
- data/test/specs/node_structure_spec.rb +74 -0
- data/test/specs/rule_set_spec.rb +85 -0
- data/test/specs/rule_spec.rb +110 -0
- data/test/specs/token_stream_spec.rb +100 -7
- metadata +21 -28
- data/lib/ariel/example_document_loader.rb +0 -59
- data/lib/ariel/extracted_node.rb +0 -20
- data/lib/ariel/node_like.rb +0 -26
- data/lib/ariel/structure_node.rb +0 -75
- data/test/ariel_test_case.rb +0 -15
- data/test/test_candidate_selector.rb +0 -58
- data/test/test_example_document_loader.rb +0 -7
- data/test/test_label_utils.rb +0 -15
- data/test/test_learner.rb +0 -38
- data/test/test_rule.rb +0 -38
- data/test/test_structure_node.rb +0 -81
- data/test/test_token.rb +0 -16
- data/test/test_token_stream.rb +0 -82
- data/test/test_wildcards.rb +0 -18
data/lib/ariel/log.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
|
3
|
+
module Ariel
|
4
|
+
|
5
|
+
# Very simple Log class. By default it outputs to stdout and ignores messages
# below :info level (or below :debug level if $DEBUG is set). Should probably
# get rid of the usage of Singleton as it's used very little, with the class's
# eigenclass/singleton class used mostly for the same purpose. Use
# Log.set_level to lower/raise the logging level.
class Log
  include Singleton

  SEVERITY={:debug=>0, :info=>1, :warn=>2, :error=>3}

  # Applies the default settings the first time the logger is used: output
  # goes to stdout, and the level is :debug if $DEBUG is set and :info if not.
  # A setting that was already chosen explicitly (e.g. Log.set_level called
  # before the first message was logged) is left untouched.
  def initialize
    klass=self.class
    klass.output_to_stdout if klass.current_output.nil?
    klass.set_level($DEBUG ? :debug : :info) if klass.current_level.nil?
  end

  class << self
    # Defines Log.debug, Log.info, Log.warn and Log.error. Each takes a
    # message and returns whatever Log.log returns for that severity.
    SEVERITY.keys.each do |level|
      define_method(level) {|message| log message, level}
    end

    # Set the log level to the given key from the SEVERITY constant. Raises
    # ArgumentError if the key is unknown.
    def set_level(level)
      raise ArgumentError, "Invalid log level given" unless SEVERITY.has_key? level
      @log_level=level
    end

    # Returns the current log level, or nil if none has been set yet.
    def current_level
      @log_level
    end

    # Returns the current output target (:stdout or :file), or nil if none
    # has been set yet.
    def current_output
      @output
    end

    # Sends all output to stdout.
    def output_to_stdout
      @output=:stdout
    end

    # Sends all output to a file called debug.log in the current directory.
    def output_to_file
      @output=:file
    end

    # Not intended to be used directly, preferred to use the methods
    # corresponding to different severity levels. Returns the formatted
    # message if it was logged, and nil if it was below the current level.
    def log(message, level)
      instance # make sure the defaults have been applied before reading them
      return nil if SEVERITY[@log_level] > SEVERITY[level]
      message = "#{level}: #{message}"
      if @output==:file
        File.open('debug.log', 'ab') {|f| f.puts message }
      elsif @output==:stdout
        puts message
      end
      message
    end
  end
end
|
67
|
+
end
|
data/lib/ariel/node.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
module Ariel
|
2
|
+
|
3
|
+
# A generic Node object. As an end user, you have no need to use this. All
# children are stored in a hash keyed by node name. #id and #type are
# undefined (where they exist) so they can be used freely as child names in a
# Node::Structure.
class Node
  # Only undefine the methods when the running Ruby actually defines them;
  # calling undef_method on a missing method raises NameError and would stop
  # the class from loading.
  [:id, :type].each do |meth|
    undef_method meth if method_defined?(meth)
  end
  attr_accessor :parent, :children, :node_name

  # If the name is a string, it's converted to a symbol. If not it's just
  # stored as is.
  def initialize(name)
    @children={}
    if name.kind_of? String
      @node_name=name.to_sym
    else
      @node_name=name
    end
  end

  # Given a Node object, stores it in the hash of children under its
  # node_name, sets its parent to the current node, and defines an accessor
  # method on this node named after the child.
  def add_child(node)
    @children[node.node_name]=node
    node.parent = self
    # Trick stolen from OpenStruct: define the accessor on this object's
    # singleton class so it exists only for this node.
    meta = class << self; self; end
    meta.send(:define_method, node.node_name.to_s.to_sym) {@children[node.node_name]}
  end

  # Yields each descendant node, breadth first. If passed true will also
  # yield itself (first). Returns an enumerator when no block is given.
  def each_descendant(include_self=false)
    return enum_for(:each_descendant, include_self) unless block_given?
    if include_self
      node_queue=[self]
    else
      node_queue=self.children.values
    end
    until node_queue.empty? do
      # Queue the children of the node at the front before handing it out.
      node_queue.concat node_queue.first.children.values
      yield node_queue.shift
    end
  end

  # Short description listing the node's name, its parent's name and the
  # names of its children.
  def inspect
    ["#{self.class.name} - node_name=#{self.node_name.inspect};",
     "parent=#{self.parent ? self.parent.node_name.inspect : nil.inspect };",
     "children=#{self.children.keys.inspect};"].join ' '
  end
end
|
52
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'ariel/node'
|
2
|
+
|
3
|
+
module Ariel
|
4
|
+
|
5
|
+
# Each Node::Extracted has a name, a TokenStream and a structure_node which
# points to the relevant Node::Structure. Skip straight to #search, #/ and #at
# for the query interface. This is strongly recommended over using the built
# in method accessors (a method isn't defined if a given field isn't
# extracted, so you're going to have to catch a lot of potential errors).
class Node::Extracted < Node
  attr_accessor :tokenstream, :structure_node

  def initialize(name, tokenstream, structure)
    super(name)
    @structure_node=structure
    @tokenstream=tokenstream
  end

  # Returns the text contained in the TokenStream.
  def extracted_text
    tokenstream.text
  end

  # Index based accessor for the Node::Extracted's children. Supports Range
  # objects. Aims to provide behaviour that makes sense, especially when a
  # Node has list children. Node::Extracted#[0..0] will return an array, while
  # Node::Extracted#[0] will return the child itself (or nil if there is no
  # such child). Whenever a Range or more than one key is given the result is
  # an array of the children found, which may be empty.
  def [](*args)
    wants_array=args.any? {|arg| arg.kind_of? Range}
    keys=args.collect {|arg| arg.kind_of?(Range) ? arg.to_a : arg}.flatten
    wants_array=true if keys.size > 1
    result=@children.values_at(*keys).compact
    # Unwrap explicitly rather than relying on `return *result`, whose
    # treatment of one-element and empty arrays differs between Ruby versions.
    wants_array ? result : result.first
  end

  # The preferred way of querying extracted information. If nothing was
  # extracted, an empty array is returned. This is much safer than using
  # Node::Extracted accessors. Consider if your code is reading
  # doc.address.phone_number.area_code - this will raise an error if any one of
  # these were not extracted. (doc/'address/phone_number/area_code') is
  # preferred. Numbered list_items can be queried e.g. (doc/'comment_list/2'),
  # and basic globbing is supported: (doc/'*/*/title').
  def search(search_string)
    queue=search_string.split '/'
    current_term=queue.shift
    return [self] if current_term.nil? #If for some reason nothing is given in the search string
    if current_term=='*'
      matches=self.children.values
      begin
        matches=matches.sort {|a, b| a.node_name <=> b.node_name}
      rescue ArgumentError, NoMethodError
        # Names that can't be compared with each other (e.g. symbols mixed
        # with list indices): keep the children in their stored order.
      end
    elsif current_term =~ /\A\d+\z/
      matches=[@children[current_term.to_i]]
    else
      matches=[@children[current_term.to_sym]]
    end
    # Drop children that were not extracted before descending, so a missing
    # intermediate node gives an empty result instead of calling nil.search.
    matches=matches.compact
    return matches if queue.empty?
    remaining=queue.join('/')
    matches.collect {|match| match.search(remaining)}.flatten.compact
  end
  alias :/ :search

  # Acts exactly like #search, but returns only the first match or nil if
  # there are no matches.
  def at(search_string)
    self.search(search_string).first
  end

  # Extends Node#inspect with the name of the structure node and (at most
  # roughly the first hundred characters of) the extracted text.
  def inspect
    text=self.extracted_text
    text=text[0..100]+'...' if text.size > 100
    [super,
     "structure_node=#{self.structure_node.node_name.inspect};",
     "extracted_text=\"#{text}\";"
    ].join ' '
  end
end
|
90
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'ariel/node'
|
2
|
+
|
3
|
+
module Ariel
|
4
|
+
|
5
|
+
# A Node describing the structure of the document tree. Every node holds the
# ruleset used to extract its content from the content of its parent node, so
# it can be thought of as a rule-storing object.
class Node::Structure < Node
  attr_accessor :ruleset, :node_type

  # Creates the node and, when a block is given, passes the new node to it so
  # that children can be declared with #item, #list and #list_item.
  def initialize(name=:root, type=:not_list, &block)
    super(name)
    @node_type=type
    block.call(self) if block
  end

  # Adds to an already created Node. e.g.
  #   node.extend_structure do |r|
  #     r.item :new_field1
  #     r.item :new_field2
  #   end
  def extend_structure(&block)
    block.call(self) if block
  end

  # Applies this node's ruleset to the tokenstream of the given node. A
  # Node::Extracted is created for each stream the ruleset yields and added as
  # a child of the given node. Returns an array of the extracted nodes, which
  # is empty when no ruleset has been learnt.
  def extract_from(node)
    found=[]
    return found if @ruleset.nil?
    list_index=0
    @ruleset.apply_to(node.tokenstream) do |stream|
      child_name=@node_name
      if node_type==:list_item
        # list items are named by their position rather than by the node name
        child_name=list_index
        list_index+=1
      end
      child=Node::Extracted.new(child_name, stream, self)
      node.add_child child
      found << child
    end
    found
  end

  # Walks the tree breadth first starting at root_node, extracting the
  # children of each extracted node's structure_node from it. With
  # extract_labels set, LabelUtils.extract_labeled_region is used in place of
  # the learnt rules. Returns root_node.
  def apply_extraction_tree_on(root_node, extract_labels=false)
    pending=[root_node]
    until pending.empty?
      current=pending.shift
      current.structure_node.children.values.each do |child|
        extracted = if extract_labels
          LabelUtils.extract_labeled_region(child, current)
        else
          child.extract_from(current)
        end
        extracted.each {|new_node| pending.push new_node}
      end
    end
    root_node
  end

  # Declares a child that occurs once. #list is a synonym, recommended when
  # declaring a container for list_items. The children of a list_item are
  # plain items again. e.g.
  #   structure = Ariel::Node::Structure.new do |r|
  #     r.list :comments do |c|  # r.item :comments would be equivalent, but less readable
  #       c.list_item :comment do |c|
  #         c.item :author  # Now these are just normal items, as they are extracted once from their parent
  #         c.item :date
  #         c.item :body
  #       end
  #     end
  #   end
  def item(name, &block)
    add_child(Node::Structure.new(name, &block))
  end
  # A list is extracted in exactly the same way as an ordinary item; the
  # synonym exists because people probably prefer to call a list a list.
  alias_method :list, :item

  # Declares a child of type :list_item. See the docs for #item for a
  # discussion of when to use #item and when to use #list_item.
  def list_item(name, &block)
    add_child(Node::Structure.new(name, :list_item, &block))
  end
end
|
90
|
+
end
|
91
|
+
|
data/lib/ariel/rule.rb
CHANGED
@@ -5,17 +5,23 @@ module Ariel
|
|
5
5
|
# Rule#landmarks. A Rule also has a direction :forward or :back, which
|
6
6
|
# determines whether it is applied from the end or beginning of a tokenstream.
|
7
7
|
class Rule
|
8
|
-
attr_accessor :landmarks, :direction
|
8
|
+
attr_accessor :landmarks, :direction, :exhaustive
|
9
9
|
@@RuleMatchData=Struct.new(:token_loc, :type)
|
10
|
-
|
10
|
+
@@cache={}
|
11
|
+
|
11
12
|
# A rule's direction can be :back or :forward, which determines whether it
|
12
13
|
# is applied from the start or end of the TokenStream. The landmark array
|
13
14
|
# contains an array for each landmark, which consists of one or more
|
14
|
-
# features. e.g. Rule.new(
|
15
|
-
def initialize(direction,
|
15
|
+
# features. e.g. Rule.new([[:anything, "Example"], ["Test"]], :forward).
|
16
|
+
def initialize(landmarks, direction, exhaustive=false)
|
16
17
|
@landmarks=landmarks
|
17
18
|
raise(ArgumentError, "Not a valid direction") unless [:forward, :back].include?(direction)
|
18
19
|
@direction=direction
|
20
|
+
@exhaustive=exhaustive
|
21
|
+
end
|
22
|
+
|
23
|
+
def exhaustive?
|
24
|
+
@exhaustive
|
19
25
|
end
|
20
26
|
|
21
27
|
# Two rules are equal if they have the same list of landmarks and the same
|
@@ -26,12 +32,12 @@ module Ariel
|
|
26
32
|
alias :eql? :==
|
27
33
|
|
28
34
|
def hash
|
29
|
-
[@landmarks, @direction].hash
|
35
|
+
[@landmarks, @direction, @exhaustive].hash
|
30
36
|
end
|
31
37
|
|
32
38
|
# Returns a rule that contains a given range of
|
33
39
|
def partial(range)
|
34
|
-
return Rule.new(@
|
40
|
+
return Rule.new(@landmarks[range], @direction)
|
35
41
|
end
|
36
42
|
|
37
43
|
def deep_clone
|
@@ -57,7 +63,7 @@ module Ariel
|
|
57
63
|
end
|
58
64
|
|
59
65
|
# Given a TokenStream and a rule, applies the rule on the stream and
|
60
|
-
# returns
|
66
|
+
# returns an empty array if the match fails and an array of token_locs if the match succeeds.
|
61
67
|
# Yields a RuleMatchData Struct with accessors token_loc (the position of the match in the stream)
|
62
68
|
# and type if a block is given. type is nil if the TokenStream has no label,
|
63
69
|
# :perfect if all tokens up to the labeled token are consumed, :early if the rule's final position
|
@@ -65,33 +71,23 @@ module Ariel
|
|
65
71
|
# token_loc is the position in the stream as it was passed in. That is, the
|
66
72
|
# token_loc is always from the left of the given stream whether it is in a
|
67
73
|
# reversed state or not.
|
68
|
-
def apply_to(tokenstream)
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
unless target.skip_to(*landmark)
|
79
|
-
return nil
|
74
|
+
def apply_to(tokenstream)
|
75
|
+
target=self.class.prepare_tokenstream(tokenstream, @direction)
|
76
|
+
cache_check=@@cache[[tokenstream.cache_hash, self.hash]]
|
77
|
+
if cache_check
|
78
|
+
token_locs=cache_check
|
79
|
+
else
|
80
|
+
token_locs=[]
|
81
|
+
while result=seek_landmarks(target)
|
82
|
+
token_locs << correct_match_location(tokenstream, result)
|
83
|
+
break unless exhaustive?
|
80
84
|
end
|
85
|
+
@@cache[[tokenstream.cache_hash, self.hash]]=token_locs
|
81
86
|
end
|
82
|
-
|
83
|
-
|
84
|
-
token_loc = tokenstream.reverse_pos(token_loc) #Return position from left of given stream
|
85
|
-
end
|
86
|
-
md = @@RuleMatchData.new(token_loc)
|
87
|
-
if target.label_index
|
88
|
-
idx = target.label_index
|
89
|
-
md.type = :perfect if token_loc == idx
|
90
|
-
md.type = :early if token_loc < idx
|
91
|
-
md.type = :late if token_loc > idx
|
87
|
+
if block_given?
|
88
|
+
generate_match_data(target, token_locs).each {|md| yield md}
|
92
89
|
end
|
93
|
-
|
94
|
-
return token_loc
|
90
|
+
return token_locs
|
95
91
|
end
|
96
92
|
|
97
93
|
# Returns true or false depending on if the match of this rule on the given
|
@@ -99,8 +95,9 @@ module Ariel
|
|
99
95
|
# :perfect, :early, :fail and :late). Only valid on streams with labels
|
100
96
|
def matches(tokenstream, *types)
|
101
97
|
raise ArgumentError, "No match types given" if types.empty?
|
98
|
+
raise ArgumentError, "Only applicable to tokenstreams containing a label" if tokenstream.label_index.nil?
|
102
99
|
match = nil
|
103
|
-
apply_to(tokenstream) {|md| match=md.type}
|
100
|
+
apply_to(tokenstream) {|md| match=md.type if md.type;}
|
104
101
|
match = :fail if match.nil?
|
105
102
|
if types.include? match
|
106
103
|
return true
|
@@ -108,5 +105,90 @@ module Ariel
|
|
108
105
|
return false
|
109
106
|
end
|
110
107
|
end
|
108
|
+
|
109
|
+
# Only used in rule learning on labeled tokenstreams. Needed to provide the
# match index most relevant to the currently labeled list item. A preference
# of :early or :late can be passed; it is forwarded to find_closest_match so
# that only a token_loc on the corresponding side of the stream's label_index
# is considered. Returns nil when no location qualifies.
def closest_match(tokenstream, preference=:none)
  token_locs=apply_to(tokenstream)
  find_closest_match(token_locs, tokenstream.label_index, preference)
end
|
117
|
+
|
118
|
+
# Returns a stream ready for a rule of the given direction: the given
# tokenstream itself when its reversed state already suits the direction
# (:forward on an unreversed stream, :back on a reversed one), otherwise the
# result of calling #reverse on it. The stream is rewound before it is
# returned.
def self.prepare_tokenstream(tokenstream, direction)
  already_reversed=tokenstream.reversed?
  target = case direction
           when :forward then already_reversed ? tokenstream.reverse : tokenstream
           when :back then already_reversed ? tokenstream : tokenstream.reverse
           end
  target.rewind #rules are applied from the beginning of the stream
  target
end
|
132
|
+
|
133
|
+
private
|
134
|
+
|
135
|
+
# Looks for each of the Rule's landmarks in turn in the given tokenstream by
# calling skip_to with the landmark's features. Gives up and returns nil as
# soon as one skip_to fails; otherwise returns the stream's cur_pos, i.e. the
# match location counted from the start of whatever stream was passed in.
# Reversing or rewinding the stream beforehand is the caller's job, and the
# returned location should be corrected with correct_match_location.
def seek_landmarks(tokenstream)
  found_all=@landmarks.all? {|features| tokenstream.skip_to(*features)}
  return nil unless found_all
  tokenstream.cur_pos
end
|
148
|
+
|
149
|
+
# Takes the original tokenstream passed to apply_to and reverses the match
# location if required, so the match location returned to the user will be
# the index from the left of the passed tokenstream. The location is converted
# with reverse_pos whenever the rule's direction does not suit the stream's
# reversed state (:forward on a reversed stream, :back on an unreversed one).
def correct_match_location(tokenstream, match_loc)
  stream_reversed=tokenstream.reversed?
  case @direction
  when :forward
    stream_reversed ? tokenstream.reverse_pos(match_loc) : match_loc
  when :back
    stream_reversed ? match_loc : tokenstream.reverse_pos(match_loc)
  end
end
|
162
|
+
|
163
|
+
# Builds one RuleMatchData per entry of token_locs and returns them as an
# array. When the tokenstream has a label_index, the entry closest to it (as
# chosen by find_closest_match) gets a type of :perfect, :early or :late
# depending on whether it is at, before or after the label_index. Every other
# entry keeps a nil type.
def generate_match_data(tokenstream, token_locs)
  label_index=tokenstream.label_index
  nearest=find_closest_match(token_locs, label_index) if label_index
  token_locs.collect do |token_loc|
    match_data=@@RuleMatchData.new(token_loc)
    if label_index && token_loc==nearest
      match_data.type = if token_loc == label_index
        :perfect
      elsif token_loc < label_index
        :early
      elsif token_loc > label_index
        :late
      end
    end
    match_data
  end
end
|
180
|
+
|
181
|
+
# Returns the entry of token_locs nearest to label_index, or nil if there is
# no candidate. With a preference of :early, locations after the label_index
# are ignored; with :late, locations before it are ignored. A location equal
# to the label_index always qualifies.
def find_closest_match(token_locs, label_index, preference=:none)
  if preference==:early
    token_locs = token_locs.reject {|token_loc| token_loc > label_index}
  elsif preference==:late
    token_locs = token_locs.reject {|token_loc| token_loc < label_index}
  end
  token_locs.min_by {|token_loc| (label_index-token_loc).abs}
end
|
189
|
+
|
190
|
+
# Empties the class-wide cache in which apply_to stores the token locations
# it has already computed.
def self.clear_cache
  @@cache.clear
end
|
111
193
|
end
|
112
194
|
end
|