rley 0.6.01 → 0.6.02
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -1
- data/examples/NLP/engtagger.rb +58 -60
- data/lib/rley/constants.rb +1 -1
- metadata +2 -33
- data/examples/general/SRL/lib/ast_builder.rb +0 -382
- data/examples/general/SRL/lib/grammar.rb +0 -106
- data/examples/general/SRL/lib/regex/abstract_method.rb +0 -35
- data/examples/general/SRL/lib/regex/alternation.rb +0 -27
- data/examples/general/SRL/lib/regex/anchor.rb +0 -45
- data/examples/general/SRL/lib/regex/atomic_expression.rb +0 -16
- data/examples/general/SRL/lib/regex/capturing_group.rb +0 -51
- data/examples/general/SRL/lib/regex/char_class.rb +0 -38
- data/examples/general/SRL/lib/regex/char_range.rb +0 -51
- data/examples/general/SRL/lib/regex/char_shorthand.rb +0 -50
- data/examples/general/SRL/lib/regex/character.rb +0 -204
- data/examples/general/SRL/lib/regex/compound_expression.rb +0 -57
- data/examples/general/SRL/lib/regex/concatenation.rb +0 -29
- data/examples/general/SRL/lib/regex/expression.rb +0 -60
- data/examples/general/SRL/lib/regex/lookaround.rb +0 -50
- data/examples/general/SRL/lib/regex/match_option.rb +0 -34
- data/examples/general/SRL/lib/regex/monadic_expression.rb +0 -28
- data/examples/general/SRL/lib/regex/multiplicity.rb +0 -91
- data/examples/general/SRL/lib/regex/non_capturing_group.rb +0 -27
- data/examples/general/SRL/lib/regex/polyadic_expression.rb +0 -60
- data/examples/general/SRL/lib/regex/quantifiable.rb +0 -22
- data/examples/general/SRL/lib/regex/repetition.rb +0 -29
- data/examples/general/SRL/lib/regex/wildcard.rb +0 -23
- data/examples/general/SRL/lib/regex_repr.rb +0 -13
- data/examples/general/SRL/lib/tokenizer.rb +0 -147
- data/examples/general/SRL/spec/integration_spec.rb +0 -448
- data/examples/general/SRL/spec/regex/character_spec.rb +0 -166
- data/examples/general/SRL/spec/regex/multiplicity_spec.rb +0 -79
- data/examples/general/SRL/spec/spec_helper.rb +0 -25
- data/examples/general/SRL/spec/tokenizer_spec.rb +0 -148
- data/examples/general/SRL/srl_demo.rb +0 -75
@@ -1,57 +0,0 @@
|
|
1
|
-
# File: compound_expression.rb
|
2
|
-
|
3
|
-
require_relative 'expression' # Access the superclass
|
4
|
-
|
5
|
-
module Regex # This module is used as a namespace
  # Abstract class. An element that is part of a regular expression &
  # that has its own child sub-expressions.
  #
  # The commented-out copy of the depth-first visitor that used to live here
  # was dead code and has been dropped; the live implementation is
  # PolyadicExpression#df_visitor.
  class CompoundExpression < Expression
    # Redefined method.
    # @return [false] always, since a compound expression may have
    #   one or more children.
    def atomic?
      return false
    end

    protected

    # Abstract method. Return the text representation of the child (if any).
    # Delegates to `abstract_method`, which is defined outside this class
    # (presumably it raises so that subclasses must override -- TODO confirm).
    def all_child_text
      abstract_method
    end
  end # class
end # module
|
56
|
-
|
57
|
-
# End of file
|
@@ -1,29 +0,0 @@
|
|
1
|
-
# File: concatenation.rb
|
2
|
-
|
3
|
-
require_relative 'polyadic_expression' # Access the superclass
|
4
|
-
|
5
|
-
module Regex # This module is used as a namespace
  # An n-ary matching operator.
  # It succeeds when each child succeeds to match the subject text in the same
  # serial arrangement as defined by this concatenation.
  class Concatenation < PolyadicExpression
    # Constructor.
    # @param theChildren [Array] the sub-expressions, in matching order.
    #   They are passed as individual arguments and handed to the
    #   superclass as one Array.
    def initialize(*theChildren)
      super(theChildren)
    end

    protected

    # Conversion method re-definition.
    # Purpose: Return the String representation of the concatenated
    # expressions, i.e. the text of every child in order.
    # @return [String]
    def text_repr
      # Accumulate into an unfrozen copy: appending to a bare '' literal
      # breaks as soon as string literals are frozen.
      return children.inject(''.dup) do |text, a_child|
        text << a_child.to_str
      end
    end
  end # class
end # module
|
28
|
-
|
29
|
-
# End of file
|
@@ -1,60 +0,0 @@
|
|
1
|
-
# File: expression.rb
|
2
|
-
|
3
|
-
require_relative 'abstract_method'
|
4
|
-
|
5
|
-
module Regex # This module is used as a namespace
  # Abstract class. The generalization of any valid regular (sub)expression.
  class Expression
    # Optional object rendered (via its to_str) before the expression text.
    attr_accessor :begin_anchor

    # Optional object rendered (via its to_str) after the expression text.
    attr_accessor :end_anchor

    # Constructor.
    # Makes sure the local matching options exist, so that #options can
    # always merge them. A value set earlier is left untouched.
    def initialize
      @local_options ||= {}
    end

    # Abstract method. Return true iff the expression is atomic
    # (= may not have any child).
    def atomic?
      abstract_method
    end

    # Abstract method. Return the number of values that match this expression.
    # [_parent_options] an Hash of matching options. They are overridden
    # by options with same name that are bound to this object.
    def cardinality(_parent_options)
      abstract_method
    end

    # Determine the matching options to apply to this object, given the options
    # coming from the parent and options that are local to this object.
    # Local options take precedence.
    # @param theParentOptions [Hash] matching options. They are overridden
    #   by options with same name that are bound to this object.
    # @return [Hash] a new Hash; neither input is modified.
    def options(theParentOptions)
      # The fallback covers subclasses that never ran this constructor:
      # merging nil would raise a TypeError.
      return theParentOptions.merge(@local_options || {})
    end

    # Template method.
    # Purpose: Return the String representation of the expression, i.e.
    # the prefix, the text supplied by the subclass (text_repr) and the suffix.
    # @return [String] a freshly built String
    def to_str
      # Concatenation builds a new String instead of appending to a literal,
      # so it keeps working when string literals are frozen.
      return prefix + text_repr + suffix
    end

    protected

    # Text of the begin anchor, or an empty String when there is none.
    def prefix
      begin_anchor ? begin_anchor.to_str : ''
    end

    # Text of the end anchor, or an empty String when there is none.
    def suffix
      end_anchor ? end_anchor.to_str : ''
    end
  end # class
end # module
|
59
|
-
|
60
|
-
# End of file
|
@@ -1,50 +0,0 @@
|
|
1
|
-
# File: Lookaround.rb
|
2
|
-
|
3
|
-
########################
|
4
|
-
# TODO: make it a binary expression
|
5
|
-
########################
|
6
|
-
|
7
|
-
|
8
|
-
require_relative 'polyadic_expression' # Access the superclass
|
9
|
-
|
10
|
-
module Regex # This module is used as a namespace
  # Lookaround is a zero-width assertion just like the start and end of line
  # anchors.
  # The difference is that lookarounds will actually match characters, but only
  # return the result of the match: match or no match.
  # That is why they are called "assertions". They do not consume characters
  # from the subject, but only assert whether a match is possible or not.
  class Lookaround < PolyadicExpression
    # The "direction" of the lookaround. Can be ahead or behind. It specifies
    # the relative position of the expression to match compared to
    # the current 'position' in the subject text.
    attr_reader(:dir)

    # The kind indicates whether the assertion is positive
    # (succeeds when there is a match) or negative
    # (assertion succeeds when there is NO match).
    attr_reader(:kind)

    # Constructor.
    # [assertedExpression] A sub-expression to match.
    # [theDir] One of the following values: [ :ahead, :behind ]
    # [theKind] One of the following values: [ :positive, :negative ]
    def initialize(assertedExpression, theDir, theKind)
      super([assertedExpression])
      @dir = theDir
      @kind = theKind
    end

    # Conversion method re-definition.
    # Purpose: Return the String representation of the lookaround.
    #
    # With a single child (the state right after construction) that child
    # is the asserted expression and the result is "(?=child)", "(?!child)",
    # "(?<=child)" or "(?<!child)".
    # With two children, the first one is rendered as-is and followed by the
    # assertion built around the second one.
    # @return [String] a freshly built String
    def to_str
      dir_syntax = dir == :ahead ? '' : '<'
      kind_syntax = kind == :positive ? '=' : '!'

      if children.size > 1
        leading_text = children[0].to_str
        asserted = children[1]
      else
        # Only the asserted expression is present: there is no second child
        # to dereference.
        leading_text = ''
        asserted = children[0]
      end

      # A new String is built here, so the text returned by the first
      # child is never mutated.
      return leading_text + '(?' + dir_syntax + kind_syntax + asserted.to_str + ')'
    end
  end # class
end # module
|
49
|
-
|
50
|
-
# End of file
|
@@ -1,34 +0,0 @@
|
|
1
|
-
# File: MatchOption.rb
|
2
|
-
|
3
|
-
module Regex # This module is used as a namespace
  # Represents an option that influences the way a regular (sub)expression
  # can perform its matching.
  class MatchOption
    # The symbolic name of the option
    attr_reader(:name)

    # An indicator that tells whether the option is turned on or off
    attr_reader(:setting)

    # Constructor.
    # @param theName [Object] the symbolic name of the option
    # @param theSetting [Object] the on/off indicator
    def initialize(theName, theSetting)
      @name = theName
      @setting = theSetting
    end

    # Equality operator.
    # Two options are equal when they are the same object, or when both are
    # MatchOption objects with equal name and equal setting.
    # @return [Boolean]
    def ==(other)
      return true if equal?(other)
      return false unless other.kind_of?(MatchOption)

      return (name == other.name) && (setting == other.setting)
    end

    # Hash-key equality. Kept consistent with #hash so that equal options
    # collapse in Hash keys, Array#uniq and the like.
    # @return [Boolean]
    def eql?(other)
      return true if equal?(other)
      return false unless other.kind_of?(MatchOption)

      return name.eql?(other.name) && setting.eql?(other.setting)
    end

    # Hash code derived from the same attributes that #eql? compares.
    # @return [Integer]
    def hash
      return [name, setting].hash
    end
  end # class
end # module
|
33
|
-
|
34
|
-
# End of file
|
@@ -1,28 +0,0 @@
|
|
1
|
-
# File: monadic_expression.rb
|
2
|
-
|
3
|
-
require_relative 'compound_expression' # Access the superclass
|
4
|
-
|
5
|
-
module Regex # This module is used as a namespace
  # Abstract class. A regular expression element that owns at most
  # one child sub-expression.
  class MonadicExpression < CompoundExpression
    # The (optional) child sub-expression; nil when there is none.
    attr_reader(:child)

    # Constructor.
    # @param theChild [Object, nil] the child sub-expression, if any
    def initialize(theChild)
      super()
      @child = theChild
    end

    protected

    # Text representation of the child.
    # @return [String] the child's to_str, or an empty String without child
    def all_child_text
      return '' if child.nil?

      child.to_str
    end
  end # class
end # module
|
28
|
-
# End of file
|
@@ -1,91 +0,0 @@
|
|
1
|
-
# File: Multiplicity.rb
|
2
|
-
|
3
|
-
module SRL
  module Regex # This module is used as a namespace
    # The multiplicity specifies by how much a given expression can be repeated.
    class Multiplicity
      # Text appended to the quantifier for each supported repetition policy.
      POLICY_SUFFIXES = { greedy: '', lazy: '?', possessive: '+' }.freeze
      private_constant :POLICY_SUFFIXES

      # The lowest acceptable repetition count
      attr_reader(:lower_bound)

      # The highest possible repetition count
      attr_reader(:upper_bound)

      # An indicator that specifies how to repeat (:greedy, :lazy, :possessive)
      attr_reader(:policy)

      # @param aLowerBound [Integer] non-negative minimal repetition count
      # @param anUpperBound [Integer, Symbol] integer (not below the lower
      #   bound) or :more symbol
      # @param aPolicy [Symbol] One of: (:greedy, :lazy, :possessive)
      # @raise [StandardError] when one of the arguments is invalid
      def initialize(aLowerBound, anUpperBound, aPolicy)
        # Order matters: the upper bound is checked against the lower bound.
        @lower_bound = valid_lower_bound(aLowerBound)
        @upper_bound = valid_upper_bound(anUpperBound)
        @policy = valid_policy(aPolicy)
      end

      # Purpose: Return the String representation of the multiplicity,
      # that is, the quantifier followed by the policy suffix.
      # @return [String]
      def to_str
        return quantifier_text + POLICY_SUFFIXES.fetch(policy)
      end

      private

      # Quantifier without policy suffix: '*', '+', '?', '{n}', '{n,}'
      # or '{n,m}' depending on the bounds.
      def quantifier_text
        if upper_bound == :more
          case lower_bound
          when 0 then '*'
          when 1 then '+'
          else "{#{lower_bound},}"
          end
        elsif upper_bound == lower_bound
          "{#{lower_bound}}"
        elsif [lower_bound, upper_bound] == [0, 1]
          '?'
        else
          "{#{lower_bound},#{upper_bound}}"
        end
      end

      # Validation method. Return the validated lower bound value.
      # A negative count cannot be expressed as a quantifier and is rejected.
      def valid_lower_bound(aLowerBound)
        err_msg = "Invalid lower bound of repetition count #{aLowerBound}"
        raise StandardError, err_msg unless aLowerBound.kind_of?(Integer)
        raise StandardError, err_msg if aLowerBound.negative?

        return aLowerBound
      end

      # Validation method. Return the validated upper bound value.
      # Expects the lower bound to be already set: an Integer upper bound
      # below it would render an empty range such as {3,1}.
      def valid_upper_bound(anUpperBound)
        err_msg = "Invalid upper bound of repetition count #{anUpperBound}"
        return anUpperBound if anUpperBound == :more

        raise StandardError, err_msg unless anUpperBound.kind_of?(Integer)
        raise StandardError, err_msg if anUpperBound < lower_bound

        return anUpperBound
      end

      # Validation method. Return the validated policy value.
      def valid_policy(aPolicy)
        err_msg = "Invalid repetition policy '#{aPolicy}'."
        raise StandardError, err_msg unless POLICY_SUFFIXES.key?(aPolicy)

        return aPolicy
      end
    end # class
  end # module
end # module
|
90
|
-
|
91
|
-
# End of file
|
@@ -1,27 +0,0 @@
|
|
1
|
-
# File: non_capturing_group.rb
|
2
|
-
|
3
|
-
require_relative 'monadic_expression' # Access the superclass
|
4
|
-
|
5
|
-
module Regex # This module is used as a namespace
  # A non-capturing group: a pure grouping of sub-expressions,
  # rendered as "(?:...)".
  class NonCapturingGroup < MonadicExpression
    # Constructor.
    # [aChildExpression] The sub-expression to group. It is handed over
    # unchanged to the superclass constructor.
    def initialize(aChildExpression)
      super(aChildExpression)
    end

    protected

    # Conversion method re-definition.
    # Purpose: Return the String representation of the group, that is
    # the text of the child wrapped between "(?:" and ")".
    def text_repr
      inner_text = all_child_text
      '(?:' + inner_text + ')'
    end
  end # class
end # module
|
26
|
-
|
27
|
-
# End of file
|
@@ -1,60 +0,0 @@
|
|
1
|
-
# File: polyadic_expression.rb
|
2
|
-
|
3
|
-
require_relative 'compound_expression' # Access the superclass
|
4
|
-
|
5
|
-
module Regex # This module is used as a namespace
  # Abstract class. An element that is part of a regular expression &
  # that has its own child sub-expressions.
  class PolyadicExpression < CompoundExpression
    # The aggregation of child elements
    attr_reader(:children)

    # Constructor.
    # @param theChildren [Array] the child sub-expressions
    def initialize(theChildren)
      super()
      @children = theChildren
    end

    # Append the given child to the list of children.
    # TODO: assess whether to defer to a subclass NAryExpression
    # @return [PolyadicExpression] self, so that calls can be chained
    def <<(aChild)
      @children << aChild

      return self
    end

    # Build a depth-first children visitor: every child is yielded before
    # its own children, siblings are yielded in their stored order.
    # The visitor is implemented as an Enumerator and never modifies
    # the visited expressions, so it can be enumerated more than once.
    # @return [Enumerator] yields each descendant of this object
    def df_visitor
      visitor = Enumerator.new do |result| # result is a Yielder
        # LIFO stack of nodes still to visit; the next node is the last item.
        # It starts from a reversed *copy* of the children: popping from
        # the children Array itself would empty this expression.
        pending = children.reverse

        until pending.empty?
          curr_child = pending.pop
          result << curr_child # Return the visited child

          # NOTE(review): a non-atomic node is expected to respond to
          # `children`; confirm for nodes that only expose `child`.
          next if curr_child.atomic?

          # Reversed so that the first grandchild is popped first
          pending.concat(curr_child.children.reverse)
        end
      end

      return visitor
    end
  end # class
end # module
|
59
|
-
|
60
|
-
# End of file
|