srl_ruby 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rspec +4 -0
- data/.rubocop.yml +3 -0
- data/.yardopts +6 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +66 -0
- data/Rakefile +16 -0
- data/bin/srl_ruby +58 -0
- data/lib/regex/abstract_method.rb +35 -0
- data/lib/regex/alternation.rb +27 -0
- data/lib/regex/anchor.rb +45 -0
- data/lib/regex/atomic_expression.rb +16 -0
- data/lib/regex/capturing_group.rb +51 -0
- data/lib/regex/char_class.rb +38 -0
- data/lib/regex/char_range.rb +51 -0
- data/lib/regex/char_shorthand.rb +50 -0
- data/lib/regex/character.rb +204 -0
- data/lib/regex/compound_expression.rb +57 -0
- data/lib/regex/concatenation.rb +29 -0
- data/lib/regex/expression.rb +60 -0
- data/lib/regex/lookaround.rb +50 -0
- data/lib/regex/match_option.rb +34 -0
- data/lib/regex/monadic_expression.rb +28 -0
- data/lib/regex/multiplicity.rb +91 -0
- data/lib/regex/non_capturing_group.rb +27 -0
- data/lib/regex/polyadic_expression.rb +60 -0
- data/lib/regex/quantifiable.rb +22 -0
- data/lib/regex/repetition.rb +29 -0
- data/lib/regex/wildcard.rb +23 -0
- data/lib/srl_ruby/ast_builder.rb +384 -0
- data/lib/srl_ruby/grammar.rb +106 -0
- data/lib/srl_ruby/regex_repr.rb +13 -0
- data/lib/srl_ruby/tokenizer.rb +147 -0
- data/lib/srl_ruby/version.rb +3 -0
- data/lib/srl_ruby.rb +4 -0
- data/spec/integration_spec.rb +451 -0
- data/spec/regex/character_spec.rb +166 -0
- data/spec/regex/multiplicity_spec.rb +79 -0
- data/spec/spec_helper.rb +16 -0
- data/spec/srl_ruby/srl_ruby_spec.rb +7 -0
- data/spec/srl_ruby/tokenizer_spec.rb +147 -0
- data/srl_ruby.gemspec +58 -0
- metadata +150 -0
@@ -0,0 +1,204 @@
|
|
1
|
+
# File: character.rb
|
2
|
+
|
3
|
+
require_relative 'atomic_expression' # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
# A regular expression that matches a specific character in a given character set
|
7
|
+
class Character < AtomicExpression
|
8
|
+
# Constant with all special 2-characters escape sequences
|
9
|
+
DigramSequences = {
|
10
|
+
"\\a" => 0x7, # alarm
|
11
|
+
"\\n" => 0xA, # newline
|
12
|
+
"\\r" => 0xD, # carriage return
|
13
|
+
"\\t" => 0x9, # tab
|
14
|
+
"\\e" => 0x1B, # escape
|
15
|
+
"\\f" => 0xC, # form feed
|
16
|
+
"\\v" => 0xB, # vertical feed
|
17
|
+
# Single octal digit literals
|
18
|
+
"\\0" => 0,
|
19
|
+
"\\1" => 1,
|
20
|
+
"\\2" => 2,
|
21
|
+
"\\3" => 3,
|
22
|
+
"\\4" => 4,
|
23
|
+
"\\5" => 5,
|
24
|
+
"\\6" => 6,
|
25
|
+
"\\7" => 7
|
26
|
+
}.freeze
|
27
|
+
|
28
|
+
MetaChars = '\^$+?.'.freeze
|
29
|
+
|
30
|
+
# The integer value that uniquely identifies the character.
|
31
|
+
attr_reader(:codepoint)
|
32
|
+
|
33
|
+
# The initial text representation of the character (if any).
|
34
|
+
attr_reader(:lexeme)
|
35
|
+
|
36
|
+
# Constructor.
|
37
|
+
# [aValue] Initialize the character with either a String literal or a
|
38
|
+
# codepoint value.
|
39
|
+
# Examples:
|
40
|
+
# Initializing with codepoint value...
|
41
|
+
# RegAn::Character.new(0x3a3) # Represents: Σ
|
42
|
+
# (Unicode GREEK CAPITAL LETTER SIGMA)
|
43
|
+
# RegAn::Character.new(931) # Also represents: Σ (931 dec == 3a3 hex)
|
44
|
+
#
|
45
|
+
# Initializing with a single character string
|
46
|
+
# RegAn::Character.new(?\u03a3) # Also represents: Σ
|
47
|
+
# RegAn::Character.new('Σ') # Obviously, represents a Σ
|
48
|
+
#
|
49
|
+
# Initializing with an escape sequence string
|
50
|
+
# Recognized escaped characters are: \a (alarm, 0x07), \n (newline, 0xA),
|
51
|
+
# \r (carriage return, 0xD), \t (tab, 0x9), \e (escape, 0x1B),
|
52
|
+
# \f (form feed, 0xC)
|
53
|
+
# \uXXXX where XXXX is a 4 hex digits integer value, \u{X...}, \ooo (octal)
|
54
|
+
# \xXX (hex)
|
55
|
+
# Any other escaped character will be treated as a literal character
|
56
|
+
# RegAn::Character.new('\n') # Represents a newline
|
57
|
+
# RegAn::Character.new('\u03a3') # Represents a Σ
|
58
|
+
def initialize(aValue)
  case aValue
  when Integer
    # A bare codepoint: there is no source text, so no lexeme is recorded.
    @codepoint = aValue
  when String
    converters = self.class
    # A one-character text is a literal; anything else is expected to be
    # an escape sequence.
    @codepoint =
      if aValue.size == 1
        converters.char2codepoint(aValue)
      else
        converters.esc2codepoint(aValue)
      end
    # Remember the text the character was created from.
    @lexeme = aValue
  else
    raise StandardError, "Cannot initialize a Character with a '#{aValue}'."
  end
end
|
76
|
+
|
77
|
+
# Conversion method that returns a character given a codepoint (integer) value.
|
78
|
+
# Example:
|
79
|
+
# RegAn::Character::codepoint2char(0x3a3) # Returns: Σ (
|
80
|
+
# The Unicode GREEK CAPITAL LETTER SIGMA)
|
81
|
+
def self.codepoint2char(aCodepoint)
|
82
|
+
return [aCodepoint].pack('U') # Remark: chr() fails with codepoints > 256
|
83
|
+
end
|
84
|
+
|
85
|
+
# Conversion method that returns the codepoint for the given single character.
|
86
|
+
# Example:
|
87
|
+
# RegAn::Character::char2codepoint('Σ') # Returns: 0x3a3
|
88
|
+
def self.char2codepoint(aChar)
|
89
|
+
return aChar.ord
|
90
|
+
end
|
91
|
+
|
92
|
+
# Conversion method that returns the codepoint for the given escape
|
93
|
+
# sequence (a String).
|
94
|
+
# Recognized escaped characters are: \a (alarm, 0x07), \n (newline, 0xA),
|
95
|
+
# \r (carriage return, 0xD), \t (tab, 0x9), \e (escape, 0x1B), \f (form feed,
|
96
|
+
# 0xC), \v (vertical feed, 0xB)
|
97
|
+
# \uXXXX where XXXX is a 4 hex digits integer value, \u{X...}, \ooo (octal)
|
98
|
+
# \xXX (hex)
|
99
|
+
# Any other escaped character will be treated as a literal character
|
100
|
+
# Example:
|
101
|
+
# RegAn::Character::esc2codepoint('\n') # Returns: 0xa
|
102
|
+
# @param anEscapeSequence [String] a text that must start with a backslash
# @return [Integer] the codepoint denoted by the escape sequence
# @raise [StandardError] when the text does not begin with a backslash
def self.esc2codepoint(anEscapeSequence)
  unless anEscapeSequence[0] == '\\'
    # The backslash is doubled in the literal so that it really appears in
    # the message (a lone "\)" in a double-quoted string yields just ")").
    msg = "Escape sequence #{anEscapeSequence} does not begin with a backslash (\\)."
    raise StandardError, msg
  end

  # Two-character sequences (backslash + one character) are digrams; any
  # other length is handed over to the numeric escape decoder.
  if anEscapeSequence.length == 2
    digram2codepoint(anEscapeSequence)
  else
    esc_number2codepoint(anEscapeSequence)
  end
end
|
109
|
+
|
110
|
+
# Return the character as a String object
|
111
|
+
def char()
|
112
|
+
self.class.codepoint2char(@codepoint)
|
113
|
+
end
|
114
|
+
|
115
|
+
# Returns true iff this Character and parameter 'another' represent the same character.
|
116
|
+
# [another] any Object. The way the equality is tested depends on the another's class
|
117
|
+
# Example:
|
118
|
+
# newOne = Character.new(?\u03a3)
|
119
|
+
# newOne == newOne # true. Identity
|
120
|
+
# newOne == Character.new(?\u03a3) # true. Both have same codepoint
|
121
|
+
# newOne == ?\u03a3 # true. The single character String match exactly the char attribute.
|
122
|
+
# newOne == 0x03a3 # true. The Integer is compared to the codepoint value.
|
123
|
+
# Will test equality with any Object that knows the to_s method
|
124
|
+
def ==(other)
  case other
  when Character
    # Two characters are compared through their text representations.
    to_str == other.to_str
  when Integer
    # An Integer is compared to the codepoint value.
    codepoint == other
  when String
    # A text longer than one character never matches.
    other.size <= 1 && to_str == other
  else
    # Unknown type: retry with its to_s conversion (recursive call).
    self == other.to_s
  end
end
|
142
|
+
|
143
|
+
# Return a plain English description of the character
|
144
|
+
def explain()
|
145
|
+
return "the character '#{to_str}'"
|
146
|
+
end
|
147
|
+
|
148
|
+
protected
|
149
|
+
|
150
|
+
# Conversion method re-definition.
|
151
|
+
# Purpose: Return the String representation of the expression.
|
152
|
+
# If the Character was initially from a text (the lexeme), then the lexeme
|
153
|
+
# is returned back.
|
154
|
+
# Otherwise the character corresponding to the codepoint is returned.
|
155
|
+
def text_repr
  # Without a lexeme, fall back to the character built from the codepoint;
  # otherwise hand out a copy of the lexeme so callers cannot alter it.
  lexeme.nil? ? char : lexeme.dup
end
|
159
|
+
|
160
|
+
# Conversion method that returns a codepoint for the given two characters
|
161
|
+
# (digram) escape sequence.
|
162
|
+
# Recognized escaped characters are: \a (alarm, 0x07), \n (newline, 0xA),
|
163
|
+
# \r (carriage return, 0xD), \t (tab, 0x9), \e (escape, 0x1B),
|
164
|
+
# \f (form feed, 0xC), \v (vertical feed, 0xB)
|
165
|
+
# Any other escape sequence will return the codepoint of the escaped
|
166
|
+
# character.
|
167
|
+
# [aDigram] A sequence of two characters that starts with a backslash.
|
168
|
+
def self.digram2codepoint(aDigram)
  # Known special sequences come straight from the lookup table. For any
  # other digram the backslash is a 'dummy': the escaped character itself
  # is taken literally.
  DigramSequences.fetch(aDigram) { char2codepoint(aDigram[-1]) }
end
|
177
|
+
|
178
|
+
private_class_method :digram2codepoint
|
179
|
+
|
180
|
+
# Conversion method that returns a codepoint for the given complex
|
181
|
+
# escape sequence.
|
182
|
+
# [anEscapeSequence] A String with the format:
|
183
|
+
# \uXXXX where XXXX is a 4 hex digits integer value,
|
184
|
+
# \u{X...} X 1 or more hex digits
|
185
|
+
# \ooo (1..3 octal digits literal)
|
186
|
+
# \xXX (1..2 hex digits literal)
|
187
|
+
# @param anEscapeSequence [String] a numeric escape sequence
# @return [Integer] the codepoint denoted by the sequence
# @raise [StandardError] when the text is not a supported numeric escape
def self.esc_number2codepoint(anEscapeSequence)
  # \A and \z anchor the pattern to the whole text. The line anchors ^ and $
  # would accept a valid sequence sitting on any line of a multi-line text,
  # or one followed by a trailing newline.
  pattern = /\A\\(?:(?:[uxX]\{?(?<hexa>\h+)\}?)|(?<octal>[0-7]{1,3}))\z/
  matching = pattern.match(anEscapeSequence)
  raise StandardError, "Unsupported escape sequence #{anEscapeSequence}." unless matching

  # Octal literal case: \ooo
  return matching[:octal].oct if matching[:octal]

  # Otherwise it is a hexadecimal literal: \xXX, \uXXXX or \u{X...}
  matching[:hexa].hex
end
|
199
|
+
|
200
|
+
private_class_method :esc_number2codepoint
|
201
|
+
end # class
|
202
|
+
end # module
|
203
|
+
|
204
|
+
# End of file
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# File: compound_expression.rb
|
2
|
+
|
3
|
+
require_relative 'expression' # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
# Abstract class. An element that is part of a regular expression &
|
7
|
+
# that has its own child sub-expressions.
|
8
|
+
class CompoundExpression < Expression
|
9
|
+
# Redefined method. Return false since it may have one or more children.
|
10
|
+
def atomic?
|
11
|
+
return false
|
12
|
+
end
|
13
|
+
|
14
|
+
=begin
|
15
|
+
# Build a depth-first in-order children visitor.
|
16
|
+
# The visitor is implemented as an Enumerator.
|
17
|
+
def df_visitor()
|
18
|
+
root = children # The visit will start from the children of this object
|
19
|
+
|
20
|
+
visitor = Enumerator.new do |result| # result is a Yielder
|
21
|
+
# Initialization part: will run once
|
22
|
+
visit_stack = [ root ] # The LIFO queue of nodes to visit
|
23
|
+
|
24
|
+
begin # Traversal part (as a loop)
|
25
|
+
top = visit_stack.pop()
|
26
|
+
if top.kind_of?(Array)
|
27
|
+
if top.empty?
|
28
|
+
next
|
29
|
+
else
|
30
|
+
currChild = top.pop()
|
31
|
+
visit_stack.push top
|
32
|
+
end
|
33
|
+
else
|
34
|
+
currChild = top
|
35
|
+
end
|
36
|
+
|
37
|
+
result << currChild # Return the visited child
|
38
|
+
|
39
|
+
unless currChild.atomic?
|
40
|
+
children_to_enqueue = currChild.children.reverse() # in-order traversal implies LIFO queue
|
41
|
+
visit_stack.push(children_to_enqueue)
|
42
|
+
end
|
43
|
+
end until visit_stack.empty?
|
44
|
+
end
|
45
|
+
end
|
46
|
+
=end
|
47
|
+
|
48
|
+
protected
|
49
|
+
|
50
|
+
# Abstract method. Return the text representation of the child (if any)
|
51
|
+
def all_child_text()
|
52
|
+
abstract_method
|
53
|
+
end
|
54
|
+
end # class
|
55
|
+
end # module
|
56
|
+
|
57
|
+
# End of file
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# File: concatenation.rb
|
2
|
+
|
3
|
+
require_relative 'polyadic_expression' # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
# Abstract class. A n-ary matching operator.
|
7
|
+
# It succeeds when each child succeeds to match the subject text in the same
|
8
|
+
# serial arrangement as defined by this concatenation.
|
9
|
+
class Concatenation < PolyadicExpression
|
10
|
+
# Constructor.
|
11
|
+
def initialize(*theChildren)
|
12
|
+
super(theChildren)
|
13
|
+
end
|
14
|
+
|
15
|
+
protected
|
16
|
+
|
17
|
+
# Conversion method re-definition.
|
18
|
+
# Purpose: Return the String representation of the concatenated expressions.
|
19
|
+
def text_repr
  # Append the text of every child, in order, to a fresh buffer.
  children.each_with_object(''.dup) do |a_child, buffer|
    buffer << a_child.to_str
  end
end
|
26
|
+
end # class
|
27
|
+
end # module
|
28
|
+
|
29
|
+
# End of file
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# File: expression.rb
|
2
|
+
|
3
|
+
require_relative 'abstract_method'
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
# Abstract class. The generalization of any valid regular (sub)expression.
|
7
|
+
class Expression
|
8
|
+
attr_accessor :begin_anchor
|
9
|
+
attr_accessor :end_anchor
|
10
|
+
|
11
|
+
# Constructor
|
12
|
+
def initialize(); end
|
13
|
+
|
14
|
+
# Abstract method. Return true iff the expression is atomic
|
15
|
+
# (= may not have any child).
|
16
|
+
def atomic?()
|
17
|
+
abstract_method
|
18
|
+
end
|
19
|
+
|
20
|
+
# Abstract method. Return the number of values that match this expression.
|
21
|
+
# [_parent_options] an Hash of matching options. They are overridden
|
22
|
+
# by options with same name that are bound to this object.
|
23
|
+
def cardinality(_parent_options)
|
24
|
+
abstract_method
|
25
|
+
end
|
26
|
+
|
27
|
+
# Determine the matching options to apply to this object, given the options
|
28
|
+
# coming from the parent
|
29
|
+
# and options that are local to this object. Local options take precedence.
|
30
|
+
# @param theParentOptions [Hash] matching options. They are overridden
|
31
|
+
# by options with same name that are bound to this object.
|
32
|
+
# @return [Hash] a new Hash; neither input is modified
def options(theParentOptions)
  # NOTE(review): @local_options is not initialised anywhere in this class;
  # presumably subclasses set it. When it is unset, the parent options are
  # returned as-is instead of failing with a TypeError on merge(nil).
  theParentOptions.merge(@local_options || {})
end
|
36
|
+
|
37
|
+
# Template method.
|
38
|
+
# Purpose: Return the String representation of the expression.
|
39
|
+
def to_str
  # Template method: the text is the begin anchor (if any), followed by the
  # subclass-specific representation, followed by the end anchor (if any).
  [prefix, text_repr, suffix].each_with_object(''.dup) do |part, text|
    text << part
  end
end
|
47
|
+
|
48
|
+
protected
|
49
|
+
|
50
|
+
def prefix()
|
51
|
+
begin_anchor ? begin_anchor.to_str : ''
|
52
|
+
end
|
53
|
+
|
54
|
+
def suffix()
|
55
|
+
end_anchor ? end_anchor.to_str : ''
|
56
|
+
end
|
57
|
+
end # class
|
58
|
+
end # module
|
59
|
+
|
60
|
+
# End of file
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# File: lookaround.rb
|
2
|
+
|
3
|
+
########################
|
4
|
+
# TODO: make it a binary expression
|
5
|
+
########################
|
6
|
+
|
7
|
+
|
8
|
+
require_relative 'polyadic_expression' # Access the superclass
|
9
|
+
|
10
|
+
module Regex # This module is used as a namespace
|
11
|
+
# Lookaround is a zero-width assertion just like the start and end of line
|
12
|
+
# anchors.
|
13
|
+
# The difference is that lookarounds will actually match characters, but only
|
14
|
+
# return the result of the match: match or no match.
|
15
|
+
# That is why they are called "assertions". They do not consume characters
|
16
|
+
# from the subject, but only assert whether a match is possible or not.
|
17
|
+
class Lookaround < PolyadicExpression
|
18
|
+
# The "direction" of the lookaround. Can be ahead or behind. It specifies
|
19
|
+
# the relative position of the expression to match compared to
|
20
|
+
# the current 'position' in the subject text.
|
21
|
+
attr_reader(:dir)
|
22
|
+
|
23
|
+
# The kind indicates whether the assertion is positive
|
24
|
+
# (succeeds when there is a match) or negative
|
25
|
+
# (assertion succeeds when there is NO match).
|
26
|
+
attr_reader(:kind)
|
27
|
+
|
28
|
+
# Constructor.
|
29
|
+
# [assertedExpression] A sub-expression to match.
|
30
|
+
# [theDir] One of the following values: [ :ahead, :behind ]
|
31
|
+
# [theKind] One of the following values: [ :positive, :negative ]
|
32
|
+
def initialize(assertedExpression, theDir, theKind)
|
33
|
+
super([assertedExpression])
|
34
|
+
@dir = theDir
|
35
|
+
@kind = theKind
|
36
|
+
end
|
37
|
+
|
38
|
+
# Conversion method re-definition.
|
39
|
+
# Purpose: Return the String representation of the lookaround assertion.
|
40
|
+
# @return [String] the text of the lookaround, e.g. 'foo(?=bar)' or '(?<!bar)'
def to_str
  # The constructor stores only the asserted expression. With two children
  # the first one is emitted in front of the assertion and the second one
  # is the asserted expression.
  # NOTE(review): presumably a caller inserts the leading expression into
  # children after construction (see the TODO about making this a binary
  # expression) -- confirm.
  if children.size > 1
    leading_text = children[0].to_str
    asserted = children[1]
  else
    leading_text = ''
    asserted = children[0]
  end

  dir_syntax = dir == :ahead ? '' : '<'
  kind_syntax = kind == :positive ? '=' : '!'

  # A new String is built so that the text returned by a child is never
  # modified in place.
  "#{leading_text}(?#{dir_syntax}#{kind_syntax}#{asserted.to_str})"
end
|
47
|
+
end # class
|
48
|
+
end # module
|
49
|
+
|
50
|
+
# End of file
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# File: match_option.rb
|
2
|
+
|
3
|
+
module Regex # This module is used as a namespace
|
4
|
+
# Represents an option that influences the way a regular (sub)expression
|
5
|
+
# can perform its matching.
|
6
|
+
class MatchOption
  # The symbolic name of the option
  attr_reader :name

  # An indicator that tells whether the option is turned on or off
  attr_reader :setting

  # Constructor.
  # @param theName [Object] the symbolic name of the option
  # @param theSetting [Object] the on/off indicator of the option
  def initialize(theName, theSetting)
    @name = theName
    @setting = theSetting
  end

  # Equality operator.
  # @param other [Object] the object to compare with
  # @return [Boolean] true when other is this very object, or a MatchOption
  #   with the same name and the same setting
  def ==(other)
    return true if equal?(other)

    other.is_a?(MatchOption) && name == other.name && setting == other.setting
  end

  # Hash-key equality follows ==, so that equal options are treated as the
  # same key by Hash lookups and Array#uniq.
  alias eql? ==

  # @return [Integer] a hash value that is identical for equal options
  def hash
    [MatchOption, name, setting].hash
  end
end # class
|
32
|
+
end # module
|
33
|
+
|
34
|
+
# End of file
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# File: monadic_expression.rb
|
2
|
+
|
3
|
+
require_relative 'compound_expression' # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
# Abstract class. An element that is part of a regular expression &
|
7
|
+
# that can have up to one child sub-expression.
|
8
|
+
class MonadicExpression < CompoundExpression
|
9
|
+
# The (optional) child sub-expression
|
10
|
+
attr_reader(:child)
|
11
|
+
|
12
|
+
# Constructor.
|
13
|
+
def initialize(theChild)
|
14
|
+
super()
|
15
|
+
@child = theChild
|
16
|
+
end
|
17
|
+
|
18
|
+
protected
|
19
|
+
|
20
|
+
# Return the text representation of the child (if any)
|
21
|
+
def all_child_text
  # The child is optional: no child means no text.
  return '' if child.nil?

  child.to_str
end
|
26
|
+
end # class
|
27
|
+
end # module
|
28
|
+
# End of file
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# File: multiplicity.rb
|
2
|
+
|
3
|
+
module SRL
|
4
|
+
module Regex # This module is used as a namespace
|
5
|
+
# The multiplicity specifies by how much a given expression can be repeated.
|
6
|
+
class Multiplicity
  # Text appended to the quantifier for each repetition policy
  POLICY_SUFFIXES = { greedy: '', lazy: '?', possessive: '+' }.freeze

  # The lowest acceptable repetition count
  attr_reader :lower_bound

  # The highest possible repetition count
  attr_reader :upper_bound

  # An indicator that specifies how to repeat (:greedy, :lazy, :possessive)
  attr_reader :policy

  # @param aLowerBound [Integer] a non-negative repetition count
  # @param anUpperBound [Integer, Symbol] integer (not lower than
  #   aLowerBound) or :more symbol
  # @param aPolicy [Symbol] One of: (:greedy, :lazy, :possessive)
  # @raise [StandardError] when one of the arguments is invalid
  def initialize(aLowerBound, anUpperBound, aPolicy)
    # Order matters: the upper bound is checked against the lower bound.
    @lower_bound = valid_lower_bound(aLowerBound)
    @upper_bound = valid_upper_bound(anUpperBound)
    @policy = valid_policy(aPolicy)
  end

  # Purpose: Return the String representation of the multiplicity.
  # @return [String] the quantifier followed by the policy suffix,
  #   e.g. '*', '+?', '{2,5}+'
  def to_str
    count_text + POLICY_SUFFIXES.fetch(policy)
  end

  private

  # Return the quantifier text without the policy suffix.
  def count_text
    return open_ended_text if upper_bound == :more
    return "{#{lower_bound}}" if upper_bound == lower_bound
    return '?' if [lower_bound, upper_bound] == [0, 1]

    "{#{lower_bound},#{upper_bound}}"
  end

  # Return the quantifier text when there is no upper limit.
  def open_ended_text
    case lower_bound
    when 0 then '*'
    when 1 then '+'
    else "{#{lower_bound},}"
    end
  end

  # Validation method. Return the validated lower bound value.
  # A repetition count must be a non-negative Integer.
  def valid_lower_bound(aLowerBound)
    return aLowerBound if aLowerBound.is_a?(Integer) && aLowerBound >= 0

    raise StandardError, "Invalid lower bound of repetition count #{aLowerBound}"
  end

  # Validation method. Return the validated upper bound value.
  # It is either :more or an Integer that is not below the lower bound
  # (which must have been set beforehand).
  def valid_upper_bound(anUpperBound)
    return anUpperBound if anUpperBound == :more
    return anUpperBound if anUpperBound.is_a?(Integer) && anUpperBound >= lower_bound

    raise StandardError, "Invalid upper bound of repetition count #{anUpperBound}"
  end

  # Validation method. Return the validated policy value.
  def valid_policy(aPolicy)
    return aPolicy if POLICY_SUFFIXES.key?(aPolicy)

    raise StandardError, "Invalid repetition policy '#{aPolicy}'."
  end
end # class
|
88
|
+
end # module
|
89
|
+
end # module
|
90
|
+
|
91
|
+
# End of file
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# File: non_capturing_group.rb
|
2
|
+
|
3
|
+
require_relative 'monadic_expression' # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
# A non-capturing group, in other word it is a pure grouping
|
7
|
+
# of sub-expressions
|
8
|
+
class NonCapturingGroup < MonadicExpression
|
9
|
+
# Constructor.
|
10
|
+
# [aChildExpression] A sub-expression to match. When successful
|
11
|
+
# the matching text is not captured (the group only serves as a grouping).
|
12
|
+
def initialize(aChildExpression)
|
13
|
+
super(aChildExpression)
|
14
|
+
end
|
15
|
+
|
16
|
+
protected
|
17
|
+
|
18
|
+
# Conversion method re-definition.
|
19
|
+
# Purpose: Return the String representation of the non-capturing group.
|
20
|
+
def text_repr
  # Wrap the child's text in the non-capturing group delimiters.
  opening = '(?:'
  closing = ')'
  opening + all_child_text + closing
end
|
24
|
+
end # class
|
25
|
+
end # module
|
26
|
+
|
27
|
+
# End of file
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# File: polyadic_expression.rb
|
2
|
+
|
3
|
+
require_relative 'compound_expression' # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
# Abstract class. An element that is part of a regular expression &
|
7
|
+
# that has its own child sub-expressions.
|
8
|
+
class PolyadicExpression < CompoundExpression
|
9
|
+
# The aggregation of child elements
|
10
|
+
attr_reader(:children)
|
11
|
+
|
12
|
+
# Constructor.
|
13
|
+
def initialize(theChildren)
|
14
|
+
super()
|
15
|
+
@children = theChildren
|
16
|
+
end
|
17
|
+
|
18
|
+
# Append the given child to the list of children.
|
19
|
+
# TODO: assess whether to defer to a subclass NAryExpression
|
20
|
+
def <<(aChild)
|
21
|
+
@children << aChild
|
22
|
+
|
23
|
+
return self
|
24
|
+
end
|
25
|
+
|
26
|
+
# Build a depth-first in-order children visitor.
|
27
|
+
# The visitor is implemented as an Enumerator.
|
28
|
+
# @return [Enumerator] yields every descendant of this object, parents
#   before their own children and siblings from first to last
def df_visitor
  roots = children # The visit will start from the children of this object

  Enumerator.new do |result| # result is a Yielder
    # The stack holds arrays of nodes still to visit. Each array is a
    # REVERSED COPY of a children list: popping then delivers the children
    # from first to last, and the children list of a node is never emptied
    # by the traversal.
    visit_stack = [roots.reverse]

    until visit_stack.empty?
      pending = visit_stack.last
      if pending.empty?
        visit_stack.pop # This level is exhausted
        next
      end

      curr_child = pending.pop
      result << curr_child # Return the visited child
      next if curr_child.atomic?

      # Compound nodes expose either a list (children) or a single,
      # optional sub-expression (child).
      nested =
        if curr_child.respond_to?(:children)
          curr_child.children
        elsif curr_child.respond_to?(:child)
          [curr_child.child].compact
        else
          []
        end
      visit_stack.push(nested.reverse)
    end
  end
end
|
57
|
+
end # class
|
58
|
+
end # module
|
59
|
+
|
60
|
+
# End of file
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# File: quantifiable.rb
|
2
|
+
|
3
|
+
require_relative 'multiplicity'
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
# Mix-in for objects that can carry a quantifier.
module Quantifiable
  # The quantifier attached to the object; nil as long as none was assigned.
  # NOTE(review): presumably a Multiplicity (this file requires
  # 'multiplicity') -- confirm against the callers.
  attr_accessor :quantifier

  # @return [Boolean] true when a quantifier was assigned, false otherwise
  def quantified?
    !@quantifier.nil?
  end
end # module
|
20
|
+
end # module
|
21
|
+
|
22
|
+
# End of file
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# File: repetition.rb
|
2
|
+
|
3
|
+
require_relative 'monadic_expression' # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
# Abstract class. An unary matching operator.
|
7
|
+
# It succeeds when the specified repetition of the child expression
|
8
|
+
# succeeds to match the subject text in the same serial arrangement
|
9
|
+
class Repetition < MonadicExpression
|
10
|
+
attr_reader(:multiplicity)
|
11
|
+
|
12
|
+
# Constructor.
|
13
|
+
def initialize(childExpressionToRepeat, aMultiplicity)
|
14
|
+
super(childExpressionToRepeat)
|
15
|
+
@multiplicity = aMultiplicity
|
16
|
+
end
|
17
|
+
|
18
|
+
protected
|
19
|
+
|
20
|
+
# Conversion method re-definition.
|
21
|
+
# Purpose: Return the String representation of the repeated expression.
|
22
|
+
def text_repr
  # The repeated child comes first, immediately followed by its quantifier.
  repeated_text = all_child_text
  quantifier_text = multiplicity.to_str
  repeated_text + quantifier_text
end
|
26
|
+
end # class
|
27
|
+
end # module
|
28
|
+
|
29
|
+
# End of file
|