rley 0.5.08 → 0.5.09

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3c616b691fb51ba2eb00a25fee75ff4a80093990
4
- data.tar.gz: 1039cfe8f29c8d1ec7c88fa83c18f9173763b8f2
3
+ metadata.gz: d95e1ed6a38d8e1ed70e456f46c812275a1d4530
4
+ data.tar.gz: fcf7b54ff98d107fa38239139db10e7bbf1f7825
5
5
  SHA512:
6
- metadata.gz: df7412344421bd421fb459fe5cf8053618dea1212c4da27e83cf41225dbaf664d9b143499978e6bcef2ae293a7bf9378d3ecb4867f989553f798e9723ba8344b
7
- data.tar.gz: 436474ceafd2689137fab890b19ca24715ebe72dd1311b3ad64313bc130cf8bbce12fe35049008d20a89634309cbac882da70bc891522d45a58e8ce310b466a7
6
+ metadata.gz: e185aa4e7ca59e5995f6e87379efd4f921d4242307abd55718e46bf346e7414c487b56f753f446d8518ffab74e783774aaa2adc9b7fcf8a014fdba96b8da090d
7
+ data.tar.gz: 5c990777d79432c813c92dda0b21e7af3dcf587485b868e9723d30e3d1c1b3b2d6e1bc8dc7235fb1ba32c1b27ddeb6f829ef3e1b7b9248d5be1199f27e153849
@@ -1,4 +1,10 @@
1
- ### 0.5.08 / 2017-11-xx
1
+ ### 0.5.09 / 2017-12-02
2
+ * [CHANGE] Dir `examples/general/SRL/`: Added support for letter ranges to the Simple Regex Language parser.
3
+
4
+ ### 0.5.08 / 2017-11-28
5
+ * [NEW] Dir `examples/general/SRL/`: Added an initial version of the Simple Regex Language parser.
6
+ Supports the SRL quantifier syntax only.
7
+ * [FIX] Method `ParseTreeBuilder::place_TOS_child` was sometimes fooled when argument `aNode` was nil.
2
8
  * [FIX] Method `BaseParser::initialize` missing parameter name in doc caused a YARD warning.
3
9
  * [FIX] Method `GrmItemsBuilder::build_dotted_items` missing parameter name in doc caused a YARD warning.
4
10
  * [FIX] Method `NonTerminalVertex::initialize` missing parameter name in doc caused a YARD warning.
@@ -35,9 +35,30 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
35
35
  # @param theChildren [Array] Children nodes (one per rhs symbol)
36
36
  def new_parent_node(aProduction, aRange, theTokens, theChildren)
37
37
  node = case aProduction.name
38
- when 'srl_0' # rule 'srl' => 'quantifier'
38
+ when 'srl_0' # rule 'srl' => 'term'
39
39
  return_first_child(aRange, theTokens, theChildren)
40
40
 
41
+ when 'term_0' # rule 'term' => 'atom'
42
+ return_first_child(aRange, theTokens, theChildren)
43
+
44
+ when 'term_1' # rule 'term' => %w[atom quantifier]
45
+ reduce_term_1(aProduction, aRange, theTokens, theChildren)
46
+
47
+ when 'atom_0' #rule 'atom' => 'letter_range'
48
+ return_first_child(aRange, theTokens, theChildren)
49
+
50
+ when 'letter_range_0' # rule 'letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT]
51
+ reduce_letter_range_0(aProduction, aRange, theTokens, theChildren)
52
+
53
+ when 'letter_range_1' #rule 'letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT]
54
+ reduce_letter_range_1(aProduction, aRange, theTokens, theChildren)
55
+
56
+ when 'letter_range_2' # rule 'letter_range' => 'LETTER'
57
+ reduce_letter_range_2(aProduction, aRange, theTokens, theChildren)
58
+
59
+ when 'letter_range_3' # rule 'letter_range' => %w[UPPERCASE LETTER]
60
+ reduce_letter_range_3(aProduction, aRange, theTokens, theChildren)
61
+
41
62
  when 'quantifier_0' # rule 'quantifier' => 'ONCE'
42
63
  multiplicity(1, 1)
43
64
 
@@ -78,99 +99,74 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
78
99
  return SRL::Regex::Multiplicity.new(lowerBound, upperBound, :greedy)
79
100
  end
80
101
 
81
- # rule 'quantifier' => %w[EXACTLY count TIMES]
82
- def reduce_quantifier_2(aProduction, aRange, theTokens, theChildren)
83
- count = theChildren[1].token.lexeme.to_i
84
- multiplicity(count, count)
85
- end
86
-
87
- # rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
88
- def reduce_quantifier_3(aProduction, aRange, theTokens, theChildren)
89
- upper = theChildren[3].token.lexeme.to_i
90
- # lower = theChildren[1].token.lexeme.to_i
91
- multiplicity(3, upper)
92
- end
93
-
94
- # rule 'quantifier' => %w[AT LEAST count TIMES]
95
- def reduce_quantifier_7(aProduction, aRange, theTokens, theChildren)
96
- count = theChildren[2].token.lexeme.to_i
97
- multiplicity(count, :more)
102
+ def char_range(lowerBound, upperBound)
103
+ # TODO fix module nesting
104
+ lower = Regex::Character.new(lowerBound)
105
+ upper = Regex::Character.new(upperBound)
106
+ return Regex::CharRange.new(lower, upper)
98
107
  end
99
-
100
-
101
- =begin
102
- def reduce_binary_operator(theChildren)
103
- operator_node = theChildren[1]
104
- operator_node.children << theChildren[0]
105
- operator_node.children << theChildren[2]
106
- return operator_node
108
+
109
+ def char_class(toNegate, *theChildren)
110
+ Regex::CharClass.new(toNegate, *theChildren)
107
111
  end
108
-
109
- # rule 'simple_expression' => %w[simple_expression add_operator term]
110
- def reduce_simple_expression_1(_production, _range, _tokens, theChildren)
111
- reduce_binary_operator(theChildren)
112
+
113
+ def repetition(expressionToRepeat, aMultiplicity)
114
+ return Regex::Repetition.new(expressionToRepeat, aMultiplicity)
112
115
  end
113
116
 
114
- # rule 'term' => %w[term mul_operator factor]
115
- def reduce_term_1(_production, _range, _tokens, theChildren)
116
- reduce_binary_operator(theChildren)
117
+ # rule 'term' => %w[atom quantifier]
118
+ def reduce_term_1(aProduction, aRange, theTokens, theChildren)
119
+ quantifier = theChildren.last
120
+ atom = theChildren.first
121
+ repetition(atom, quantifier)
117
122
  end
118
123
 
119
- # rule 'factor' => %w[simple_factor POWER simple_factor]]
120
- def reduce_factor_1(aProduction, aRange, theTokens, theChildren)
121
- result = PowerNode.new(theChildren[1].symbol, aRange)
122
- result.children << theChildren[0]
123
- result.children << theChildren[2]
124
-
125
- return result
124
+ # rule 'letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT]
125
+ def reduce_letter_range_0(aProduction, aRange, theTokens, theChildren)
126
+ lower = theChildren[2].token.lexeme
127
+ upper = theChildren[4].token.lexeme
128
+ ch_range = char_range(lower, upper)
129
+ char_class(false, ch_range)
126
130
  end
127
-
128
- # rule 'simple_factor' => %[sign scalar]
129
- def reduce_simple_factor_0(aProduction, aRange, theTokens, theChildren)
130
- first_child = theChildren[0]
131
- result = if first_child.kind_of?(CalcNegateNode)
132
- -theChildren[1]
133
- else
134
- theChildren[1]
135
- end
136
-
137
- return result
131
+
132
+ # rule 'letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT]
133
+ def reduce_letter_range_1(aProduction, aRange, theTokens, theChildren)
134
+ lower = theChildren[3].token.lexeme
135
+ upper = theChildren[5].token.lexeme
136
+ ch_range = char_range(lower.upcase, upper.upcase)
137
+ char_class(false, ch_range)
138
138
  end
139
-
140
- # rule 'simple_factor' => %w[unary_function in_parenthesis]
141
- def reduce_simple_factor_1(aProduction, aRange, theTokens, theChildren)
142
- func = CalcUnaryFunction.new(theChildren[0].symbol, aRange.low)
143
- func.func_name = theChildren[0].value
144
- func.children << theChildren[1]
145
- return func
139
+
140
+ # rule 'letter_range' => 'LETTER'
141
+ def reduce_letter_range_2(aProduction, aRange, theTokens, theChildren)
142
+ ch_range = char_range('a', 'z')
143
+ char_class(false, ch_range)
146
144
  end
147
-
148
- # rule 'simple_factor' => %w[MINUS in_parenthesis]
149
- def reduce_simple_factor_2(aProduction, aRange, theTokens, theChildren)
150
- negation = CalcNegateNode.new(theChildren[0].symbol, aRange.low)
151
- negation.children << theChildren[1]
152
- return negation
145
+
146
+ #rule 'letter_range' => %w[UPPERCASE LETTER]
147
+ def reduce_letter_range_3(aProduction, aRange, theTokens, theChildren)
148
+ ch_range = char_range('A', 'Z')
149
+ char_class(false, ch_range)
153
150
  end
154
151
 
155
- # rule 'add_operator' => 'PLUS'
156
- def reduce_add_operator_0(_production, aRange, _tokens, theChildren)
157
- return CalcAddNode.new(theChildren[0].symbol, aRange)
152
+ # rule 'quantifier' => %w[EXACTLY count TIMES]
153
+ def reduce_quantifier_2(aProduction, aRange, theTokens, theChildren)
154
+ count = theChildren[1].token.lexeme.to_i
155
+ multiplicity(count, count)
158
156
  end
159
157
 
160
- # rule 'add_operator' => 'MINUS'
161
- def reduce_add_operator_1(_production, aRange, _tokens, theChildren)
162
- return CalcSubtractNode.new(theChildren[0].symbol, aRange)
158
+ # rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
159
+ def reduce_quantifier_3(aProduction, aRange, theTokens, theChildren)
160
+ lower = theChildren[1].token.lexeme.to_i
161
+ upper = theChildren[3].token.lexeme.to_i
162
+ multiplicity(lower, upper)
163
163
  end
164
164
 
165
- # rule 'mul_operator' => 'STAR'
166
- def reduce_mul_operator_0(_production, aRange, _tokens, theChildren)
167
- return CalcMultiplyNode.new(theChildren[0].symbol, aRange)
165
+ # rule 'quantifier' => %w[AT LEAST count TIMES]
166
+ def reduce_quantifier_7(aProduction, aRange, theTokens, theChildren)
167
+ count = theChildren[2].token.lexeme.to_i
168
+ multiplicity(count, :more)
168
169
  end
169
170
 
170
- # rule 'mul_operator' => 'DIVIDE'
171
- def reduce_mul_operator_1(_production, aRange, _tokens, theChildren)
172
- return CalcDivideNode.new(theChildren[0].symbol, aRange)
173
- end
174
- =end
175
171
  end # class
176
172
  # End of file
@@ -6,13 +6,21 @@ module SRL
6
6
  # This is a very partial grammar of SRL.
7
7
  # It will be expanded with the coming versions of Rley
8
8
  builder = Rley::Syntax::GrammarBuilder.new do
9
- add_terminals('DIGIT', 'INTEGER')
9
+ add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT')
10
+ add_terminals('UPPERCASE', 'LETTER', 'FROM', 'TO')
10
11
  add_terminals('EXACTLY', 'TIMES', 'ONCE', 'TWICE')
11
12
  add_terminals('BETWEEN', 'AND', 'OPTIONAL', 'OR')
12
13
  add_terminals('MORE', 'NEVER', 'AT', 'LEAST')
13
14
 
14
15
  # For the moment one focuses on letter range and quantifier syntax only...
15
- rule 'srl' => 'quantifier'
16
+ rule 'srl' => 'term'
17
+ rule 'term' => 'atom'
18
+ rule 'term' => %w[atom quantifier]
19
+ rule 'atom' => 'letter_range'
20
+ rule 'letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT]
21
+ rule 'letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT]
22
+ rule 'letter_range' => 'LETTER'
23
+ rule 'letter_range' => %w[UPPERCASE LETTER]
16
24
  rule 'quantifier' => 'ONCE'
17
25
  rule 'quantifier' => 'TWICE'
18
26
  rule 'quantifier' => %w[EXACTLY count TIMES]
@@ -21,7 +29,7 @@ module SRL
21
29
  rule 'quantifier' => %w[ONCE OR MORE]
22
30
  rule 'quantifier' => %w[NEVER OR MORE]
23
31
  rule 'quantifier' => %w[AT LEAST count TIMES]
24
- rule 'count' => 'DIGIT'
32
+ rule 'count' => 'DIGIT_LIT'
25
33
  rule 'count' => 'INTEGER'
26
34
  rule 'times_suffix' => 'TIMES'
27
35
  rule 'times_suffix' => []
@@ -0,0 +1,35 @@
1
+ # File: abstract_method.rb
2
+
3
+ # Mix-in module. Provides the method 'abstract_method' that raises an exception
4
+ # with an appropriate message when called.
5
+ module AbstractMethod
6
+ public
7
+
8
+ # Call this method in the body of your abstract methods.
9
+ # Example:
10
+ # require 'abstract_method'
11
+ # class SomeClass
12
+ # include AbstractMethod # To add the behaviour from the mix-in module AbstractMethod
13
+ # ...
14
+ # Consider that SomeClass has an abstract method called 'some_method'
15
+ #
16
+ # def some_method() abstract_method
17
+ # end
18
+ def abstract_method()
19
+ # Determine the short class name of self
20
+ className = self.class.name.split(/::/).last
21
+
22
+ # Retrieve the top text line of the call stack
23
+ top_line = caller.first
24
+
25
+ # Extract the calling method name
26
+ callerNameInQuotes = top_line.scan(/`.+?$/).first
27
+ callerName = callerNameInQuotes.gsub(/`|'/, '') # Remove enclosing quotes
28
+
29
+ # Build the error message
30
+ error_message = "The method #{className}##{callerName} is abstract. It should be implemented in subclasses of #{className}."
31
+ raise NotImplementedError, error_message
32
+ end
33
+ end # module
34
+
35
+ # End of file
@@ -0,0 +1,21 @@
1
+ # File: atomic_expression.rb
2
+
3
+ require_relative "expression" # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # Abstract class. A valid regular expression that
8
+ # cannot be further decomposed into sub-expressions.
9
+ class AtomicExpression < Expression
10
+
11
+ public
12
+ # Redefined method. Return true since it may not have any child.
13
+ def atomic?
14
+ return true
15
+ end
16
+
17
+ end # class
18
+
19
+ end # module
20
+
21
+ # End of file
@@ -0,0 +1,34 @@
1
+ # File: char_class.rb
2
+
3
+ require_relative "polyadic_expression" # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # An n-ary matching operator that represents a character class.
8
+ # It succeeds when one of its child expressions succeeds in matching
9
+ # the subject text.
10
+ class CharClass < PolyadicExpression
11
+ # A flag that indicates whether the character class is negated
12
+ attr_reader(:negated)
13
+
14
+ # Constructor.
15
+ def initialize(to_negate,*theChildren)
16
+ super(theChildren)
17
+ @negated = to_negate
18
+ end
19
+
20
+ public
21
+ # Conversion method re-definition.
22
+ # Purpose: Return the String representation of the character class.
23
+ def to_str()
24
+ result_children = children.inject('') { |subResult, aChild| subResult << aChild.to_str() }
25
+ result = '['+ (negated ? '^' : '') + result_children + ']'
26
+
27
+ return result
28
+ end
29
+
30
+ end # class
31
+
32
+ end # module
33
+
34
+ # End of file
@@ -0,0 +1,50 @@
1
+ # File: char_range.rb
2
+
3
+ require_relative 'polyadic_expression' # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # A binary expression that represents a contiguous range of characters.
8
+ # Assumption: characters are ordered by codepoint
9
+ class CharRange < PolyadicExpression
10
+
11
+ # Constructor.
12
+ # [theLowerBound] A character that will be the lower bound value for the range.
13
+ # [theUpperBound] A character that will be the upper bound value for the range.
14
+ # TODO: optimisation. Build a Character if lower bound == upper bound.
15
+ def initialize(theLowerBound, theUpperBound)
16
+ range = validated_range(theLowerBound, theUpperBound)
17
+ super(range)
18
+ end
19
+
20
+ public
21
+ # Return the lower bound of the range.
22
+ def lower()
23
+ return children.first
24
+ end
25
+
26
+ # Return the upper bound of the range.
27
+ def upper()
28
+ return children.last
29
+ end
30
+
31
+ # Conversion method re-definition.
32
+ # Purpose: Return the String representation of the character range.
33
+ def to_str()
34
+ result = lower.to_str() + '-' + upper.to_str()
35
+
36
+ return result
37
+ end
38
+
39
+ private
40
+ # Validation method. Returns a pair of Characters after their validation.
41
+ def validated_range(theLowerBound, theUpperBound)
42
+ raise StandardError, "Character range error: lower bound is greater than upper bound." if theLowerBound.codepoint > theUpperBound.codepoint
43
+ return [theLowerBound, theUpperBound]
44
+ end
45
+
46
+ end # class
47
+
48
+ end # module
49
+
50
+ # End of file
@@ -0,0 +1,195 @@
1
+ # File: character.rb
2
+
3
+ require_relative 'atomic_expression' # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # A regular expression that matches a specific character in a given character set
8
+ class Character < AtomicExpression
9
+ # Constant with all special 2-characters escape sequences
10
+ DigramSequences = {
11
+ "\\a" => 0x7, # alarm
12
+ "\\n" => 0xA, # newline
13
+ "\\r" => 0xD, # carriage return
14
+ "\\t" => 0x9, # tab
15
+ "\\e" => 0x1B, # escape
16
+ "\\f" => 0xC, # form feed
17
+ "\\v" => 0xB, # vertical feed
18
+ # Single octal digit literals
19
+ "\\0" => 0,
20
+ "\\1" => 1,
21
+ "\\2" => 2,
22
+ "\\3" => 3,
23
+ "\\4" => 4,
24
+ "\\5" => 5,
25
+ "\\6" => 6,
26
+ "\\7" => 7
27
+ }
28
+
29
+ # The integer value that uniquely identifies the character.
30
+ attr_reader(:codepoint)
31
+
32
+ # The initial text representation of the character (if any).
33
+ attr_reader(:lexeme)
34
+
35
+ # Constructor.
36
+ # [aValue] Initialize the character with either a String literal or a codepoint value.
37
+ # Examples:
38
+ # Initializing with codepoint value...
39
+ # Regex::Character.new(0x3a3) # Represents: Σ (Unicode GREEK CAPITAL LETTER SIGMA)
40
+ # Regex::Character.new(931) # Also represents: Σ (931 dec == 3a3 hex)
41
+ #
42
+ # Initializing with a single character string
43
+ # Regex::Character.new(?\u03a3) # Also represents: Σ
44
+ # Regex::Character.new('Σ') # Obviously, represents a Σ
45
+ #
46
+ # Initializing with an escape sequence string
47
+ # Recognized escaped characters are: \a (alarm, 0x07), \n (newline, 0xA),
48
+ # \r (carriage return, 0xD), \t (tab, 0x9), \e (escape, 0x1B), \f (form feed, 0xC)
49
+ # \uXXXX where XXXX is a 4 hex digits integer value, \u{X...}, \ooo (octal) \xXX (hex)
50
+ # Any other escaped character will be treated as a literal character
51
+ # Regex::Character.new('\n') # Represents a newline
52
+ # Regex::Character.new('\u03a3') # Represents a Σ
53
+ def initialize(aValue)
54
+
55
+ case aValue
56
+ when String
57
+ if aValue.size == 1
58
+ # Literal single character case...
59
+ @codepoint = self.class.char2codepoint(aValue)
60
+ else
61
+ # Should be an escape sequence...
62
+ @codepoint = self.class.esc2codepoint(aValue)
63
+ end
64
+ @lexeme = aValue
65
+
66
+ when Fixnum
67
+ @codepoint = aValue
68
+ else
69
+ raise StandardError, "Cannot initialize a Character with a '#{aValue}'."
70
+ end
71
+ end
72
+
73
+ public
74
+ # Conversion method that returns a character given a codepoint (integer) value.
75
+ # Example:
76
+ # Regex::Character::codepoint2char(0x3a3) # Returns: Σ (The Unicode GREEK CAPITAL LETTER SIGMA)
77
+ def self.codepoint2char(aCodepoint)
78
+ return [aCodepoint].pack('U') # Remark: chr() fails with codepoints > 256
79
+ end
80
+
81
+ # Conversion method that returns the codepoint for the given single character.
82
+ # Example:
83
+ # Regex::Character::char2codepoint('Σ') # Returns: 0x3a3
84
+ def self.char2codepoint(aChar)
85
+ return aChar.ord()
86
+ end
87
+
88
+ # Conversion method that returns the codepoint for the given escape sequence (a String).
89
+ # Recognized escaped characters are: \a (alarm, 0x07), \n (newline, 0xA),
90
+ # \r (carriage return, 0xD), \t (tab, 0x9), \e (escape, 0x1B), \f (form feed, 0xC), \v (vertical feed, 0xB)
91
+ # \uXXXX where XXXX is a 4 hex digits integer value, \u{X...}, \ooo (octal) \xXX (hex)
92
+ # Any other escaped character will be treated as a literal character
93
+ # Example:
94
+ # Regex::Character::esc2codepoint('\n') # Returns: 0xa
95
+ def self.esc2codepoint(anEscapeSequence)
96
+ raise StandardError, "Escape sequence #{anEscapeSequence} does not begin with a backslash (\)." unless anEscapeSequence[0] == "\\"
97
+ result = (anEscapeSequence.length == 2)? digram2codepoint(anEscapeSequence) : esc_number2codepoint(anEscapeSequence)
98
+
99
+ return result
100
+ end
101
+
102
+ # Return the character as a String object
103
+ def char()
104
+ self.class.codepoint2char(@codepoint)
105
+ end
106
+
107
+ # Conversion method re-definition.
108
+ # Purpose: Return the String representation of the expression.
109
+ # If the Character was initially from a text (the lexeme), then the lexeme is returned back.
110
+ # Otherwise the character corresponding to the codepoint is returned.
111
+ def to_str()
112
+ if lexeme.nil?
113
+ result = char()
114
+ else
115
+ result = lexeme.dup()
116
+ end
117
+
118
+ return result
119
+ end
120
+
121
+ # Returns true iff this Character and parameter 'another' represent the same character.
122
+ # [another] any Object. The way the equality is tested depends on the another's class
123
+ # Example:
124
+ # newOne = Character.new(?\u03a3)
125
+ # newOne == newOne # true. Identity
126
+ # newOne == Character.new(?\u03a3) # true. Both have same codepoint
127
+ # newOne == ?\u03a3 # true. The single character String matches exactly the char attribute.
128
+ # newOne == 0x03a3 # true. The Fixnum is compared to the codepoint value.
129
+ # Will test equality with any Object that knows the to_s method
130
+ def ==(another)
131
+ result = case another
132
+ when Character
133
+ self.to_str == another.to_str
134
+
135
+ when Fixnum
136
+ self.codepoint == another
137
+
138
+ when String
139
+ (another.size > 1) ? false : self.to_str == another
140
+
141
+ else
142
+ # Unknown type: try with a conversion
143
+ self == another.to_s() # Recursive call
144
+ end
145
+
146
+ return result
147
+ end
148
+
149
+ # Return a plain English description of the character
150
+ def explain()
151
+ return "the character '#{to_str()}'"
152
+ end
153
+
154
+ private
155
+ # Conversion method that returns a codepoint for the given two characters (digram) escape sequence.
156
+ # Recognized escaped characters are: \a (alarm, 0x07), \n (newline, 0xA),
157
+ # \r (carriage return, 0xD), \t (tab, 0x9), \e (escape, 0x1B), \f (form feed, 0xC), \v (vertical feed, 0xB)
158
+ # Any other escape sequence will return the codepoint of the escaped character.
159
+ # [aDigram] A sequence of two characters that starts with a backslash.
160
+ def self.digram2codepoint(aDigram)
161
+ # Check that the digram is a special escape sequence
162
+ result = DigramSequences.fetch(aDigram, nil)
163
+
164
+ # If it is not a special sequence, then the escaped character is taken literally (the backslash is 'dummy')
165
+ result = char2codepoint(aDigram[-1]) if result.nil?
166
+ return result
167
+ end
168
+
169
+ # Conversion method that returns a codepoint for the given complex escape sequence.
170
+ # [anEscapeSequence] A String with the format:
171
+ # \uXXXX where XXXX is a 4 hex digits integer value,
172
+ # \u{X...} X 1 or more hex digits
173
+ # \ooo (1..3 octal digits literal)
174
+ # \xXX (1..2 hex digits literal)
175
+ def self.esc_number2codepoint(anEscapeSequence)
176
+ # Next line requires Ruby >= 1.9
177
+ unless /^\\(?:(?:(?<prefix>[uxX])\{?(?<hexa>\h+)\}?)|(?<octal>[0-7]{1,3}))$/ =~ anEscapeSequence
178
+ raise StandardError, "Unsupported escape sequence #{anEscapeSequence}."
179
+ else
180
+ #shorterSeq = anEscapeSequence[1..-1] # Remove the backslash
181
+
182
+ # Octal literal case?
183
+ return octal.oct() if octal # shorterSeq =~ /[0-7]{1,3}/
184
+
185
+ # Extract the hexadecimal number
186
+ hexliteral = hexa # shorterSeq.sub(/^[xXu]\{?([0-9a-fA-F]+)}?$/, '\1')
187
+ return hexliteral.hex()
188
+ end
189
+ end
190
+
191
+ end # class
192
+
193
+ end # module
194
+
195
+ # End of file