rley 0.5.08 → 0.5.09

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3c616b691fb51ba2eb00a25fee75ff4a80093990
4
- data.tar.gz: 1039cfe8f29c8d1ec7c88fa83c18f9173763b8f2
3
+ metadata.gz: d95e1ed6a38d8e1ed70e456f46c812275a1d4530
4
+ data.tar.gz: fcf7b54ff98d107fa38239139db10e7bbf1f7825
5
5
  SHA512:
6
- metadata.gz: df7412344421bd421fb459fe5cf8053618dea1212c4da27e83cf41225dbaf664d9b143499978e6bcef2ae293a7bf9378d3ecb4867f989553f798e9723ba8344b
7
- data.tar.gz: 436474ceafd2689137fab890b19ca24715ebe72dd1311b3ad64313bc130cf8bbce12fe35049008d20a89634309cbac882da70bc891522d45a58e8ce310b466a7
6
+ metadata.gz: e185aa4e7ca59e5995f6e87379efd4f921d4242307abd55718e46bf346e7414c487b56f753f446d8518ffab74e783774aaa2adc9b7fcf8a014fdba96b8da090d
7
+ data.tar.gz: 5c990777d79432c813c92dda0b21e7af3dcf587485b868e9723d30e3d1c1b3b2d6e1bc8dc7235fb1ba32c1b27ddeb6f829ef3e1b7b9248d5be1199f27e153849
@@ -1,4 +1,10 @@
1
- ### 0.5.08 / 2017-11-xx
1
+ ### 0.5.09 / 2017-12-02
2
+ * [CHANGE] Dir `examples/general/SRL/`: Added support for letter ranges to the Simple Regex Language parser.
3
+
4
+ ### 0.5.08 / 2017-11-28
5
+ * [NEW] Dir `examples/general/SRL/`: Added an initial version of the Simple Regex Language parser.
6
+ Supports the SRL quantifier syntax only.
7
+ * [FIX] Method `ParseTreeBuilder::place_TOS_child` was sometimes fooled when argument `aNode` was nil.
2
8
  * [FIX] Method `BaseParser::initialize` missing parameter name in doc caused a YARD warning.
3
9
  * [FIX] Method `GrmItemsBuilder::build_dotted_items` missing parameter name in doc caused a YARD warning.
4
10
  * [FIX] Method `NonTerminalVertex::initialize` missing parameter name in doc caused a YARD warning.
@@ -35,9 +35,30 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
35
35
  # @param theChildren [Array] Children nodes (one per rhs symbol)
36
36
  def new_parent_node(aProduction, aRange, theTokens, theChildren)
37
37
  node = case aProduction.name
38
- when 'srl_0' # rule 'srl' => 'quantifier'
38
+ when 'srl_0' # rule 'srl' => 'term'
39
39
  return_first_child(aRange, theTokens, theChildren)
40
40
 
41
+ when 'term_0' # rule 'term' => 'atom'
42
+ return_first_child(aRange, theTokens, theChildren)
43
+
44
+ when 'term_1' # rule 'term' => %w[atom quantifier]
45
+ reduce_term_1(aProduction, aRange, theTokens, theChildren)
46
+
47
+ when 'atom_0' #rule 'atom' => 'letter_range'
48
+ return_first_child(aRange, theTokens, theChildren)
49
+
50
+ when 'letter_range_0' # rule 'letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT]
51
+ reduce_letter_range_0(aProduction, aRange, theTokens, theChildren)
52
+
53
+ when 'letter_range_1' #rule 'letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT]
54
+ reduce_letter_range_1(aProduction, aRange, theTokens, theChildren)
55
+
56
+ when 'letter_range_2' # rule 'letter_range' => 'LETTER'
57
+ reduce_letter_range_2(aProduction, aRange, theTokens, theChildren)
58
+
59
+ when 'letter_range_3' # rule 'letter_range' => %w[UPPERCASE LETTER]
60
+ reduce_letter_range_3(aProduction, aRange, theTokens, theChildren)
61
+
41
62
  when 'quantifier_0' # rule 'quantifier' => 'ONCE'
42
63
  multiplicity(1, 1)
43
64
 
@@ -78,99 +99,74 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
78
99
  return SRL::Regex::Multiplicity.new(lowerBound, upperBound, :greedy)
79
100
  end
80
101
 
81
- # rule 'quantifier' => %w[EXACTLY count TIMES]
82
- def reduce_quantifier_2(aProduction, aRange, theTokens, theChildren)
83
- count = theChildren[1].token.lexeme.to_i
84
- multiplicity(count, count)
85
- end
86
-
87
- # rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
88
- def reduce_quantifier_3(aProduction, aRange, theTokens, theChildren)
89
- upper = theChildren[3].token.lexeme.to_i
90
- # lower = theChildren[1].token.lexeme.to_i
91
- multiplicity(3, upper)
92
- end
93
-
94
- # rule 'quantifier' => %w[AT LEAST count TIMES]
95
- def reduce_quantifier_7(aProduction, aRange, theTokens, theChildren)
96
- count = theChildren[2].token.lexeme.to_i
97
- multiplicity(count, :more)
102
+ def char_range(lowerBound, upperBound)
103
+ # TODO fix module nesting
104
+ lower = Regex::Character.new(lowerBound)
105
+ upper = Regex::Character.new(upperBound)
106
+ return Regex::CharRange.new(lower, upper)
98
107
  end
99
-
100
-
101
- =begin
102
- def reduce_binary_operator(theChildren)
103
- operator_node = theChildren[1]
104
- operator_node.children << theChildren[0]
105
- operator_node.children << theChildren[2]
106
- return operator_node
108
+
109
+ def char_class(toNegate, *theChildren)
110
+ Regex::CharClass.new(toNegate, *theChildren)
107
111
  end
108
-
109
- # rule 'simple_expression' => %w[simple_expression add_operator term]
110
- def reduce_simple_expression_1(_production, _range, _tokens, theChildren)
111
- reduce_binary_operator(theChildren)
112
+
113
+ def repetition(expressionToRepeat, aMultiplicity)
114
+ return Regex::Repetition.new(expressionToRepeat, aMultiplicity)
112
115
  end
113
116
 
114
- # rule 'term' => %w[term mul_operator factor]
115
- def reduce_term_1(_production, _range, _tokens, theChildren)
116
- reduce_binary_operator(theChildren)
117
+ # rule 'term' => %w[atom quantifier]
118
+ def reduce_term_1(aProduction, aRange, theTokens, theChildren)
119
+ quantifier = theChildren.last
120
+ atom = theChildren.first
121
+ repetition(atom, quantifier)
117
122
  end
118
123
 
119
- # rule 'factor' => %w[simple_factor POWER simple_factor]]
120
- def reduce_factor_1(aProduction, aRange, theTokens, theChildren)
121
- result = PowerNode.new(theChildren[1].symbol, aRange)
122
- result.children << theChildren[0]
123
- result.children << theChildren[2]
124
-
125
- return result
124
+ # rule 'letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT]
125
+ def reduce_letter_range_0(aProduction, aRange, theTokens, theChildren)
126
+ lower = theChildren[2].token.lexeme
127
+ upper = theChildren[4].token.lexeme
128
+ ch_range = char_range(lower, upper)
129
+ char_class(false, ch_range)
126
130
  end
127
-
128
- # rule 'simple_factor' => %[sign scalar]
129
- def reduce_simple_factor_0(aProduction, aRange, theTokens, theChildren)
130
- first_child = theChildren[0]
131
- result = if first_child.kind_of?(CalcNegateNode)
132
- -theChildren[1]
133
- else
134
- theChildren[1]
135
- end
136
-
137
- return result
131
+
132
+ # rule 'letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT]
133
+ def reduce_letter_range_1(aProduction, aRange, theTokens, theChildren)
134
+ lower = theChildren[3].token.lexeme
135
+ upper = theChildren[5].token.lexeme
136
+ ch_range = char_range(lower.upcase, upper.upcase)
137
+ char_class(false, ch_range)
138
138
  end
139
-
140
- # rule 'simple_factor' => %w[unary_function in_parenthesis]
141
- def reduce_simple_factor_1(aProduction, aRange, theTokens, theChildren)
142
- func = CalcUnaryFunction.new(theChildren[0].symbol, aRange.low)
143
- func.func_name = theChildren[0].value
144
- func.children << theChildren[1]
145
- return func
139
+
140
+ # rule 'letter_range' => 'LETTER'
141
+ def reduce_letter_range_2(aProduction, aRange, theTokens, theChildren)
142
+ ch_range = char_range('a', 'z')
143
+ char_class(false, ch_range)
146
144
  end
147
-
148
- # rule 'simple_factor' => %w[MINUS in_parenthesis]
149
- def reduce_simple_factor_2(aProduction, aRange, theTokens, theChildren)
150
- negation = CalcNegateNode.new(theChildren[0].symbol, aRange.low)
151
- negation.children << theChildren[1]
152
- return negation
145
+
146
+ #rule 'letter_range' => %w[UPPERCASE LETTER]
147
+ def reduce_letter_range_3(aProduction, aRange, theTokens, theChildren)
148
+ ch_range = char_range('A', 'Z')
149
+ char_class(false, ch_range)
153
150
  end
154
151
 
155
- # rule 'add_operator' => 'PLUS'
156
- def reduce_add_operator_0(_production, aRange, _tokens, theChildren)
157
- return CalcAddNode.new(theChildren[0].symbol, aRange)
152
+ # rule 'quantifier' => %w[EXACTLY count TIMES]
153
+ def reduce_quantifier_2(aProduction, aRange, theTokens, theChildren)
154
+ count = theChildren[1].token.lexeme.to_i
155
+ multiplicity(count, count)
158
156
  end
159
157
 
160
- # rule 'add_operator' => 'MINUS'
161
- def reduce_add_operator_1(_production, aRange, _tokens, theChildren)
162
- return CalcSubtractNode.new(theChildren[0].symbol, aRange)
158
+ # rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
159
+ def reduce_quantifier_3(aProduction, aRange, theTokens, theChildren)
160
+ lower = theChildren[1].token.lexeme.to_i
161
+ upper = theChildren[3].token.lexeme.to_i
162
+ multiplicity(lower, upper)
163
163
  end
164
164
 
165
- # rule 'mul_operator' => 'STAR'
166
- def reduce_mul_operator_0(_production, aRange, _tokens, theChildren)
167
- return CalcMultiplyNode.new(theChildren[0].symbol, aRange)
165
+ # rule 'quantifier' => %w[AT LEAST count TIMES]
166
+ def reduce_quantifier_7(aProduction, aRange, theTokens, theChildren)
167
+ count = theChildren[2].token.lexeme.to_i
168
+ multiplicity(count, :more)
168
169
  end
169
170
 
170
- # rule 'mul_operator' => 'DIVIDE'
171
- def reduce_mul_operator_1(_production, aRange, _tokens, theChildren)
172
- return CalcDivideNode.new(theChildren[0].symbol, aRange)
173
- end
174
- =end
175
171
  end # class
176
172
  # End of file
@@ -6,13 +6,21 @@ module SRL
6
6
  # This is a very partial grammar of SRL.
7
7
  # It will be expanded with the coming versions of Rley
8
8
  builder = Rley::Syntax::GrammarBuilder.new do
9
- add_terminals('DIGIT', 'INTEGER')
9
+ add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT')
10
+ add_terminals('UPPERCASE', 'LETTER', 'FROM', 'TO')
10
11
  add_terminals('EXACTLY', 'TIMES', 'ONCE', 'TWICE')
11
12
  add_terminals('BETWEEN', 'AND', 'OPTIONAL', 'OR')
12
13
  add_terminals('MORE', 'NEVER', 'AT', 'LEAST')
13
14
 
14
15
  # For the moment one focuses on quantifier syntax only...
15
- rule 'srl' => 'quantifier'
16
+ rule 'srl' => 'term'
17
+ rule 'term' => 'atom'
18
+ rule 'term' => %w[atom quantifier]
19
+ rule 'atom' => 'letter_range'
20
+ rule 'letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT]
21
+ rule 'letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT]
22
+ rule 'letter_range' => 'LETTER'
23
+ rule 'letter_range' => %w[UPPERCASE LETTER]
16
24
  rule 'quantifier' => 'ONCE'
17
25
  rule 'quantifier' => 'TWICE'
18
26
  rule 'quantifier' => %w[EXACTLY count TIMES]
@@ -21,7 +29,7 @@ module SRL
21
29
  rule 'quantifier' => %w[ONCE OR MORE]
22
30
  rule 'quantifier' => %w[NEVER OR MORE]
23
31
  rule 'quantifier' => %w[AT LEAST count TIMES]
24
- rule 'count' => 'DIGIT'
32
+ rule 'count' => 'DIGIT_LIT'
25
33
  rule 'count' => 'INTEGER'
26
34
  rule 'times_suffix' => 'TIMES'
27
35
  rule 'times_suffix' => []
@@ -0,0 +1,35 @@
1
+ # File: abstract_method.rb
2
+
3
+ # Mix-in module. Provides the method 'abstract_method' that raises an exception
4
+ # with an appropriate message when called.
5
+ module AbstractMethod
6
+ public
7
+
8
+ # Call this method in the body of your abstract methods.
9
+ # Example:
10
+ # require 'AbstractMethod'
11
+ # class SomeClass
12
+ # include AbstractMethod # To add the behaviour from the mix-in module AbstractMethod
13
+ # ...
14
+ # Consider that SomeClass has an abstract method called 'some_method'
15
+ #
16
+ # def some_method() abstract_method
17
+ # end
18
+ def abstract_method()
19
+ # Determine the short class name of self
20
+ className = self.class.name.split(/::/).last
21
+
22
+ # Retrieve the top text line of the call stack
23
+ top_line = caller.first
24
+
25
+ # Extract the calling method name
26
+ callerNameInQuotes = top_line.scan(/`.+?$/).first
27
+ callerName = callerNameInQuotes.gsub(/`|'/, '') # Remove enclosing quotes
28
+
29
+ # Build the error message
30
+ error_message = "The method #{className}##{callerName} is abstract. It should be implemented in subclasses of #{className}."
31
+ raise NotImplementedError, error_message
32
+ end
33
+ end # module
34
+
35
+ # End of file
@@ -0,0 +1,21 @@
1
+ # File: atomic_expression.rb
2
+
3
+ require_relative "expression" # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # Abstract class. A valid regular expression that
8
+ # cannot be further decomposed into sub-expressions.
9
+ class AtomicExpression < Expression
10
+
11
+ public
12
+ # Redefined method. Return true since it may not have any child.
13
+ def atomic?
14
+ return true
15
+ end
16
+
17
+ end # class
18
+
19
+ end # module
20
+
21
+ # End of file
@@ -0,0 +1,34 @@
1
+ # File: char_class.rb
2
+
3
+ require_relative "polyadic_expression" # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # A character class: an n-ary matching operator.
8
+ # It succeeds when any one of its child expressions matches the subject text.
9
+ # Its text form is a bracket expression: '[' ... ']', or '[^' ... ']' when negated.
10
+ class CharClass < PolyadicExpression
11
+ # A flag that indicates whether the character class is negated
12
+ attr_reader(:negated)
13
+
14
+ # Constructor.
15
+ def initialize(to_negate,*theChildren)
16
+ super(theChildren)
17
+ @negated = to_negate
18
+ end
19
+
20
+ public
21
+ # Conversion method re-definition.
22
+ # Purpose: Return the String representation of the character class.
23
+ def to_str()
24
+ result_children = children.inject('') { |subResult, aChild| subResult << aChild.to_str() }
25
+ result = '['+ (negated ? '^' : '') + result_children + ']'
26
+
27
+ return result
28
+ end
29
+
30
+ end # class
31
+
32
+ end # module
33
+
34
+ # End of file
@@ -0,0 +1,50 @@
1
+ # File: char_range.rb
2
+
3
+ require_relative 'polyadic_expression' # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # A binary expression that represents a contiguous range of characters.
8
+ # Assumption: characters are ordered by codepoint
9
+ class CharRange < PolyadicExpression
10
+
11
+ # Constructor.
12
+ # [theLowerBound] A character that will be the lower bound value for the range.
13
+ # [theUpperBound] A character that will be the upper bound value for the range.
14
+ # TODO: optimisation. Build a Character if lower bound == upper bound.
15
+ def initialize(theLowerBound, theUpperBound)
16
+ range = validated_range(theLowerBound, theUpperBound)
17
+ super(range)
18
+ end
19
+
20
+ public
21
+ # Return the lower bound of the range.
22
+ def lower()
23
+ return children.first
24
+ end
25
+
26
+ # Return the upper bound of the range.
27
+ def upper()
28
+ return children.last
29
+ end
30
+
31
+ # Conversion method re-definition.
32
+ # Purpose: Return the String representation of the character range.
33
+ def to_str()
34
+ result = lower.to_str() + '-' + upper.to_str()
35
+
36
+ return result
37
+ end
38
+
39
+ private
40
+ # Validation method. Returns the pair of Characters after their validation.
41
+ def validated_range(theLowerBound, theUpperBound)
42
+ raise StandardError, "Character range error: lower bound is greater than upper bound." if theLowerBound.codepoint > theUpperBound.codepoint
43
+ return [theLowerBound, theUpperBound]
44
+ end
45
+
46
+ end # class
47
+
48
+ end # module
49
+
50
+ # End of file
@@ -0,0 +1,195 @@
1
+ # File: character.rb
2
+
3
+ require_relative 'atomic_expression' # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # A regular expression that matches a specific character in a given character set
8
+ class Character < AtomicExpression
9
+ # Constant with all special 2-characters escape sequences
10
+ DigramSequences = {
11
+ "\\a" => 0x7, # alarm
12
+ "\\n" => 0xA, # newline
13
+ "\\r" => 0xD, # carriage return
14
+ "\\t" => 0x9, # tab
15
+ "\\e" => 0x1B, # escape
16
+ "\\f" => 0xC, # form feed
17
+ "\\v" => 0xB, # vertical feed
18
+ # Single octal digit literals
19
+ "\\0" => 0,
20
+ "\\1" => 1,
21
+ "\\2" => 2,
22
+ "\\3" => 3,
23
+ "\\4" => 4,
24
+ "\\5" => 5,
25
+ "\\6" => 6,
26
+ "\\7" => 7
27
+ }
28
+
29
+ # The integer value that uniquely identifies the character.
30
+ attr_reader(:codepoint)
31
+
32
+ # The initial text representation of the character (if any).
33
+ attr_reader(:lexeme)
34
+
35
+ # Constructor.
36
+ # [aValue] Initialize the character with either a String literal or a codepoint value.
37
+ # Examples:
38
+ # Initializing with codepoint value...
39
+ # RegAn::Character.new(0x3a3) # Represents: Σ (Unicode GREEK CAPITAL LETTER SIGMA)
40
+ # RegAn::Character.new(931) # Also represents: Σ (931 dec == 3a3 hex)
41
+ #
42
+ # Initializing with a single character string
43
+ # RegAn::Character.new(?\u03a3) # Also represents: Σ
44
+ # RegAn::Character.new('Σ') # Obviously, represents a Σ
45
+ #
46
+ # Initializing with an escape sequence string
47
+ # Recognized escaped characters are: \a (alarm, 0x07), \n (newline, 0xA),
48
+ # \r (carriage return, 0xD), \t (tab, 0x9), \e (escape, 0x1B), \f (form feed, 0xC)
49
+ # \uXXXX where XXXX is a 4 hex digits integer value, \u{X...}, \ooo (octal) \xXX (hex)
50
+ # Any other escaped character will be treated as a literal character
51
+ # RegAn::Character.new('\n') # Represents a newline
52
+ # RegAn::Character.new('\u03a3') # Represents a Σ
53
+ def initialize(aValue)
54
+
55
+ case aValue
56
+ when String
57
+ if aValue.size == 1
58
+ # Literal single character case...
59
+ @codepoint = self.class.char2codepoint(aValue)
60
+ else
61
+ # Should be an escape sequence...
62
+ @codepoint = self.class.esc2codepoint(aValue)
63
+ end
64
+ @lexeme = aValue
65
+
66
+ when Fixnum
67
+ @codepoint = aValue
68
+ else
69
+ raise StandardError, "Cannot initialize a Character with a '#{aValue}'."
70
+ end
71
+ end
72
+
73
+ public
74
+ # Conversion method that returns a character given a codepoint (integer) value.
75
+ # Example:
76
+ # RegAn::Character::codepoint2char(0x3a3) # Returns: Σ (The Unicode GREEK CAPITAL LETTER SIGMA)
77
+ def self.codepoint2char(aCodepoint)
78
+ return [aCodepoint].pack('U') # Remark: chr() fails with codepoints > 256
79
+ end
80
+
81
+ # Conversion method that returns the codepoint for the given single character.
82
+ # Example:
83
+ # RegAn::Character::char2codepoint('Σ') # Returns: 0x3a3
84
+ def self.char2codepoint(aChar)
85
+ return aChar.ord()
86
+ end
87
+
88
+ # Conversion method that returns the codepoint for the given escape sequence (a String).
89
+ # Recognized escaped characters are: \a (alarm, 0x07), \n (newline, 0xA),
90
+ # \r (carriage return, 0xD), \t (tab, 0x9), \e (escape, 0x1B), \f (form feed, 0xC), \v (vertical tab, 0xB)
91
+ # \uXXXX where XXXX is a 4 hex digits integer value, \u{X...}, \ooo (octal) \xXX (hex)
92
+ # Any other escaped character will be treated as a literal character
93
+ # Example:
94
+ # RegAn::Character::esc2codepoint('\n') # Returns: 0xa
95
+ def self.esc2codepoint(anEscapeSequence)
96
+ raise StandardError, "Escape sequence #{anEscapeSequence} does not begin with a backslash (\)." unless anEscapeSequence[0] == "\\"
97
+ result = (anEscapeSequence.length == 2)? digram2codepoint(anEscapeSequence) : esc_number2codepoint(anEscapeSequence)
98
+
99
+ return result
100
+ end
101
+
102
+ # Return the character as a String object
103
+ def char()
104
+ self.class.codepoint2char(@codepoint)
105
+ end
106
+
107
+ # Conversion method re-definition.
108
+ # Purpose: Return the String representation of the expression.
109
+ # If the Character was initially from a text (the lexeme), then the lexeme is returned back.
110
+ # Otherwise the character corresponding to the codepoint is returned.
111
+ def to_str()
112
+ if lexeme.nil?
113
+ result = char()
114
+ else
115
+ result = lexeme.dup()
116
+ end
117
+
118
+ return result
119
+ end
120
+
121
+ # Returns true iff this Character and parameter 'another' represent the same character.
122
+ # [another] any Object. The way the equality is tested depends on the another's class
123
+ # Example:
124
+ # newOne = Character.new(?\u03a3)
125
+ # newOne == newOne # true. Identity
126
+ # newOne == Character.new(?\u03a3) # true. Both have same codepoint
127
+ # newOne == ?\u03a3 # true. The single character String match exactly the char attribute.
128
+ # newOne == 0x03a3 # true. The Fixnum is compared to the codepoint value.
129
+ # Will test equality with any Object that knows the to_s method
130
+ def ==(another)
131
+ result = case another
132
+ when Character
133
+ self.to_str == another.to_str
134
+
135
+ when Fixnum
136
+ self.codepoint == another
137
+
138
+ when String
139
+ (another.size > 1) ? false : self.to_str == another
140
+
141
+ else
142
+ # Unknown type: try with a conversion
143
+ self == another.to_s() # Recursive call
144
+ end
145
+
146
+ return result
147
+ end
148
+
149
+ # Return a plain English description of the character
150
+ def explain()
151
+ return "the character '#{to_str()}'"
152
+ end
153
+
154
+ private
155
+ # Conversion method that returns a codepoint for the given two-character (digram) escape sequence.
156
+ # Recognized escaped characters are: \a (alarm, 0x07), \n (newline, 0xA),
157
+ # \r (carriage return, 0xD), \t (tab, 0x9), \e (escape, 0x1B), \f (form feed, 0xC), \v (vertical tab, 0xB)
158
+ # Any other escape sequence will return the codepoint of the escaped character.
159
+ # [aDigram] A sequence of two characters that starts with a backslash.
160
+ def self.digram2codepoint(aDigram)
161
+ # Check that the digram is a special escape sequence
162
+ result = DigramSequences.fetch(aDigram, nil)
163
+
164
+ # If it is not a special sequence, then the escaped character is taken literally (the backslash is 'dummy')
165
+ result = char2codepoint(aDigram[-1]) if result.nil?
166
+ return result
167
+ end
168
+
169
+ # Conversion method that returns a codepoint for the given complex escape sequence.
170
+ # [anEscapeSequence] A String with the format:
171
+ # \uXXXX where XXXX is a 4 hex digits integer value,
172
+ # \u{X...} X 1 or more hex digits
173
+ # \ooo (1..3 octal digits literal)
174
+ # \xXX (1..2 hex digits literal)
175
+ def self.esc_number2codepoint(anEscapeSequence)
176
+ # Next line requires Ruby >= 1.9
177
+ unless /^\\(?:(?:(?<prefix>[uxX])\{?(?<hexa>\h+)\}?)|(?<octal>[0-7]{1,3}))$/ =~ anEscapeSequence
178
+ raise StandardError, "Unsupported escape sequence #{anEscapeSequence}."
179
+ else
180
+ #shorterSeq = anEscapeSequence[1..-1] # Remove the backslash
181
+
182
+ # Octal literal case?
183
+ return octal.oct() if octal # shorterSeq =~ /[0-7]{1,3}/
184
+
185
+ # Extract the hexadecimal number
186
+ hexliteral = hexa # shorterSeq.sub(/^[xXu]\{?([0-9a-fA-F]+)}?$/, '\1')
187
+ return hexliteral.hex()
188
+ end
189
+ end
190
+
191
+ end # class
192
+
193
+ end # module
194
+
195
+ # End of file