rley 0.5.08 → 0.5.09
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -1
- data/examples/general/SRL/lib/ast_builder.rb +74 -78
- data/examples/general/SRL/lib/grammar.rb +11 -3
- data/examples/general/SRL/lib/regex/abstract_method.rb +35 -0
- data/examples/general/SRL/lib/regex/atomic_expression.rb +21 -0
- data/examples/general/SRL/lib/regex/char_class.rb +34 -0
- data/examples/general/SRL/lib/regex/char_range.rb +50 -0
- data/examples/general/SRL/lib/regex/character.rb +195 -0
- data/examples/general/SRL/lib/regex/compound_expression.rb +60 -0
- data/examples/general/SRL/lib/regex/expression.rb +42 -0
- data/examples/general/SRL/lib/regex/monadic_expression.rb +31 -0
- data/examples/general/SRL/lib/regex/polyadic_expression.rb +64 -0
- data/examples/general/SRL/lib/regex/quantifiable.rb +28 -0
- data/examples/general/SRL/lib/regex/repetition.rb +31 -0
- data/examples/general/SRL/lib/regex_repr.rb +5 -1
- data/examples/general/SRL/lib/tokenizer.rb +8 -5
- data/examples/general/SRL/spec/integration_spec.rb +64 -41
- data/examples/general/SRL/spec/regex/character_spec.rb +159 -0
- data/examples/general/SRL/spec/tokenizer_spec.rb +18 -4
- data/examples/general/SRL/srl_demo.rb +14 -4
- data/lib/rley/constants.rb +1 -1
- metadata +14 -3
- data/examples/general/SRL/lib/srl_demo.rb +0 -67
@@ -0,0 +1,159 @@
|
|
1
|
+
# File: character_spec.rb
|
2
|
+
require_relative '../spec_helper' # Use the RSpec test framework
|
3
|
+
require_relative '../../lib/regex/character'
|
4
|
+
|
5
|
+
module Regex # Open this namespace, to get rid of scope qualifiers
|
6
|
+
|
7
|
+
describe Character do
|
8
|
+
# This constant holds an arbitrary selection of characters
|
9
|
+
SampleChars = [?a, ?\0, ?\u0107]
|
10
|
+
|
11
|
+
# This constant holds the codepoints of the character selection
|
12
|
+
SampleInts = [0x61, 0, 0x0107]
|
13
|
+
|
14
|
+
# This constant holds an arbitrary selection of two-character (digram) escape sequences
|
15
|
+
SampleDigrams = %w[ \n \e \0 \6 \k]
|
16
|
+
|
17
|
+
# This constant holds an arbitrary selection of escaped octal or hexadecimal literals
|
18
|
+
SampleNumEscs = %w[ \0 \07 \x07 \xa \x0F \u03a3 \u{a}]
|
19
|
+
|
20
|
+
before(:all) do
|
21
|
+
# Ensure that the codepoints sample matches the chars sample, element by element...
|
22
|
+
expect(SampleChars.map(&:ord)).to eq(SampleInts)
|
23
|
+
end
|
24
|
+
|
25
|
+
context 'Creation & initialization' do
|
26
|
+
it 'should be created with an integer value (codepoint) or...' do
|
27
|
+
SampleInts.each do |aCodepoint|
|
28
|
+
expect { Character.new(aCodepoint) }.not_to raise_error
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
it '...could be created with a single character String or...' do
|
33
|
+
SampleChars.each do |aChar|
|
34
|
+
expect { Character.new(aChar) }.not_to raise_error
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
it '...could be created with an escape sequence' do
|
39
|
+
# Case 1: escape sequence is a digram
|
40
|
+
SampleDigrams.each do |anEscapeSeq|
|
41
|
+
expect { Character.new(anEscapeSeq) }.not_to raise_error
|
42
|
+
end
|
43
|
+
|
44
|
+
# Case 2: escape sequence is an escaped octal or hexadecimal literal
|
45
|
+
SampleNumEscs.each do |anEscapeSeq|
|
46
|
+
expect { Character.new(anEscapeSeq) }.not_to raise_error
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
end # context
|
51
|
+
|
52
|
+
context 'Provided services' do
|
53
|
+
it 'Should know its lexeme if created from a string' do
|
54
|
+
# Lexeme is defined when the character was initialised from a text
|
55
|
+
SampleChars.each do |aChar|
|
56
|
+
ch = Character.new(aChar)
|
57
|
+
expect(ch.lexeme).to eq(aChar)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
it 'Should not know its lexeme representation if created from a codepoint' do
|
62
|
+
SampleInts.each do |aChar|
|
63
|
+
ch = Character.new(aChar)
|
64
|
+
expect(ch.lexeme).to be_nil
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'should know its String representation' do
|
69
|
+
# Try for one character
|
70
|
+
newOne = Character.new(?\u03a3)
|
71
|
+
expect(newOne.char).to eq('Σ')
|
72
|
+
expect(newOne.to_str).to eq("\u03A3")
|
73
|
+
|
74
|
+
# Try with our chars sample
|
75
|
+
SampleChars.each { |aChar| Character.new(aChar).to_str == aChar }
|
76
|
+
|
77
|
+
# Try with our codepoint sample
|
78
|
+
mapped_chars = SampleInts.map { |aCodepoint| Character.new(aCodepoint).char }
|
79
|
+
expect(mapped_chars).to eq(SampleChars)
|
80
|
+
|
81
|
+
# Try with our escape sequence samples
|
82
|
+
(SampleDigrams + SampleNumEscs).each do |anEscSeq|
|
83
|
+
Character.new(anEscSeq).to_str == String::class_eval(%Q|"#{anEscSeq}"|)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'should know its codepoint' do
|
88
|
+
# Try for one character
|
89
|
+
newOne = Character.new(?\u03a3)
|
90
|
+
expect(newOne.codepoint).to eq(0x03a3)
|
91
|
+
|
92
|
+
# Try with our chars sample
|
93
|
+
allCodepoints = SampleChars.map { |aChar| Character.new(aChar).codepoint }
|
94
|
+
expect(allCodepoints).to eq(SampleInts)
|
95
|
+
|
96
|
+
# Try with our codepoint sample
|
97
|
+
mapped_chars = SampleInts.each { |aCodepoint| expect(Character.new(aCodepoint).codepoint).to eq(aCodepoint) }
|
98
|
+
|
99
|
+
# Try with our escape sequence samples
|
100
|
+
(SampleDigrams + SampleNumEscs).each do |anEscSeq|
|
101
|
+
expect(Character.new(anEscSeq).codepoint).to eq(String::class_eval(%Q|"#{anEscSeq}".ord()|))
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
it 'should know whether it is equal to another Object' do
|
106
|
+
newOne = Character.new(?\u03a3)
|
107
|
+
|
108
|
+
# Case 1: test equality with itself
|
109
|
+
expect(newOne).to eq(newOne)
|
110
|
+
|
111
|
+
# Case 2: test equality with another Character
|
112
|
+
expect(newOne).to eq(Character.new(?\u03a3))
|
113
|
+
expect(newOne).not_to eq(Character.new(?\u0333))
|
114
|
+
|
115
|
+
# Case 3: test equality with an integer value (equality based on codepoint value)
|
116
|
+
expect(newOne).to eq(0x03a3)
|
117
|
+
expect(newOne).not_to eq(0x0333)
|
118
|
+
|
119
|
+
# Case 4: test equality with a single-character String
|
120
|
+
expect(newOne).to eq(?\u03a3)
|
121
|
+
expect(newOne).not_to eq(?\u0333)
|
122
|
+
|
123
|
+
# Case 5: equality test fails with multi-character strings
|
124
|
+
expect(newOne).not_to eq('03a3')
|
125
|
+
|
126
|
+
# Case 6: equality testing with an arbitrary object
|
127
|
+
expect(newOne).not_to eq(nil)
|
128
|
+
expect(newOne).not_to eq(Object.new)
|
129
|
+
|
130
|
+
# In case 6, equality is based on the to_s method.
|
131
|
+
simulator = double('fake')
|
132
|
+
expect(simulator).to receive(:to_s).and_return(?\u03a3)
|
133
|
+
expect(newOne).to eq(simulator)
|
134
|
+
|
135
|
+
# Create a module that re-defines the existing to_s method
|
136
|
+
module Tweak_to_s
|
137
|
+
def to_s() # Overwrite the existing to_s method
|
138
|
+
return ?\u03a3
|
139
|
+
end
|
140
|
+
end # module
|
141
|
+
weird = Object.new
|
142
|
+
weird.extend(Tweak_to_s)
|
143
|
+
expect(newOne).to eq(weird)
|
144
|
+
end
|
145
|
+
|
146
|
+
it "should know its readable description" do
|
147
|
+
ch1 = Character.new('a')
|
148
|
+
expect(ch1.explain).to eq("the character 'a'")
|
149
|
+
|
150
|
+
ch2 = Character.new(?\u03a3)
|
151
|
+
expect(ch2.explain).to eq("the character '\u03a3'")
|
152
|
+
end
|
153
|
+
end # context
|
154
|
+
|
155
|
+
end # describe
|
156
|
+
|
157
|
+
end # module
|
158
|
+
|
159
|
+
# End of file
|
@@ -57,18 +57,32 @@ module SRL
|
|
57
57
|
subject.scanner.string = ' 1 '
|
58
58
|
token = subject.tokens.first
|
59
59
|
expect(token).to be_kind_of(Rley::Lexical::Token)
|
60
|
-
expect(token.terminal.name).to eq('
|
60
|
+
expect(token.terminal.name).to eq('DIGIT_LIT')
|
61
61
|
expect(token.lexeme).to eq('1')
|
62
62
|
end
|
63
63
|
end # context
|
64
64
|
|
65
|
+
context 'Character range tokenization:' do
|
66
|
+
it "should recognize 'letter from ... to ...'" do
|
67
|
+
input = 'letter a to f'
|
68
|
+
subject.scanner.string = input
|
69
|
+
expectations = [
|
70
|
+
['LETTER', 'letter'],
|
71
|
+
['LETTER_LIT', 'a'],
|
72
|
+
['TO', 'to'],
|
73
|
+
['LETTER_LIT', 'f']
|
74
|
+
]
|
75
|
+
match_expectations(subject, expectations)
|
76
|
+
end
|
77
|
+
end # context
|
78
|
+
|
65
79
|
context 'Quantifier tokenization:' do
|
66
80
|
it "should recognize 'exactly ... times'" do
|
67
81
|
input = 'exactly 4 Times'
|
68
82
|
subject.scanner.string = input
|
69
83
|
expectations = [
|
70
84
|
['EXACTLY', 'exactly'],
|
71
|
-
['
|
85
|
+
['DIGIT_LIT', '4'],
|
72
86
|
['TIMES', 'Times']
|
73
87
|
]
|
74
88
|
match_expectations(subject, expectations)
|
@@ -79,9 +93,9 @@ module SRL
|
|
79
93
|
subject.scanner.string = input
|
80
94
|
expectations = [
|
81
95
|
['BETWEEN', 'Between'],
|
82
|
-
['
|
96
|
+
['DIGIT_LIT', '2'],
|
83
97
|
['AND', 'AND'],
|
84
|
-
['
|
98
|
+
['DIGIT_LIT', '4'],
|
85
99
|
['TIMES', 'times']
|
86
100
|
]
|
87
101
|
match_expectations(subject, expectations)
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative './lib/parser'
|
2
|
+
require_relative './lib/ast_builder'
|
2
3
|
|
3
4
|
def print_title(aTitle)
|
4
5
|
puts aTitle
|
@@ -28,13 +29,13 @@ Simple Regex Language parser:
|
|
28
29
|
- Parses a very limited subset of the language and displays the parse tree
|
29
30
|
|
30
31
|
Command-line syntax:
|
31
|
-
ruby #{my_name} "
|
32
|
+
ruby #{my_name} "SRL expression"
|
32
33
|
where:
|
33
|
-
the SRL
|
34
|
+
the SRL expression is enclosed between double quotes (")
|
34
35
|
|
35
36
|
Examples:
|
36
|
-
ruby #{my_name} "exactly 4 times"
|
37
|
-
ruby #{my_name} "between 2 and 3 times"
|
37
|
+
ruby #{my_name} "letter from a to f exactly 4 times"
|
38
|
+
ruby #{my_name} "uppercase letter between 2 and 3 times"
|
38
39
|
END_MSG
|
39
40
|
puts msg
|
40
41
|
exit(1)
|
@@ -54,4 +55,13 @@ end
|
|
54
55
|
cst_ptree = result.parse_tree
|
55
56
|
print_tree('Concrete Syntax Tree (CST)', cst_ptree)
|
56
57
|
|
58
|
+
# Generate a regexp literal representation from the parse result
|
59
|
+
tree_builder = ASTBuilder
|
60
|
+
ast_ptree = result.parse_tree(tree_builder)
|
61
|
+
|
62
|
+
# Now output the regexp literal
|
63
|
+
root = ast_ptree.root
|
64
|
+
print_title('SRL to Regexp representation:')
|
65
|
+
puts "#{ARGV[0]} => #{root.to_str}" # Output the expression result
|
66
|
+
|
57
67
|
# End of file
|
data/lib/rley/constants.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.09
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-12-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: coveralls
|
@@ -146,11 +146,22 @@ files:
|
|
146
146
|
- examples/general/SRL/lib/ast_building.rb
|
147
147
|
- examples/general/SRL/lib/grammar.rb
|
148
148
|
- examples/general/SRL/lib/parser.rb
|
149
|
+
- examples/general/SRL/lib/regex/abstract_method.rb
|
150
|
+
- examples/general/SRL/lib/regex/atomic_expression.rb
|
151
|
+
- examples/general/SRL/lib/regex/char_class.rb
|
152
|
+
- examples/general/SRL/lib/regex/char_range.rb
|
153
|
+
- examples/general/SRL/lib/regex/character.rb
|
154
|
+
- examples/general/SRL/lib/regex/compound_expression.rb
|
155
|
+
- examples/general/SRL/lib/regex/expression.rb
|
156
|
+
- examples/general/SRL/lib/regex/monadic_expression.rb
|
149
157
|
- examples/general/SRL/lib/regex/multiplicity.rb
|
158
|
+
- examples/general/SRL/lib/regex/polyadic_expression.rb
|
159
|
+
- examples/general/SRL/lib/regex/quantifiable.rb
|
160
|
+
- examples/general/SRL/lib/regex/repetition.rb
|
150
161
|
- examples/general/SRL/lib/regex_repr.rb
|
151
|
-
- examples/general/SRL/lib/srl_demo.rb
|
152
162
|
- examples/general/SRL/lib/tokenizer.rb
|
153
163
|
- examples/general/SRL/spec/integration_spec.rb
|
164
|
+
- examples/general/SRL/spec/regex/character_spec.rb
|
154
165
|
- examples/general/SRL/spec/regex/multiplicity_spec.rb
|
155
166
|
- examples/general/SRL/spec/spec_helper.rb
|
156
167
|
- examples/general/SRL/spec/tokenizer_spec.rb
|
@@ -1,67 +0,0 @@
|
|
1
|
-
require_relative 'parser'
|
2
|
-
require_relative 'ast_builder'
|
3
|
-
|
4
|
-
def print_title(aTitle)
|
5
|
-
puts aTitle
|
6
|
-
puts '=' * aTitle.size
|
7
|
-
end
|
8
|
-
|
9
|
-
def print_tree(aTitle, aParseTree)
|
10
|
-
# Let's create a parse tree visitor
|
11
|
-
visitor = Rley::ParseTreeVisitor.new(aParseTree)
|
12
|
-
|
13
|
-
# Now output formatted parse tree
|
14
|
-
print_title(aTitle)
|
15
|
-
renderer = Rley::Formatter::Asciitree.new($stdout)
|
16
|
-
renderer.render(visitor)
|
17
|
-
puts ''
|
18
|
-
end
|
19
|
-
|
20
|
-
# Create an SRL parser object
|
21
|
-
parser = SRL::Parser.new
|
22
|
-
|
23
|
-
# Parse the input expression in command-line
|
24
|
-
if ARGV.empty?
|
25
|
-
my_name = File.basename(__FILE__)
|
26
|
-
msg = <<-END_MSG
|
27
|
-
Demo parser for the SRL, the Simple Regex Language (https://simple-regex.com/).
|
28
|
-
Ultimately it will support SRL in full, currently it parses only the
|
29
|
-
SRL quantifiers.
|
30
|
-
The utility prints the resulting regular expression.
|
31
|
-
|
32
|
-
Command-line syntax:
|
33
|
-
ruby #{my_name} filename
|
34
|
-
where:
|
35
|
-
the file name is a SRL source file.
|
36
|
-
|
37
|
-
Examples:
|
38
|
-
ruby #{my_name} sample01.srl
|
39
|
-
END_MSG
|
40
|
-
puts msg
|
41
|
-
exit(1)
|
42
|
-
end
|
43
|
-
puts ARGV[0]
|
44
|
-
result = parser.parse_expression(ARGV[0])
|
45
|
-
|
46
|
-
unless result.success?
|
47
|
-
# Stop if the parse failed...
|
48
|
-
puts "Parsing of '#{ARGV[0]}' failed"
|
49
|
-
puts "Reason: #{result.failure_reason.message}"
|
50
|
-
exit(1)
|
51
|
-
end
|
52
|
-
|
53
|
-
|
54
|
-
# Generate a concrete syntax parse tree from the parse result
|
55
|
-
cst_ptree = result.parse_tree
|
56
|
-
print_tree('Concrete Syntax Tree (CST)', cst_ptree)
|
57
|
-
|
58
|
-
# Generate an abstract syntax parse tree from the parse result
|
59
|
-
tree_builder = ASTBuilder
|
60
|
-
ast_ptree = result.parse_tree(tree_builder)
|
61
|
-
# print_tree('Abstract Syntax Tree (AST)', ast_ptree)
|
62
|
-
|
63
|
-
# # Now perform the computation of math expression
|
64
|
-
# root = ast_ptree.root
|
65
|
-
# print_title('Result:')
|
66
|
-
# puts root.interpret.to_s # Output the expression result
|
67
|
-
# End of file
|