rley 0.5.08 → 0.5.09
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -1
- data/examples/general/SRL/lib/ast_builder.rb +74 -78
- data/examples/general/SRL/lib/grammar.rb +11 -3
- data/examples/general/SRL/lib/regex/abstract_method.rb +35 -0
- data/examples/general/SRL/lib/regex/atomic_expression.rb +21 -0
- data/examples/general/SRL/lib/regex/char_class.rb +34 -0
- data/examples/general/SRL/lib/regex/char_range.rb +50 -0
- data/examples/general/SRL/lib/regex/character.rb +195 -0
- data/examples/general/SRL/lib/regex/compound_expression.rb +60 -0
- data/examples/general/SRL/lib/regex/expression.rb +42 -0
- data/examples/general/SRL/lib/regex/monadic_expression.rb +31 -0
- data/examples/general/SRL/lib/regex/polyadic_expression.rb +64 -0
- data/examples/general/SRL/lib/regex/quantifiable.rb +28 -0
- data/examples/general/SRL/lib/regex/repetition.rb +31 -0
- data/examples/general/SRL/lib/regex_repr.rb +5 -1
- data/examples/general/SRL/lib/tokenizer.rb +8 -5
- data/examples/general/SRL/spec/integration_spec.rb +64 -41
- data/examples/general/SRL/spec/regex/character_spec.rb +159 -0
- data/examples/general/SRL/spec/tokenizer_spec.rb +18 -4
- data/examples/general/SRL/srl_demo.rb +14 -4
- data/lib/rley/constants.rb +1 -1
- metadata +14 -3
- data/examples/general/SRL/lib/srl_demo.rb +0 -67
@@ -0,0 +1,159 @@
|
|
1
|
+
# File: character_spec.rb
|
2
|
+
require_relative '../spec_helper' # Use the RSpec test framework
|
3
|
+
require_relative '../../lib/regex/character'
|
4
|
+
|
5
|
+
module Regex # Open this namespace, to get rid of scope qualifiers
|
6
|
+
|
7
|
+
describe Character do
|
8
|
+
# This constant holds an arbitrary selection of characters
|
9
|
+
SampleChars = [?a, ?\0, ?\u0107]
|
10
|
+
|
11
|
+
# This constant holds the codepoints of the character selection
|
12
|
+
SampleInts = [0x61, 0, 0x0107]
|
13
|
+
|
14
|
+
# This constant holds an arbitrary selection of two-character (digram) escape sequences
|
15
|
+
SampleDigrams = %w[ \n \e \0 \6 \k]
|
16
|
+
|
17
|
+
# This constant holds an arbitrary selection of escaped octal or hexadecimal literals
|
18
|
+
SampleNumEscs = %w[ \0 \07 \x07 \xa \x0F \u03a3 \u{a}]
|
19
|
+
|
20
|
+
before(:all) do
|
21
|
+
# Ensure that the set of codepoints matches the set of chars...
|
22
|
+
expect(SampleChars.map(&:ord)).to eq(SampleInts)
|
23
|
+
end
|
24
|
+
|
25
|
+
context 'Creation & initialization' do
|
26
|
+
it 'should be created with a with an integer value (codepoint) or...' do
|
27
|
+
SampleInts.each do |aCodepoint|
|
28
|
+
expect { Character.new(aCodepoint) }.not_to raise_error
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
it '...could be created with a single character String or...' do
|
33
|
+
SampleChars.each do |aChar|
|
34
|
+
expect { Character.new(aChar) }.not_to raise_error
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
it '...could be created with an escape sequence' do
|
39
|
+
# Case 1: escape sequence is a digram
|
40
|
+
SampleDigrams.each do |anEscapeSeq|
|
41
|
+
expect { Character.new(anEscapeSeq) }.not_to raise_error
|
42
|
+
end
|
43
|
+
|
44
|
+
# Case 2: escape sequence is an escaped octal or hexadecimal literal
|
45
|
+
SampleNumEscs.each do |anEscapeSeq|
|
46
|
+
expect { Character.new(anEscapeSeq) }.not_to raise_error
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
end # context
|
51
|
+
|
52
|
+
context 'Provided services' do
|
53
|
+
it 'Should know its lexeme if created from a string' do
|
54
|
+
# Lexeme is defined when the character was initialised from a text
|
55
|
+
SampleChars.each do |aChar|
|
56
|
+
ch = Character.new(aChar)
|
57
|
+
expect(ch.lexeme).to eq(aChar)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
it 'Should not know its lexeme representation if created from a codepoint' do
|
62
|
+
SampleInts.each do |aChar|
|
63
|
+
ch = Character.new(aChar)
|
64
|
+
expect(ch.lexeme).to be_nil
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'should know its String representation' do
|
69
|
+
# Try for one character
|
70
|
+
newOne = Character.new(?\u03a3)
|
71
|
+
expect(newOne.char).to eq('Σ')
|
72
|
+
expect(newOne.to_str).to eq("\u03A3")
|
73
|
+
|
74
|
+
# Try with our chars sample
|
75
|
+
SampleChars.each { |aChar| Character.new(aChar).to_str == aChar }
|
76
|
+
|
77
|
+
# Try with our codepoint sample
|
78
|
+
mapped_chars = SampleInts.map { |aCodepoint| Character.new(aCodepoint).char }
|
79
|
+
expect(mapped_chars).to eq(SampleChars)
|
80
|
+
|
81
|
+
# Try with our escape sequence samples
|
82
|
+
(SampleDigrams + SampleNumEscs).each do |anEscSeq|
|
83
|
+
Character.new(anEscSeq).to_str == String::class_eval(%Q|"#{anEscSeq}"|)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'should know its codepoint' do
|
88
|
+
# Try for one character
|
89
|
+
newOne = Character.new(?\u03a3)
|
90
|
+
expect(newOne.codepoint).to eq(0x03a3)
|
91
|
+
|
92
|
+
# Try with our chars sample
|
93
|
+
allCodepoints = SampleChars.map { |aChar| Character.new(aChar).codepoint }
|
94
|
+
expect(allCodepoints).to eq(SampleInts)
|
95
|
+
|
96
|
+
# Try with our codepoint sample
|
97
|
+
mapped_chars = SampleInts.each { |aCodepoint| expect(Character.new(aCodepoint).codepoint).to eq(aCodepoint) }
|
98
|
+
|
99
|
+
# Try with our escape sequence samples
|
100
|
+
(SampleDigrams + SampleNumEscs).each do |anEscSeq|
|
101
|
+
expect(Character.new(anEscSeq).codepoint).to eq(String::class_eval(%Q|"#{anEscSeq}".ord()|))
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
it 'should known whether it is equal to another Object' do
|
106
|
+
newOne = Character.new(?\u03a3)
|
107
|
+
|
108
|
+
# Case 1: test equality with itself
|
109
|
+
expect(newOne).to eq(newOne)
|
110
|
+
|
111
|
+
# Case 2: test equality with another Character
|
112
|
+
expect(newOne).to eq(Character.new(?\u03a3))
|
113
|
+
expect(newOne).not_to eq(Character.new(?\u0333))
|
114
|
+
|
115
|
+
# Case 3: test equality with an integer value (equality based on codepoint value)
|
116
|
+
expect(newOne).to eq(0x03a3)
|
117
|
+
expect(newOne).not_to eq(0x0333)
|
118
|
+
|
119
|
+
# Case 4: test equality with a single-character String
|
120
|
+
expect(newOne).to eq(?\u03a3)
|
121
|
+
expect(newOne).not_to eq(?\u0333)
|
122
|
+
|
123
|
+
# Case 5: test fails with multiple character strings
|
124
|
+
expect(newOne).not_to eq('03a3')
|
125
|
+
|
126
|
+
# Case 6: equality testing with an arbitrary object
|
127
|
+
expect(newOne).not_to eq(nil)
|
128
|
+
expect(newOne).not_to eq(Object.new)
|
129
|
+
|
130
|
+
# In case 6, equality is based on to_s method.
|
131
|
+
simulator = double('fake')
|
132
|
+
expect(simulator).to receive(:to_s).and_return(?\u03a3)
|
133
|
+
expect(newOne).to eq(simulator)
|
134
|
+
|
135
|
+
# Create a module that re-defines the existing to_s method
|
136
|
+
module Tweak_to_s
|
137
|
+
def to_s() # Overwrite the existing to_s method
|
138
|
+
return ?\u03a3
|
139
|
+
end
|
140
|
+
end # module
|
141
|
+
weird = Object.new
|
142
|
+
weird.extend(Tweak_to_s)
|
143
|
+
expect(newOne).to eq(weird)
|
144
|
+
end
|
145
|
+
|
146
|
+
it "should know its readable description" do
|
147
|
+
ch1 = Character.new('a')
|
148
|
+
expect(ch1.explain).to eq("the character 'a'")
|
149
|
+
|
150
|
+
ch2 = Character.new(?\u03a3)
|
151
|
+
expect(ch2.explain).to eq("the character '\u03a3'")
|
152
|
+
end
|
153
|
+
end # context
|
154
|
+
|
155
|
+
end # describe
|
156
|
+
|
157
|
+
end # module
|
158
|
+
|
159
|
+
# End of file
|
@@ -57,18 +57,32 @@ module SRL
|
|
57
57
|
subject.scanner.string = ' 1 '
|
58
58
|
token = subject.tokens.first
|
59
59
|
expect(token).to be_kind_of(Rley::Lexical::Token)
|
60
|
-
expect(token.terminal.name).to eq('
|
60
|
+
expect(token.terminal.name).to eq('DIGIT_LIT')
|
61
61
|
expect(token.lexeme).to eq('1')
|
62
62
|
end
|
63
63
|
end # context
|
64
64
|
|
65
|
+
context 'Character range tokenization:' do
|
66
|
+
it "should recognize 'letter from ... to ...'" do
|
67
|
+
input = 'letter a to f'
|
68
|
+
subject.scanner.string = input
|
69
|
+
expectations = [
|
70
|
+
['LETTER', 'letter'],
|
71
|
+
['LETTER_LIT', 'a'],
|
72
|
+
['TO', 'to'],
|
73
|
+
['LETTER_LIT', 'f']
|
74
|
+
]
|
75
|
+
match_expectations(subject, expectations)
|
76
|
+
end
|
77
|
+
end # context
|
78
|
+
|
65
79
|
context 'Quantifier tokenization:' do
|
66
80
|
it "should recognize 'exactly ... times'" do
|
67
81
|
input = 'exactly 4 Times'
|
68
82
|
subject.scanner.string = input
|
69
83
|
expectations = [
|
70
84
|
['EXACTLY', 'exactly'],
|
71
|
-
['
|
85
|
+
['DIGIT_LIT', '4'],
|
72
86
|
['TIMES', 'Times']
|
73
87
|
]
|
74
88
|
match_expectations(subject, expectations)
|
@@ -79,9 +93,9 @@ module SRL
|
|
79
93
|
subject.scanner.string = input
|
80
94
|
expectations = [
|
81
95
|
['BETWEEN', 'Between'],
|
82
|
-
['
|
96
|
+
['DIGIT_LIT', '2'],
|
83
97
|
['AND', 'AND'],
|
84
|
-
['
|
98
|
+
['DIGIT_LIT', '4'],
|
85
99
|
['TIMES', 'times']
|
86
100
|
]
|
87
101
|
match_expectations(subject, expectations)
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative './lib/parser'
|
2
|
+
require_relative './lib/ast_builder'
|
2
3
|
|
3
4
|
def print_title(aTitle)
|
4
5
|
puts aTitle
|
@@ -28,13 +29,13 @@ Simple Regex Language parser:
|
|
28
29
|
- Parses a very limited subset of the language and displays the parse tree
|
29
30
|
|
30
31
|
Command-line syntax:
|
31
|
-
ruby #{my_name} "
|
32
|
+
ruby #{my_name} "SRL expression"
|
32
33
|
where:
|
33
|
-
the SRL
|
34
|
+
the SRL expression is enclosed between double quotes (")
|
34
35
|
|
35
36
|
Examples:
|
36
|
-
ruby #{my_name} "exactly 4 times"
|
37
|
-
ruby #{my_name} "between 2 and 3 times"
|
37
|
+
ruby #{my_name} "letter from a to f exactly 4 times"
|
38
|
+
ruby #{my_name} "uppercase letter between 2 and 3 times"
|
38
39
|
END_MSG
|
39
40
|
puts msg
|
40
41
|
exit(1)
|
@@ -54,4 +55,13 @@ end
|
|
54
55
|
cst_ptree = result.parse_tree
|
55
56
|
print_tree('Concrete Syntax Tree (CST)', cst_ptree)
|
56
57
|
|
58
|
+
# Generate a regexp literal representation from the parse result
|
59
|
+
tree_builder = ASTBuilder
|
60
|
+
ast_ptree = result.parse_tree(tree_builder)
|
61
|
+
|
62
|
+
# Now output the regexp literal
|
63
|
+
root = ast_ptree.root
|
64
|
+
print_title('SRL to Regexp representation:')
|
65
|
+
puts "#{ARGV[0]} => #{root.to_str}" # Output the expression result
|
66
|
+
|
57
67
|
# End of file
|
data/lib/rley/constants.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.09
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-12-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: coveralls
|
@@ -146,11 +146,22 @@ files:
|
|
146
146
|
- examples/general/SRL/lib/ast_building.rb
|
147
147
|
- examples/general/SRL/lib/grammar.rb
|
148
148
|
- examples/general/SRL/lib/parser.rb
|
149
|
+
- examples/general/SRL/lib/regex/abstract_method.rb
|
150
|
+
- examples/general/SRL/lib/regex/atomic_expression.rb
|
151
|
+
- examples/general/SRL/lib/regex/char_class.rb
|
152
|
+
- examples/general/SRL/lib/regex/char_range.rb
|
153
|
+
- examples/general/SRL/lib/regex/character.rb
|
154
|
+
- examples/general/SRL/lib/regex/compound_expression.rb
|
155
|
+
- examples/general/SRL/lib/regex/expression.rb
|
156
|
+
- examples/general/SRL/lib/regex/monadic_expression.rb
|
149
157
|
- examples/general/SRL/lib/regex/multiplicity.rb
|
158
|
+
- examples/general/SRL/lib/regex/polyadic_expression.rb
|
159
|
+
- examples/general/SRL/lib/regex/quantifiable.rb
|
160
|
+
- examples/general/SRL/lib/regex/repetition.rb
|
150
161
|
- examples/general/SRL/lib/regex_repr.rb
|
151
|
-
- examples/general/SRL/lib/srl_demo.rb
|
152
162
|
- examples/general/SRL/lib/tokenizer.rb
|
153
163
|
- examples/general/SRL/spec/integration_spec.rb
|
164
|
+
- examples/general/SRL/spec/regex/character_spec.rb
|
154
165
|
- examples/general/SRL/spec/regex/multiplicity_spec.rb
|
155
166
|
- examples/general/SRL/spec/spec_helper.rb
|
156
167
|
- examples/general/SRL/spec/tokenizer_spec.rb
|
@@ -1,67 +0,0 @@
|
|
1
|
-
require_relative 'parser'
|
2
|
-
require_relative 'ast_builder'
|
3
|
-
|
4
|
-
def print_title(aTitle)
|
5
|
-
puts aTitle
|
6
|
-
puts '=' * aTitle.size
|
7
|
-
end
|
8
|
-
|
9
|
-
def print_tree(aTitle, aParseTree)
|
10
|
-
# Let's create a parse tree visitor
|
11
|
-
visitor = Rley::ParseTreeVisitor.new(aParseTree)
|
12
|
-
|
13
|
-
# Now output formatted parse tree
|
14
|
-
print_title(aTitle)
|
15
|
-
renderer = Rley::Formatter::Asciitree.new($stdout)
|
16
|
-
renderer.render(visitor)
|
17
|
-
puts ''
|
18
|
-
end
|
19
|
-
|
20
|
-
# Create a calculator parser object
|
21
|
-
parser = SRL::Parser.new
|
22
|
-
|
23
|
-
# Parse the input expression in command-line
|
24
|
-
if ARGV.empty?
|
25
|
-
my_name = File.basename(__FILE__)
|
26
|
-
msg = <<-END_MSG
|
27
|
-
Demo parser for the SRL, the Simple Regex Language (https://simple-regex.com/).
|
28
|
-
Ultimately it will support SRL in full, currently it parses only the
|
29
|
-
SRL quantifiers.
|
30
|
-
The utility prints the resulting regular expression.
|
31
|
-
|
32
|
-
Command-line syntax:
|
33
|
-
ruby #{my_name} filename
|
34
|
-
where:
|
35
|
-
the file name is a SRL source file.
|
36
|
-
|
37
|
-
Examples:
|
38
|
-
ruby #{my_name} sample01.srl
|
39
|
-
END_MSG
|
40
|
-
puts msg
|
41
|
-
exit(1)
|
42
|
-
end
|
43
|
-
puts ARGV[0]
|
44
|
-
result = parser.parse_expression(ARGV[0])
|
45
|
-
|
46
|
-
unless result.success?
|
47
|
-
# Stop if the parse failed...
|
48
|
-
puts "Parsing of '#{ARGV[0]}' failed"
|
49
|
-
puts "Reason: #{result.failure_reason.message}"
|
50
|
-
exit(1)
|
51
|
-
end
|
52
|
-
|
53
|
-
|
54
|
-
# Generate a concrete syntax parse tree from the parse result
|
55
|
-
cst_ptree = result.parse_tree
|
56
|
-
print_tree('Concrete Syntax Tree (CST)', cst_ptree)
|
57
|
-
|
58
|
-
# Generate an abstract syntax parse tree from the parse result
|
59
|
-
tree_builder = ASTBuilder
|
60
|
-
ast_ptree = result.parse_tree(tree_builder)
|
61
|
-
# print_tree('Abstract Syntax Tree (AST)', ast_ptree)
|
62
|
-
|
63
|
-
# # Now perform the computation of math expression
|
64
|
-
# root = ast_ptree.root
|
65
|
-
# print_title('Result:')
|
66
|
-
# puts root.interpret.to_s # Output the expression result
|
67
|
-
# End of file
|