rley 0.5.08 → 0.5.09

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,159 @@
1
+ # File: character_spec.rb
2
+ require_relative '../spec_helper' # Use the RSpec test framework
3
+ require_relative '../../lib/regex/character'
4
+
5
+ module Regex # Open this namespace, to get rid of scope qualifiers
6
+
7
+ describe Character do
8
+ # This constant holds an arbitrary selection of characters
9
+ SampleChars = [?a, ?\0, ?\u0107]
10
+
11
+ # This constant holds the codepoints of the character selection
12
+ SampleInts = [0x61, 0, 0x0107]
13
+
14
+ # This constant holds an arbitrary selection of two-character (digram) escape sequences
15
+ SampleDigrams = %w[ \n \e \0 \6 \k]
16
+
17
+ # This constant holds an arbitrary selection of escaped octal or hexadecimal literals
18
+ SampleNumEscs = %w[ \0 \07 \x07 \xa \x0F \u03a3 \u{a}]
19
+
20
+ before(:all) do
21
+ # Ensure that the set of codepoints matches the set of chars...
22
+ expect(SampleChars.map(&:ord)).to eq(SampleInts)
23
+ end
24
+
25
+ context 'Creation & initialization' do
26
+ it 'should be created with a with an integer value (codepoint) or...' do
27
+ SampleInts.each do |aCodepoint|
28
+ expect { Character.new(aCodepoint) }.not_to raise_error
29
+ end
30
+ end
31
+
32
+ it '...could be created with a single character String or...' do
33
+ SampleChars.each do |aChar|
34
+ expect { Character.new(aChar) }.not_to raise_error
35
+ end
36
+ end
37
+
38
+ it '...could be created with an escape sequence' do
39
+ # Case 1: escape sequence is a digram
40
+ SampleDigrams.each do |anEscapeSeq|
41
+ expect { Character.new(anEscapeSeq) }.not_to raise_error
42
+ end
43
+
44
+ # Case 2: escape sequence is an escaped octal or hexadecimal literal
45
+ SampleNumEscs.each do |anEscapeSeq|
46
+ expect { Character.new(anEscapeSeq) }.not_to raise_error
47
+ end
48
+ end
49
+
50
+ end # context
51
+
52
+ context 'Provided services' do
53
+ it 'Should know its lexeme if created from a string' do
54
+ # Lexeme is defined when the character was initialised from a text
55
+ SampleChars.each do |aChar|
56
+ ch = Character.new(aChar)
57
+ expect(ch.lexeme).to eq(aChar)
58
+ end
59
+ end
60
+
61
+ it 'Should not know its lexeme representation if created from a codepoint' do
62
+ SampleInts.each do |aChar|
63
+ ch = Character.new(aChar)
64
+ expect(ch.lexeme).to be_nil
65
+ end
66
+ end
67
+
68
+ it 'should know its String representation' do
69
+ # Try for one character
70
+ newOne = Character.new(?\u03a3)
71
+ expect(newOne.char).to eq('Σ')
72
+ expect(newOne.to_str).to eq("\u03A3")
73
+
74
+ # Try with our chars sample
75
+ SampleChars.each { |aChar| Character.new(aChar).to_str == aChar }
76
+
77
+ # Try with our codepoint sample
78
+ mapped_chars = SampleInts.map { |aCodepoint| Character.new(aCodepoint).char }
79
+ expect(mapped_chars).to eq(SampleChars)
80
+
81
+ # Try with our escape sequence samples
82
+ (SampleDigrams + SampleNumEscs).each do |anEscSeq|
83
+ Character.new(anEscSeq).to_str == String::class_eval(%Q|"#{anEscSeq}"|)
84
+ end
85
+ end
86
+
87
+ it 'should know its codepoint' do
88
+ # Try for one character
89
+ newOne = Character.new(?\u03a3)
90
+ expect(newOne.codepoint).to eq(0x03a3)
91
+
92
+ # Try with our chars sample
93
+ allCodepoints = SampleChars.map { |aChar| Character.new(aChar).codepoint }
94
+ expect(allCodepoints).to eq(SampleInts)
95
+
96
+ # Try with our codepoint sample
97
+ mapped_chars = SampleInts.each { |aCodepoint| expect(Character.new(aCodepoint).codepoint).to eq(aCodepoint) }
98
+
99
+ # Try with our escape sequence samples
100
+ (SampleDigrams + SampleNumEscs).each do |anEscSeq|
101
+ expect(Character.new(anEscSeq).codepoint).to eq(String::class_eval(%Q|"#{anEscSeq}".ord()|))
102
+ end
103
+ end
104
+
105
+ it 'should known whether it is equal to another Object' do
106
+ newOne = Character.new(?\u03a3)
107
+
108
+ # Case 1: test equality with itself
109
+ expect(newOne).to eq(newOne)
110
+
111
+ # Case 2: test equality with another Character
112
+ expect(newOne).to eq(Character.new(?\u03a3))
113
+ expect(newOne).not_to eq(Character.new(?\u0333))
114
+
115
+ # Case 3: test equality with an integer value (equality based on codepoint value)
116
+ expect(newOne).to eq(0x03a3)
117
+ expect(newOne).not_to eq(0x0333)
118
+
119
+ # Case 4: test equality with a single-character String
120
+ expect(newOne).to eq(?\u03a3)
121
+ expect(newOne).not_to eq(?\u0333)
122
+
123
+ # Case 5: test fails with multiple character strings
124
+ expect(newOne).not_to eq('03a3')
125
+
126
+ # Case 6: equality testing with an arbitrary object
127
+ expect(newOne).not_to eq(nil)
128
+ expect(newOne).not_to eq(Object.new)
129
+
130
+ # In case 6, equality is based on to_s method.
131
+ simulator = double('fake')
132
+ expect(simulator).to receive(:to_s).and_return(?\u03a3)
133
+ expect(newOne).to eq(simulator)
134
+
135
+ # Create a module that re-defines the existing to_s method
136
+ module Tweak_to_s
137
+ def to_s() # Overwrite the existing to_s method
138
+ return ?\u03a3
139
+ end
140
+ end # module
141
+ weird = Object.new
142
+ weird.extend(Tweak_to_s)
143
+ expect(newOne).to eq(weird)
144
+ end
145
+
146
+ it "should know its readable description" do
147
+ ch1 = Character.new('a')
148
+ expect(ch1.explain).to eq("the character 'a'")
149
+
150
+ ch2 = Character.new(?\u03a3)
151
+ expect(ch2.explain).to eq("the character '\u03a3'")
152
+ end
153
+ end # context
154
+
155
+ end # describe
156
+
157
+ end # module
158
+
159
+ # End of file
@@ -57,18 +57,32 @@ module SRL
57
57
  subject.scanner.string = ' 1 '
58
58
  token = subject.tokens.first
59
59
  expect(token).to be_kind_of(Rley::Lexical::Token)
60
- expect(token.terminal.name).to eq('DIGIT')
60
+ expect(token.terminal.name).to eq('DIGIT_LIT')
61
61
  expect(token.lexeme).to eq('1')
62
62
  end
63
63
  end # context
64
64
 
65
+ context 'Character range tokenization:' do
66
+ it "should recognize 'letter from ... to ...'" do
67
+ input = 'letter a to f'
68
+ subject.scanner.string = input
69
+ expectations = [
70
+ ['LETTER', 'letter'],
71
+ ['LETTER_LIT', 'a'],
72
+ ['TO', 'to'],
73
+ ['LETTER_LIT', 'f']
74
+ ]
75
+ match_expectations(subject, expectations)
76
+ end
77
+ end # context
78
+
65
79
  context 'Quantifier tokenization:' do
66
80
  it "should recognize 'exactly ... times'" do
67
81
  input = 'exactly 4 Times'
68
82
  subject.scanner.string = input
69
83
  expectations = [
70
84
  ['EXACTLY', 'exactly'],
71
- ['DIGIT', '4'],
85
+ ['DIGIT_LIT', '4'],
72
86
  ['TIMES', 'Times']
73
87
  ]
74
88
  match_expectations(subject, expectations)
@@ -79,9 +93,9 @@ module SRL
79
93
  subject.scanner.string = input
80
94
  expectations = [
81
95
  ['BETWEEN', 'Between'],
82
- ['DIGIT', '2'],
96
+ ['DIGIT_LIT', '2'],
83
97
  ['AND', 'AND'],
84
- ['DIGIT', '4'],
98
+ ['DIGIT_LIT', '4'],
85
99
  ['TIMES', 'times']
86
100
  ]
87
101
  match_expectations(subject, expectations)
@@ -1,4 +1,5 @@
1
1
  require_relative './lib/parser'
2
+ require_relative './lib/ast_builder'
2
3
 
3
4
  def print_title(aTitle)
4
5
  puts aTitle
@@ -28,13 +29,13 @@ Simple Regex Language parser:
28
29
  - Parses a very limited subset of the language and displays the parse tree
29
30
 
30
31
  Command-line syntax:
31
- ruby #{my_name} "quantifier expression"
32
+ ruby #{my_name} "SRL expression"
32
33
  where:
33
- the SRL quantifier expression is enclosed between double quotes (")
34
+ the SRL expression is enclosed between double quotes (")
34
35
 
35
36
  Examples:
36
- ruby #{my_name} "exactly 4 times"
37
- ruby #{my_name} "between 2 and 3 times"
37
+ ruby #{my_name} "letter from a to f exactly 4 times"
38
+ ruby #{my_name} "uppercase letter between 2 and 3 times"
38
39
  END_MSG
39
40
  puts msg
40
41
  exit(1)
@@ -54,4 +55,13 @@ end
54
55
  cst_ptree = result.parse_tree
55
56
  print_tree('Concrete Syntax Tree (CST)', cst_ptree)
56
57
 
58
+ # Generate a regexp literal representation from the parse result
59
+ tree_builder = ASTBuilder
60
+ ast_ptree = result.parse_tree(tree_builder)
61
+
62
+ # Now output the regexp literal
63
+ root = ast_ptree.root
64
+ print_title('SRL to Regexp representation:')
65
+ puts "#{ARGV[0]} => #{root.to_str}" # Output the expression result
66
+
57
67
  # End of file
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.5.08'.freeze
6
+ Version = '0.5.09'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.08
4
+ version: 0.5.09
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-11-28 00:00:00.000000000 Z
11
+ date: 2017-12-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: coveralls
@@ -146,11 +146,22 @@ files:
146
146
  - examples/general/SRL/lib/ast_building.rb
147
147
  - examples/general/SRL/lib/grammar.rb
148
148
  - examples/general/SRL/lib/parser.rb
149
+ - examples/general/SRL/lib/regex/abstract_method.rb
150
+ - examples/general/SRL/lib/regex/atomic_expression.rb
151
+ - examples/general/SRL/lib/regex/char_class.rb
152
+ - examples/general/SRL/lib/regex/char_range.rb
153
+ - examples/general/SRL/lib/regex/character.rb
154
+ - examples/general/SRL/lib/regex/compound_expression.rb
155
+ - examples/general/SRL/lib/regex/expression.rb
156
+ - examples/general/SRL/lib/regex/monadic_expression.rb
149
157
  - examples/general/SRL/lib/regex/multiplicity.rb
158
+ - examples/general/SRL/lib/regex/polyadic_expression.rb
159
+ - examples/general/SRL/lib/regex/quantifiable.rb
160
+ - examples/general/SRL/lib/regex/repetition.rb
150
161
  - examples/general/SRL/lib/regex_repr.rb
151
- - examples/general/SRL/lib/srl_demo.rb
152
162
  - examples/general/SRL/lib/tokenizer.rb
153
163
  - examples/general/SRL/spec/integration_spec.rb
164
+ - examples/general/SRL/spec/regex/character_spec.rb
154
165
  - examples/general/SRL/spec/regex/multiplicity_spec.rb
155
166
  - examples/general/SRL/spec/spec_helper.rb
156
167
  - examples/general/SRL/spec/tokenizer_spec.rb
@@ -1,67 +0,0 @@
1
- require_relative 'parser'
2
- require_relative 'ast_builder'
3
-
4
- def print_title(aTitle)
5
- puts aTitle
6
- puts '=' * aTitle.size
7
- end
8
-
9
- def print_tree(aTitle, aParseTree)
10
- # Let's create a parse tree visitor
11
- visitor = Rley::ParseTreeVisitor.new(aParseTree)
12
-
13
- # Now output formatted parse tree
14
- print_title(aTitle)
15
- renderer = Rley::Formatter::Asciitree.new($stdout)
16
- renderer.render(visitor)
17
- puts ''
18
- end
19
-
20
- # Create a calculator parser object
21
- parser = SRL::Parser.new
22
-
23
- # Parse the input expression in command-line
24
- if ARGV.empty?
25
- my_name = File.basename(__FILE__)
26
- msg = <<-END_MSG
27
- Demo parser for the SRL, the Simple Regex Language (https://simple-regex.com/).
28
- Ultimately it will support SRL in full, currently it parses only the
29
- SRL quantifiers.
30
- The utility prints the resulting regular expression.
31
-
32
- Command-line syntax:
33
- ruby #{my_name} filename
34
- where:
35
- the file name is a SRL source file.
36
-
37
- Examples:
38
- ruby #{my_name} sample01.srl
39
- END_MSG
40
- puts msg
41
- exit(1)
42
- end
43
- puts ARGV[0]
44
- result = parser.parse_expression(ARGV[0])
45
-
46
- unless result.success?
47
- # Stop if the parse failed...
48
- puts "Parsing of '#{ARGV[0]}' failed"
49
- puts "Reason: #{result.failure_reason.message}"
50
- exit(1)
51
- end
52
-
53
-
54
- # Generate a concrete syntax parse tree from the parse result
55
- cst_ptree = result.parse_tree
56
- print_tree('Concrete Syntax Tree (CST)', cst_ptree)
57
-
58
- # Generate an abstract syntax parse tree from the parse result
59
- tree_builder = ASTBuilder
60
- ast_ptree = result.parse_tree(tree_builder)
61
- # print_tree('Abstract Syntax Tree (AST)', ast_ptree)
62
-
63
- # # Now perform the computation of math expression
64
- # root = ast_ptree.root
65
- # print_title('Result:')
66
- # puts root.interpret.to_s # Output the expression result
67
- # End of file