rley 0.5.08 → 0.5.09

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,159 @@
1
+ # File: character_spec.rb
2
+ require_relative '../spec_helper' # Use the RSpec test framework
3
+ require_relative '../../lib/regex/character'
4
+
5
+ module Regex # Open this namespace, to get rid of scope qualifiers
6
+
7
+ describe Character do
8
+ # This constant holds an arbitrary selection of characters
9
+ SampleChars = [?a, ?\0, ?\u0107]
10
+
11
+ # This constant holds the codepoints of the character selection
12
+ SampleInts = [0x61, 0, 0x0107]
13
+
14
+ # This constant holds an arbitrary selection of two-character (digram) escape sequences
15
+ SampleDigrams = %w[ \n \e \0 \6 \k]
16
+
17
+ # This constant holds an arbitrary selection of escaped octal or hexadecimal literals
18
+ SampleNumEscs = %w[ \0 \07 \x07 \xa \x0F \u03a3 \u{a}]
19
+
20
+ before(:all) do
21
+ # Ensure that the set of codepoints is mapping the set of chars...
22
+ expect(SampleChars.map(&:ord)).to eq(SampleInts)
23
+ end
24
+
25
+ context 'Creation & initialization' do
26
+ it 'should be created with a with an integer value (codepoint) or...' do
27
+ SampleInts.each do |aCodepoint|
28
+ expect { Character.new(aCodepoint) }.not_to raise_error
29
+ end
30
+ end
31
+
32
+ it '...could be created with a single character String or...' do
33
+ SampleChars.each do |aChar|
34
+ expect { Character.new(aChar) }.not_to raise_error
35
+ end
36
+ end
37
+
38
+ it '...could be created with an escape sequence' do
39
+ # Case 1: escape sequence is a digram
40
+ SampleDigrams.each do |anEscapeSeq|
41
+ expect { Character.new(anEscapeSeq) }.not_to raise_error
42
+ end
43
+
44
+ # Case 2: escape sequence is an escaped octal or hexadecimal literal
45
+ SampleNumEscs.each do |anEscapeSeq|
46
+ expect { Character.new(anEscapeSeq) }.not_to raise_error
47
+ end
48
+ end
49
+
50
+ end # context
51
+
52
+ context 'Provided services' do
53
+ it 'Should know its lexeme if created from a string' do
54
+ # Lexeme is defined when the character was initialised from a text
55
+ SampleChars.each do |aChar|
56
+ ch = Character.new(aChar)
57
+ expect(ch.lexeme).to eq(aChar)
58
+ end
59
+ end
60
+
61
+ it 'Should not know its lexeme representation if created from a codepoint' do
62
+ SampleInts.each do |aChar|
63
+ ch = Character.new(aChar)
64
+ expect(ch.lexeme).to be_nil
65
+ end
66
+ end
67
+
68
+ it 'should know its String representation' do
69
+ # Try for one character
70
+ newOne = Character.new(?\u03a3)
71
+ expect(newOne.char).to eq('Σ')
72
+ expect(newOne.to_str).to eq("\u03A3")
73
+
74
+ # Try with our chars sample
75
+ SampleChars.each { |aChar| Character.new(aChar).to_str == aChar }
76
+
77
+ # Try with our codepoint sample
78
+ mapped_chars = SampleInts.map { |aCodepoint| Character.new(aCodepoint).char }
79
+ expect(mapped_chars).to eq(SampleChars)
80
+
81
+ # Try with our escape sequence samples
82
+ (SampleDigrams + SampleNumEscs).each do |anEscSeq|
83
+ Character.new(anEscSeq).to_str == String::class_eval(%Q|"#{anEscSeq}"|)
84
+ end
85
+ end
86
+
87
+ it 'should know its codepoint' do
88
+ # Try for one character
89
+ newOne = Character.new(?\u03a3)
90
+ expect(newOne.codepoint).to eq(0x03a3)
91
+
92
+ # Try with our chars sample
93
+ allCodepoints = SampleChars.map { |aChar| Character.new(aChar).codepoint }
94
+ expect(allCodepoints).to eq(SampleInts)
95
+
96
+ # Try with our codepoint sample
97
+ mapped_chars = SampleInts.each { |aCodepoint| expect(Character.new(aCodepoint).codepoint).to eq(aCodepoint) }
98
+
99
+ # Try with our escape sequence samples
100
+ (SampleDigrams + SampleNumEscs).each do |anEscSeq|
101
+ expect(Character.new(anEscSeq).codepoint).to eq(String::class_eval(%Q|"#{anEscSeq}".ord()|))
102
+ end
103
+ end
104
+
105
+ it 'should known whether it is equal to another Object' do
106
+ newOne = Character.new(?\u03a3)
107
+
108
+ # Case 1: test equality with itself
109
+ expect(newOne).to eq(newOne)
110
+
111
+ # Case 2: test equality with another Character
112
+ expect(newOne).to eq(Character.new(?\u03a3))
113
+ expect(newOne).not_to eq(Character.new(?\u0333))
114
+
115
+ # Case 3: test equality with an integer value (equality based on codepoint value)
116
+ expect(newOne).to eq(0x03a3)
117
+ expect(newOne).not_to eq(0x0333)
118
+
119
+ # Case 4: test equality with a single-character String
120
+ expect(newOne).to eq(?\u03a3)
121
+ expect(newOne).not_to eq(?\u0333)
122
+
123
+ # Case 5: test fails with multiple character strings
124
+ expect(newOne).not_to eq('03a3')
125
+
126
+ # Case 6: equality testing with an arbitrary object
127
+ expect(newOne).not_to eq(nil)
128
+ expect(newOne).not_to eq(Object.new)
129
+
130
+ # In case 6, equality is based on to_s method.
131
+ simulator = double('fake')
132
+ expect(simulator).to receive(:to_s).and_return(?\u03a3)
133
+ expect(newOne).to eq(simulator)
134
+
135
+ # Create a module that re-defines the existing to_s method
136
+ module Tweak_to_s
137
+ def to_s() # Overwrite the existing to_s method
138
+ return ?\u03a3
139
+ end
140
+ end # module
141
+ weird = Object.new
142
+ weird.extend(Tweak_to_s)
143
+ expect(newOne).to eq(weird)
144
+ end
145
+
146
+ it "should know its readable description" do
147
+ ch1 = Character.new('a')
148
+ expect(ch1.explain).to eq("the character 'a'")
149
+
150
+ ch2 = Character.new(?\u03a3)
151
+ expect(ch2.explain).to eq("the character '\u03a3'")
152
+ end
153
+ end # context
154
+
155
+ end # describe
156
+
157
+ end # module
158
+
159
+ # End of file
@@ -57,18 +57,32 @@ module SRL
57
57
  subject.scanner.string = ' 1 '
58
58
  token = subject.tokens.first
59
59
  expect(token).to be_kind_of(Rley::Lexical::Token)
60
- expect(token.terminal.name).to eq('DIGIT')
60
+ expect(token.terminal.name).to eq('DIGIT_LIT')
61
61
  expect(token.lexeme).to eq('1')
62
62
  end
63
63
  end # context
64
64
 
65
+ context 'Character range tokenization:' do # Checks the token sequence produced for a letter-range expression
66
+ it "should recognize 'letter from ... to ...'" do
67
+ input = 'letter a to f' # NOTE(review): the example title mentions 'from' but this input omits it — confirm which form is intended
68
+ subject.scanner.string = input
69
+ expectations = [ # presumably [terminal name, lexeme] pairs; match_expectations is defined outside this hunk
70
+ ['LETTER', 'letter'],
71
+ ['LETTER_LIT', 'a'],
72
+ ['TO', 'to'],
73
+ ['LETTER_LIT', 'f']
74
+ ]
75
+ match_expectations(subject, expectations)
76
+ end
77
+ end # context
78
+
65
79
  context 'Quantifier tokenization:' do
66
80
  it "should recognize 'exactly ... times'" do
67
81
  input = 'exactly 4 Times'
68
82
  subject.scanner.string = input
69
83
  expectations = [
70
84
  ['EXACTLY', 'exactly'],
71
- ['DIGIT', '4'],
85
+ ['DIGIT_LIT', '4'],
72
86
  ['TIMES', 'Times']
73
87
  ]
74
88
  match_expectations(subject, expectations)
@@ -79,9 +93,9 @@ module SRL
79
93
  subject.scanner.string = input
80
94
  expectations = [
81
95
  ['BETWEEN', 'Between'],
82
- ['DIGIT', '2'],
96
+ ['DIGIT_LIT', '2'],
83
97
  ['AND', 'AND'],
84
- ['DIGIT', '4'],
98
+ ['DIGIT_LIT', '4'],
85
99
  ['TIMES', 'times']
86
100
  ]
87
101
  match_expectations(subject, expectations)
@@ -1,4 +1,5 @@
1
1
  require_relative './lib/parser'
2
+ require_relative './lib/ast_builder'
2
3
 
3
4
  def print_title(aTitle)
4
5
  puts aTitle
@@ -28,13 +29,13 @@ Simple Regex Language parser:
28
29
  - Parses a very limited subset of the language and displays the parse tree
29
30
 
30
31
  Command-line syntax:
31
- ruby #{my_name} "quantifier expression"
32
+ ruby #{my_name} "SRL expression"
32
33
  where:
33
- the SRL quantifier expression is enclosed between double quotes (")
34
+ the SRL expression is enclosed between double quotes (")
34
35
 
35
36
  Examples:
36
- ruby #{my_name} "exactly 4 times"
37
- ruby #{my_name} "between 2 and 3 times"
37
+ ruby #{my_name} "letter from a to f exactly 4 times"
38
+ ruby #{my_name} "uppercase letter between 2 and 3 times"
38
39
  END_MSG
39
40
  puts msg
40
41
  exit(1)
@@ -54,4 +55,13 @@ end
54
55
  cst_ptree = result.parse_tree
55
56
  print_tree('Concrete Syntax Tree (CST)', cst_ptree)
56
57
 
58
+ # Generate a regexp literal representation from the parse result
59
+ tree_builder = ASTBuilder
60
+ ast_ptree = result.parse_tree(tree_builder)
61
+
62
+ # Now output the regexp literal
63
+ root = ast_ptree.root
64
+ print_title('SRL to Regexp representation:')
65
+ puts "#{ARGV[0]} => #{root.to_str}" # Output the expression result
66
+
57
67
  # End of file
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.5.08'.freeze
6
+ Version = '0.5.09'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.08
4
+ version: 0.5.09
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-11-28 00:00:00.000000000 Z
11
+ date: 2017-12-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: coveralls
@@ -146,11 +146,22 @@ files:
146
146
  - examples/general/SRL/lib/ast_building.rb
147
147
  - examples/general/SRL/lib/grammar.rb
148
148
  - examples/general/SRL/lib/parser.rb
149
+ - examples/general/SRL/lib/regex/abstract_method.rb
150
+ - examples/general/SRL/lib/regex/atomic_expression.rb
151
+ - examples/general/SRL/lib/regex/char_class.rb
152
+ - examples/general/SRL/lib/regex/char_range.rb
153
+ - examples/general/SRL/lib/regex/character.rb
154
+ - examples/general/SRL/lib/regex/compound_expression.rb
155
+ - examples/general/SRL/lib/regex/expression.rb
156
+ - examples/general/SRL/lib/regex/monadic_expression.rb
149
157
  - examples/general/SRL/lib/regex/multiplicity.rb
158
+ - examples/general/SRL/lib/regex/polyadic_expression.rb
159
+ - examples/general/SRL/lib/regex/quantifiable.rb
160
+ - examples/general/SRL/lib/regex/repetition.rb
150
161
  - examples/general/SRL/lib/regex_repr.rb
151
- - examples/general/SRL/lib/srl_demo.rb
152
162
  - examples/general/SRL/lib/tokenizer.rb
153
163
  - examples/general/SRL/spec/integration_spec.rb
164
+ - examples/general/SRL/spec/regex/character_spec.rb
154
165
  - examples/general/SRL/spec/regex/multiplicity_spec.rb
155
166
  - examples/general/SRL/spec/spec_helper.rb
156
167
  - examples/general/SRL/spec/tokenizer_spec.rb
@@ -1,67 +0,0 @@
1
- require_relative 'parser'
2
- require_relative 'ast_builder'
3
-
4
- def print_title(aTitle)
5
- puts aTitle
6
- puts '=' * aTitle.size
7
- end
8
-
9
- def print_tree(aTitle, aParseTree)
10
- # Let's create a parse tree visitor
11
- visitor = Rley::ParseTreeVisitor.new(aParseTree)
12
-
13
- # Now output formatted parse tree
14
- print_title(aTitle)
15
- renderer = Rley::Formatter::Asciitree.new($stdout)
16
- renderer.render(visitor)
17
- puts ''
18
- end
19
-
20
- # Create a calculator parser object
21
- parser = SRL::Parser.new
22
-
23
- # Parse the input expression in command-line
24
- if ARGV.empty?
25
- my_name = File.basename(__FILE__)
26
- msg = <<-END_MSG
27
- Demo parser for the SRL, the Simple Regex Language (https://simple-regex.com/).
28
- Ultimately it will support SRL in full, currently it parses only the
29
- SRL quantifiers.
30
- The utility prints the resulting regular expression.
31
-
32
- Command-line syntax:
33
- ruby #{my_name} filename
34
- where:
35
- the file name is a SRL source file.
36
-
37
- Examples:
38
- ruby #{my_name} sample01.srl
39
- END_MSG
40
- puts msg
41
- exit(1)
42
- end
43
- puts ARGV[0]
44
- result = parser.parse_expression(ARGV[0])
45
-
46
- unless result.success?
47
- # Stop if the parse failed...
48
- puts "Parsing of '#{ARGV[0]}' failed"
49
- puts "Reason: #{result.failure_reason.message}"
50
- exit(1)
51
- end
52
-
53
-
54
- # Generate a concrete syntax parse tree from the parse result
55
- cst_ptree = result.parse_tree
56
- print_tree('Concrete Syntax Tree (CST)', cst_ptree)
57
-
58
- # Generate an abstract syntax parse tree from the parse result
59
- tree_builder = ASTBuilder
60
- ast_ptree = result.parse_tree(tree_builder)
61
- # print_tree('Abstract Syntax Tree (AST)', ast_ptree)
62
-
63
- # # Now perform the computation of math expression
64
- # root = ast_ptree.root
65
- # print_title('Result:')
66
- # puts root.interpret.to_s # Output the expression result
67
- # End of file