rley 0.6.01 → 0.6.02

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -1
  3. data/examples/NLP/engtagger.rb +58 -60
  4. data/lib/rley/constants.rb +1 -1
  5. metadata +2 -33
  6. data/examples/general/SRL/lib/ast_builder.rb +0 -382
  7. data/examples/general/SRL/lib/grammar.rb +0 -106
  8. data/examples/general/SRL/lib/regex/abstract_method.rb +0 -35
  9. data/examples/general/SRL/lib/regex/alternation.rb +0 -27
  10. data/examples/general/SRL/lib/regex/anchor.rb +0 -45
  11. data/examples/general/SRL/lib/regex/atomic_expression.rb +0 -16
  12. data/examples/general/SRL/lib/regex/capturing_group.rb +0 -51
  13. data/examples/general/SRL/lib/regex/char_class.rb +0 -38
  14. data/examples/general/SRL/lib/regex/char_range.rb +0 -51
  15. data/examples/general/SRL/lib/regex/char_shorthand.rb +0 -50
  16. data/examples/general/SRL/lib/regex/character.rb +0 -204
  17. data/examples/general/SRL/lib/regex/compound_expression.rb +0 -57
  18. data/examples/general/SRL/lib/regex/concatenation.rb +0 -29
  19. data/examples/general/SRL/lib/regex/expression.rb +0 -60
  20. data/examples/general/SRL/lib/regex/lookaround.rb +0 -50
  21. data/examples/general/SRL/lib/regex/match_option.rb +0 -34
  22. data/examples/general/SRL/lib/regex/monadic_expression.rb +0 -28
  23. data/examples/general/SRL/lib/regex/multiplicity.rb +0 -91
  24. data/examples/general/SRL/lib/regex/non_capturing_group.rb +0 -27
  25. data/examples/general/SRL/lib/regex/polyadic_expression.rb +0 -60
  26. data/examples/general/SRL/lib/regex/quantifiable.rb +0 -22
  27. data/examples/general/SRL/lib/regex/repetition.rb +0 -29
  28. data/examples/general/SRL/lib/regex/wildcard.rb +0 -23
  29. data/examples/general/SRL/lib/regex_repr.rb +0 -13
  30. data/examples/general/SRL/lib/tokenizer.rb +0 -147
  31. data/examples/general/SRL/spec/integration_spec.rb +0 -448
  32. data/examples/general/SRL/spec/regex/character_spec.rb +0 -166
  33. data/examples/general/SRL/spec/regex/multiplicity_spec.rb +0 -79
  34. data/examples/general/SRL/spec/spec_helper.rb +0 -25
  35. data/examples/general/SRL/spec/tokenizer_spec.rb +0 -148
  36. data/examples/general/SRL/srl_demo.rb +0 -75
@@ -1,166 +0,0 @@
1
- # File: character_spec.rb
2
- require_relative '../spec_helper' # Use the RSpec test framework
3
- require_relative '../../lib/regex/character'
4
-
5
- module Regex # Open this namespace, to get rid of scope qualifiers
6
- describe Character do
7
- # This constant holds an arbitrary selection of characters
8
- SampleChars = [?a, ?\0, ?\u0107].freeze
9
-
10
- # This constant holds the codepoints of the character selection
11
- SampleInts = [0x61, 0, 0x0107].freeze
12
-
13
- # This constant holds an arbitrary selection of two-character (digram)
14
- # escape sequences
15
- SampleDigrams = %w[\n \e \0 \6 \k].freeze
16
-
17
- # This constant holds an arbitrary selection of escaped octal
18
- # or hexadecimal literals
19
- SampleNumEscs = %w[\0 \07 \x07 \xa \x0F \u03a3 \u{a}].freeze
20
-
21
- before(:all) do
22
- # Ensure that the set of codepoints matches the set of chars...
23
- expect(SampleChars.map(&:ord)).to eq(SampleInts)
24
- end
25
-
26
- context 'Creation & initialization' do
27
- it 'should be created with a with an integer value (codepoint) or...' do
28
- SampleInts.each do |aCodepoint|
29
- expect { Character.new(aCodepoint) }.not_to raise_error
30
- end
31
- end
32
-
33
- it '...could be created with a single character String or...' do
34
- SampleChars.each do |aChar|
35
- expect { Character.new(aChar) }.not_to raise_error
36
- end
37
- end
38
-
39
- it '...could be created with an escape sequence' do
40
- # Case 1: escape sequence is a digram
41
- SampleDigrams.each do |anEscapeSeq|
42
- expect { Character.new(anEscapeSeq) }.not_to raise_error
43
- end
44
-
45
- # Case 2: escape sequence is an escaped octal or hexadecimal literal
46
- SampleNumEscs.each do |anEscapeSeq|
47
- expect { Character.new(anEscapeSeq) }.not_to raise_error
48
- end
49
- end
50
- end # context
51
-
52
- context 'Provided services' do
53
- it 'Should know its lexeme if created from a string' do
54
- # Lexeme is defined when the character was initialised from a text
55
- SampleChars.each do |aChar|
56
- ch = Character.new(aChar)
57
- expect(ch.lexeme).to eq(aChar)
58
- end
59
- end
60
-
61
- it 'Should not know its lexeme representation from a codepoint' do
62
- SampleInts.each do |aChar|
63
- ch = Character.new(aChar)
64
- expect(ch.lexeme).to be_nil
65
- end
66
- end
67
-
68
- it 'should know its String representation' do
69
- # Try for one character
70
- newOne = Character.new(?\u03a3)
71
- expect(newOne.char).to eq('Σ')
72
- expect(newOne.to_str).to eq("\u03A3")
73
-
74
- # Try with our chars sample
75
- SampleChars.each { |aChar| Character.new(aChar).to_str == aChar }
76
-
77
- # Try with our codepoint sample
78
- mapped_chars = SampleInts.map do |aCodepoint|
79
- Character.new(aCodepoint).char
80
- end
81
- expect(mapped_chars).to eq(SampleChars)
82
-
83
- # Try with our escape sequence samples
84
- (SampleDigrams + SampleNumEscs).each do |anEscSeq|
85
- expectation = String.class_eval(%Q|"#{anEscSeq}"|, __FILE__, __LINE__)
86
- Character.new(anEscSeq).to_str == expectation
87
- end
88
- end
89
-
90
- it 'should know its codepoint' do
91
- # Try for one character
92
- newOne = Character.new(?\u03a3)
93
- expect(newOne.codepoint).to eq(0x03a3)
94
-
95
- # Try with our chars sample
96
- allCodepoints = SampleChars.map do |aChar|
97
- Character.new(aChar).codepoint
98
- end
99
- expect(allCodepoints).to eq(SampleInts)
100
-
101
- # Try with our codepoint sample
102
- mapped_chars = SampleInts.each do |aCodepoint|
103
- expect(Character.new(aCodepoint).codepoint).to eq(aCodepoint)
104
- end
105
-
106
- # Try with our escape sequence samples
107
- (SampleDigrams + SampleNumEscs).each do |anEscSeq|
108
- expectation = String.class_eval(%Q|"#{anEscSeq}".ord()|, __FILE__, __LINE__)
109
- expect(Character.new(anEscSeq).codepoint).to eq(expectation)
110
- end
111
- end
112
-
113
- it 'should known whether it is equal to another Object' do
114
- newOne = Character.new(?\u03a3)
115
-
116
- # Case 1: test equality with itself
117
- expect(newOne).to eq(newOne)
118
-
119
- # Case 2: test equality with another Character
120
- expect(newOne).to eq(Character.new(?\u03a3))
121
- expect(newOne).not_to eq(Character.new(?\u0333))
122
-
123
- # Case 3: test equality with an integer value
124
- # (equality based on codepoint value)
125
- expect(newOne).to eq(0x03a3)
126
- expect(newOne).not_to eq(0x0333)
127
-
128
- # Case 4: test equality with a single-character String
129
- expect(newOne).to eq(?\u03a3)
130
- expect(newOne).not_to eq(?\u0333)
131
-
132
- # Case 5: test fails with multiple character strings
133
- expect(newOne).not_to eq('03a3')
134
-
135
- # Case 6: equality testing with arbitrary object
136
- expect(newOne).not_to eq(nil)
137
- expect(newOne).not_to eq(Object.new)
138
-
139
- # In case 6, equality is based on to_s method.
140
- simulator = double('fake')
141
- expect(simulator).to receive(:to_s).and_return(?\u03a3)
142
- expect(newOne).to eq(simulator)
143
-
144
- # Create a module that re-defines the existing to_s method
145
- module Tweak_to_s
146
- def to_s() # Overwrite the existing to_s method
147
- return ?\u03a3
148
- end
149
- end # module
150
- weird = Object.new
151
- weird.extend(Tweak_to_s)
152
- expect(newOne).to eq(weird)
153
- end
154
-
155
- it 'should know its readable description' do
156
- ch1 = Character.new('a')
157
- expect(ch1.explain).to eq("the character 'a'")
158
-
159
- ch2 = Character.new(?\u03a3)
160
- expect(ch2.explain).to eq("the character '\u03a3'")
161
- end
162
- end # context
163
- end # describe
164
- end # module
165
-
166
- # End of file
@@ -1,79 +0,0 @@
1
- # File: multiplicity_spec.rb
2
-
3
- require_relative '../spec_helper' # Use the RSpec test framework
4
- require_relative '../../lib/regex/multiplicity'
5
-
6
- module SRL
7
- # Reopen the module, in order to get rid of fully qualified names
8
- module Regex # This module is used as a namespace
9
- describe Multiplicity do
10
- context 'Creation & initialisation' do
11
- it 'should be created with 3 arguments' do
12
- # Valid cases: initialized with two integer values and a policy symbol
13
- %i[greedy lazy possessive].each do |aPolicy|
14
- expect { Multiplicity.new(0, 1, aPolicy) }.not_to raise_error
15
- end
16
-
17
- # Invalid case: initialized with invalid policy value
18
- err = StandardError
19
- msg = "Invalid repetition policy 'KO'."
20
- expect { Multiplicity.new(0, :more, 'KO') }.to raise_error(err, msg)
21
- end
22
- end
23
-
24
- context 'Provided services' do
25
- it 'should know its text representation' do
26
- policy2text = { greedy: '', lazy: '?', possessive: '+' }
27
-
28
- # Case: zero or one
29
- policy2text.each_key do |aPolicy|
30
- multi = Multiplicity.new(0, 1, aPolicy)
31
- expect(multi.to_str).to eq("?#{policy2text[aPolicy]}")
32
- end
33
-
34
- # Case: zero or more
35
- policy2text.each_key do |aPolicy|
36
- multi = Multiplicity.new(0, :more, aPolicy)
37
- expect(multi.to_str).to eq("*#{policy2text[aPolicy]}")
38
- end
39
-
40
- # Case: one or more
41
- policy2text.each_key do |aPolicy|
42
- multi = Multiplicity.new(1, :more, aPolicy)
43
- expect(multi.to_str).to eq("+#{policy2text[aPolicy]}")
44
- end
45
-
46
- # Case: exactly m times
47
- policy2text.each_key do |aPolicy|
48
- samples = [1, 2, 5, 100]
49
- samples.each do |aCount|
50
- multi = Multiplicity.new(aCount, aCount, aPolicy)
51
- expect(multi.to_str).to eq("{#{aCount}}#{policy2text[aPolicy]}")
52
- end
53
- end
54
-
55
- # Case: m, n times
56
- policy2text.each_key do |aPolicy|
57
- samples = [1, 2, 5, 100]
58
- samples.each do |aCount|
59
- upper = aCount + 1 + rand(20)
60
- multi = Multiplicity.new(aCount, upper, aPolicy)
61
- expectation = "{#{aCount},#{upper}}#{policy2text[aPolicy]}"
62
- expect(multi.to_str).to eq(expectation)
63
- end
64
- end
65
-
66
- # Case: m or more
67
- policy2text.each_key do |aPolicy|
68
- samples = [2, 3, 5, 100]
69
- samples.each do |aCount|
70
- multi = Multiplicity.new(aCount, :more, aPolicy)
71
- expect(multi.to_str).to eq("{#{aCount},}#{policy2text[aPolicy]}")
72
- end
73
- end
74
- end
75
- end
76
- end
77
- end # module
78
- end # module
79
- # End of file
@@ -1,25 +0,0 @@
1
- # File: spec_helper.rb
2
- # Purpose: utility file that is loaded by all our RSpec files
3
-
4
- require 'simplecov'
5
-
6
- SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter.new(
7
- [
8
- SimpleCov::Formatter::HTMLFormatter
9
- ]
10
- )
11
-
12
- require 'pp' # Use pretty-print for debugging purposes
13
- require 'rspec' # Use the RSpec framework
14
-
15
- RSpec.configure do |config|
16
- config.expect_with :rspec do |c|
17
- # Disable the `should` syntax...
18
- c.syntax = :expect
19
- end
20
-
21
- # Display stack trace in case of failure
22
- config.full_backtrace = true
23
- end
24
-
25
- # End of file
@@ -1,148 +0,0 @@
1
- require_relative 'spec_helper' # Use the RSpec framework
2
- require_relative '../lib/tokenizer' # Load the class under test
3
-
4
-
5
- module SRL
6
- describe Tokenizer do
7
- def match_expectations(aTokenizer, theExpectations)
8
- aTokenizer.tokens.each_with_index do |token, i|
9
- terminal, lexeme = theExpectations[i]
10
- expect(token.terminal).to eq(terminal)
11
- expect(token.lexeme).to eq(lexeme)
12
- end
13
- end
14
-
15
- subject { Tokenizer.new('') }
16
-
17
- context 'Initialization:' do
18
- it 'should be initialized with a text to tokenize and a grammar' do
19
- expect { Tokenizer.new('anything') }.not_to raise_error
20
- end
21
-
22
- it 'should have its scanner initialized' do
23
- expect(subject.scanner).to be_kind_of(StringScanner)
24
- end
25
- end # context
26
-
27
- context 'Single token recognition:' do
28
- it 'should tokenize delimiters and separators' do
29
- subject.scanner.string = ','
30
- token = subject.tokens.first
31
- expect(token).to be_kind_of(Rley::Lexical::Token)
32
- expect(token.terminal).to eq('COMMA')
33
- expect(token.lexeme).to eq(',')
34
- end
35
-
36
- it 'should tokenize keywords' do
37
- sample = 'between Exactly oncE optional TWICE'
38
- subject.scanner.string = sample
39
- subject.tokens.each do |tok|
40
- expect(tok).to be_kind_of(Rley::Lexical::Token)
41
- expect(tok.terminal).to eq(tok.lexeme.upcase)
42
- end
43
- end
44
-
45
- it 'should tokenize integer values' do
46
- subject.scanner.string = ' 123 '
47
- token = subject.tokens.first
48
- expect(token).to be_kind_of(Rley::Lexical::Token)
49
- expect(token.terminal).to eq('INTEGER')
50
- expect(token.lexeme).to eq('123')
51
- end
52
-
53
- it 'should tokenize single digits' do
54
- subject.scanner.string = ' 1 '
55
- token = subject.tokens.first
56
- expect(token).to be_kind_of(Rley::Lexical::Token)
57
- expect(token.terminal).to eq('DIGIT_LIT')
58
- expect(token.lexeme).to eq('1')
59
- end
60
- end # context
61
-
62
- context 'String literal tokenization:' do
63
- it "should recognize 'literally ...'" do
64
- input = 'literally "hello"'
65
- subject.scanner.string = input
66
- expectations = [
67
- %w[LITERALLY literally],
68
- %w[STRING_LIT hello]
69
- ]
70
- match_expectations(subject, expectations)
71
- end
72
- end # context
73
-
74
- context 'Character range tokenization:' do
75
- it "should recognize 'letter from ... to ...'" do
76
- input = 'letter a to f'
77
- subject.scanner.string = input
78
- expectations = [
79
- %w[LETTER letter],
80
- %w[LETTER_LIT a],
81
- %w[TO to],
82
- %w[LETTER_LIT f]
83
- ]
84
- match_expectations(subject, expectations)
85
- end
86
- end # context
87
-
88
- context 'Quantifier tokenization:' do
89
- it "should recognize 'exactly ... times'" do
90
- input = 'exactly 4 Times'
91
- subject.scanner.string = input
92
- expectations = [
93
- %w[EXACTLY exactly],
94
- %w[DIGIT_LIT 4],
95
- %w[TIMES Times]
96
- ]
97
- match_expectations(subject, expectations)
98
- end
99
-
100
- it "should recognize 'between ... and ... times'" do
101
- input = 'Between 2 AND 4 times'
102
- subject.scanner.string = input
103
- expectations = [
104
- %w[BETWEEN Between],
105
- %w[DIGIT_LIT 2],
106
- %w[AND AND],
107
- %w[DIGIT_LIT 4],
108
- %w[TIMES times]
109
- ]
110
- match_expectations(subject, expectations)
111
- end
112
-
113
- it "should recognize 'once or more'" do
114
- input = 'Once or MORE'
115
- subject.scanner.string = input
116
- expectations = [
117
- %w[ONCE Once],
118
- %w[OR or],
119
- %w[MORE MORE]
120
- ]
121
- match_expectations(subject, expectations)
122
- end
123
-
124
- it "should recognize 'never or more'" do
125
- input = 'never or more'
126
- subject.scanner.string = input
127
- expectations = [
128
- %w[NEVER never],
129
- %w[OR or],
130
- %w[MORE more]
131
- ]
132
- match_expectations(subject, expectations)
133
- end
134
-
135
- it "should recognize 'at least ... times'" do
136
- input = 'at least 10 times'
137
- subject.scanner.string = input
138
- expectations = [
139
- %w[AT at],
140
- %w[LEAST least],
141
- %w[INTEGER 10],
142
- %w[TIMES times]
143
- ]
144
- match_expectations(subject, expectations)
145
- end
146
- end # context
147
- end # describe
148
- end # module
@@ -1,75 +0,0 @@
1
- require_relative './lib/tokenizer'
2
- require_relative './lib/grammar'
3
- require_relative './lib/ast_builder'
4
-
5
- def print_title(aTitle)
6
- puts aTitle
7
- puts '=' * aTitle.size
8
- end
9
-
10
- def print_tree(aTitle, aParseTree)
11
- # Let's create a parse tree visitor
12
- visitor = Rley::ParseTreeVisitor.new(aParseTree)
13
-
14
- # Now output formatted parse tree
15
- print_title(aTitle)
16
- renderer = Rley::Formatter::Asciitree.new($stdout)
17
- renderer.render(visitor)
18
- puts ''
19
- end
20
-
21
- # Parse the input expression given on the command line
22
- if ARGV.empty?
23
- my_name = File.basename(__FILE__)
24
- msg = <<-END_MSG
25
- WORK IN PROGRESS
26
- Simple Regex Language parser:
27
- - Parses a very limited subset of the language and displays the parse tree
28
-
29
- Command-line syntax:
30
- ruby #{my_name} "SRL expression"
31
- where:
32
- the SRL expression is enclosed between double quotes (")
33
-
34
- Examples:
35
- ruby #{my_name} "letter from a to f exactly 4 times"
36
- ruby #{my_name} "uppercase letter between 2 and 3 times"
37
- ruby #{my_name} "digit from 0 to 7 once or more"
38
- END_MSG
39
- puts msg
40
- exit(1)
41
- end
42
- puts ARGV[0]
43
-
44
- # Create a Rley facade object
45
- engine = Rley::Engine.new
46
-
47
- ########################################
48
- # Step 1. Load the SRL (Simple Regex Language) grammar
49
- engine.use_grammar(SRL::Grammar)
50
-
51
- lexer = SRL::Tokenizer.new(ARGV[0])
52
- result = engine.parse(lexer.tokens)
53
-
54
- unless result.success?
55
- # Stop if the parse failed...
56
- puts "Parsing of '#{ARGV[0]}' failed"
57
- puts "Reason: #{result.failure_reason.message}"
58
- exit(1)
59
- end
60
-
61
-
62
- # Generate a concrete syntax parse tree from the parse result
63
- cst_ptree = engine.convert(result)
64
- print_tree('Concrete Syntax Tree (CST)', cst_ptree)
65
-
66
- # Generate an abstract syntax tree (AST) from the parse result
67
- engine.configuration.repr_builder = ASTBuilder
68
- ast_ptree = engine.convert(result)
69
-
70
- # Now output the regexp literal
71
- root = ast_ptree.root
72
- print_title('SRL to Regexp representation:')
73
- puts "#{ARGV[0]} => #{root.to_str}" # Output the expression result
74
-
75
- # End of file