rley 0.6.01 → 0.6.02

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -1
  3. data/examples/NLP/engtagger.rb +58 -60
  4. data/lib/rley/constants.rb +1 -1
  5. metadata +2 -33
  6. data/examples/general/SRL/lib/ast_builder.rb +0 -382
  7. data/examples/general/SRL/lib/grammar.rb +0 -106
  8. data/examples/general/SRL/lib/regex/abstract_method.rb +0 -35
  9. data/examples/general/SRL/lib/regex/alternation.rb +0 -27
  10. data/examples/general/SRL/lib/regex/anchor.rb +0 -45
  11. data/examples/general/SRL/lib/regex/atomic_expression.rb +0 -16
  12. data/examples/general/SRL/lib/regex/capturing_group.rb +0 -51
  13. data/examples/general/SRL/lib/regex/char_class.rb +0 -38
  14. data/examples/general/SRL/lib/regex/char_range.rb +0 -51
  15. data/examples/general/SRL/lib/regex/char_shorthand.rb +0 -50
  16. data/examples/general/SRL/lib/regex/character.rb +0 -204
  17. data/examples/general/SRL/lib/regex/compound_expression.rb +0 -57
  18. data/examples/general/SRL/lib/regex/concatenation.rb +0 -29
  19. data/examples/general/SRL/lib/regex/expression.rb +0 -60
  20. data/examples/general/SRL/lib/regex/lookaround.rb +0 -50
  21. data/examples/general/SRL/lib/regex/match_option.rb +0 -34
  22. data/examples/general/SRL/lib/regex/monadic_expression.rb +0 -28
  23. data/examples/general/SRL/lib/regex/multiplicity.rb +0 -91
  24. data/examples/general/SRL/lib/regex/non_capturing_group.rb +0 -27
  25. data/examples/general/SRL/lib/regex/polyadic_expression.rb +0 -60
  26. data/examples/general/SRL/lib/regex/quantifiable.rb +0 -22
  27. data/examples/general/SRL/lib/regex/repetition.rb +0 -29
  28. data/examples/general/SRL/lib/regex/wildcard.rb +0 -23
  29. data/examples/general/SRL/lib/regex_repr.rb +0 -13
  30. data/examples/general/SRL/lib/tokenizer.rb +0 -147
  31. data/examples/general/SRL/spec/integration_spec.rb +0 -448
  32. data/examples/general/SRL/spec/regex/character_spec.rb +0 -166
  33. data/examples/general/SRL/spec/regex/multiplicity_spec.rb +0 -79
  34. data/examples/general/SRL/spec/spec_helper.rb +0 -25
  35. data/examples/general/SRL/spec/tokenizer_spec.rb +0 -148
  36. data/examples/general/SRL/srl_demo.rb +0 -75
@@ -1,166 +0,0 @@
1
- # File: character_spec.rb
2
- require_relative '../spec_helper' # Use the RSpec test framework
3
- require_relative '../../lib/regex/character'
4
-
5
- module Regex # Open this namespace, to get rid of scope qualifiers
6
- describe Character do
7
- # This constant holds an arbitrary selection of characters
8
- SampleChars = [?a, ?\0, ?\u0107].freeze
9
-
10
- # This constant holds the codepoints of the character selection
11
- SampleInts = [0x61, 0, 0x0107].freeze
12
-
13
- # This constant holds an arbitrary selection of two-character (digram)
14
- # escape sequences
15
- SampleDigrams = %w[\n \e \0 \6 \k].freeze
16
-
17
- # This constant holds an arbitrary selection of escaped octal
18
- # or hexadecimal literals
19
- SampleNumEscs = %w[\0 \07 \x07 \xa \x0F \u03a3 \u{a}].freeze
20
-
21
- before(:all) do
22
- # Ensure that the set of codepoints matches the set of chars...
23
- expect(SampleChars.map(&:ord)).to eq(SampleInts)
24
- end
25
-
26
- context 'Creation & initialization' do
27
- it 'should be created with a with an integer value (codepoint) or...' do
28
- SampleInts.each do |aCodepoint|
29
- expect { Character.new(aCodepoint) }.not_to raise_error
30
- end
31
- end
32
-
33
- it '...could be created with a single character String or...' do
34
- SampleChars.each do |aChar|
35
- expect { Character.new(aChar) }.not_to raise_error
36
- end
37
- end
38
-
39
- it '...could be created with an escape sequence' do
40
- # Case 1: escape sequence is a digram
41
- SampleDigrams.each do |anEscapeSeq|
42
- expect { Character.new(anEscapeSeq) }.not_to raise_error
43
- end
44
-
45
- # Case 2: escape sequence is an escaped octal or hexadecimal literal
46
- SampleNumEscs.each do |anEscapeSeq|
47
- expect { Character.new(anEscapeSeq) }.not_to raise_error
48
- end
49
- end
50
- end # context
51
-
52
- context 'Provided services' do
53
- it 'Should know its lexeme if created from a string' do
54
- # Lexeme is defined when the character was initialised from a text
55
- SampleChars.each do |aChar|
56
- ch = Character.new(aChar)
57
- expect(ch.lexeme).to eq(aChar)
58
- end
59
- end
60
-
61
- it 'Should not know its lexeme representation from a codepoint' do
62
- SampleInts.each do |aChar|
63
- ch = Character.new(aChar)
64
- expect(ch.lexeme).to be_nil
65
- end
66
- end
67
-
68
- it 'should know its String representation' do
69
- # Try for one character
70
- newOne = Character.new(?\u03a3)
71
- expect(newOne.char).to eq('Σ')
72
- expect(newOne.to_str).to eq("\u03A3")
73
-
74
- # Try with our chars sample
75
- SampleChars.each { |aChar| Character.new(aChar).to_str == aChar }
76
-
77
- # Try with our codepoint sample
78
- mapped_chars = SampleInts.map do |aCodepoint|
79
- Character.new(aCodepoint).char
80
- end
81
- expect(mapped_chars).to eq(SampleChars)
82
-
83
- # Try with our escape sequence samples
84
- (SampleDigrams + SampleNumEscs).each do |anEscSeq|
85
- expectation = String.class_eval(%Q|"#{anEscSeq}"|, __FILE__, __LINE__)
86
- Character.new(anEscSeq).to_str == expectation
87
- end
88
- end
89
-
90
- it 'should know its codepoint' do
91
- # Try for one character
92
- newOne = Character.new(?\u03a3)
93
- expect(newOne.codepoint).to eq(0x03a3)
94
-
95
- # Try with our chars sample
96
- allCodepoints = SampleChars.map do |aChar|
97
- Character.new(aChar).codepoint
98
- end
99
- expect(allCodepoints).to eq(SampleInts)
100
-
101
- # Try with our codepoint sample
102
- mapped_chars = SampleInts.each do |aCodepoint|
103
- expect(Character.new(aCodepoint).codepoint).to eq(aCodepoint)
104
- end
105
-
106
- # Try with our escape sequence samples
107
- (SampleDigrams + SampleNumEscs).each do |anEscSeq|
108
- expectation = String.class_eval(%Q|"#{anEscSeq}".ord()|, __FILE__, __LINE__)
109
- expect(Character.new(anEscSeq).codepoint).to eq(expectation)
110
- end
111
- end
112
-
113
- it 'should known whether it is equal to another Object' do
114
- newOne = Character.new(?\u03a3)
115
-
116
- # Case 1: test equality with itself
117
- expect(newOne).to eq(newOne)
118
-
119
- # Case 2: test equality with another Character
120
- expect(newOne).to eq(Character.new(?\u03a3))
121
- expect(newOne).not_to eq(Character.new(?\u0333))
122
-
123
- # Case 3: test equality with an integer value
124
- # (equality based on codepoint value)
125
- expect(newOne).to eq(0x03a3)
126
- expect(newOne).not_to eq(0x0333)
127
-
128
- # Case 4: test equality with a single-character String
129
- expect(newOne).to eq(?\u03a3)
130
- expect(newOne).not_to eq(?\u0333)
131
-
132
- # Case 5: test fails with multiple character strings
133
- expect(newOne).not_to eq('03a3')
134
-
135
- # Case 6: equality testing with an arbitrary object
136
- expect(newOne).not_to eq(nil)
137
- expect(newOne).not_to eq(Object.new)
138
-
139
- # In case 6, equality is based on to_s method.
140
- simulator = double('fake')
141
- expect(simulator).to receive(:to_s).and_return(?\u03a3)
142
- expect(newOne).to eq(simulator)
143
-
144
- # Create a module that re-defines the existing to_s method
145
- module Tweak_to_s
146
- def to_s() # Overwrite the existing to_s method
147
- return ?\u03a3
148
- end
149
- end # module
150
- weird = Object.new
151
- weird.extend(Tweak_to_s)
152
- expect(newOne).to eq(weird)
153
- end
154
-
155
- it 'should know its readable description' do
156
- ch1 = Character.new('a')
157
- expect(ch1.explain).to eq("the character 'a'")
158
-
159
- ch2 = Character.new(?\u03a3)
160
- expect(ch2.explain).to eq("the character '\u03a3'")
161
- end
162
- end # context
163
- end # describe
164
- end # module
165
-
166
- # End of file
@@ -1,79 +0,0 @@
1
- # File: multiplicity_spec.rb
2
-
3
- require_relative '../spec_helper' # Use the RSpec test framework
4
- require_relative '../../lib/regex/multiplicity'
5
-
6
- module SRL
7
- # Reopen the module, in order to get rid of fully qualified names
8
- module Regex # This module is used as a namespace
9
- describe Multiplicity do
10
- context 'Creation & initialisation' do
11
- it 'should be created with 3 arguments' do
12
- # Valid cases: initialized with two integer values and a policy symbol
13
- %i[greedy lazy possessive].each do |aPolicy|
14
- expect { Multiplicity.new(0, 1, aPolicy) }.not_to raise_error
15
- end
16
-
17
- # Invalid case: initialized with invalid policy value
18
- err = StandardError
19
- msg = "Invalid repetition policy 'KO'."
20
- expect { Multiplicity.new(0, :more, 'KO') }.to raise_error(err, msg)
21
- end
22
- end
23
-
24
- context 'Provided services' do
25
- it 'should know its text representation' do
26
- policy2text = { greedy: '', lazy: '?', possessive: '+' }
27
-
28
- # Case: zero or one
29
- policy2text.each_key do |aPolicy|
30
- multi = Multiplicity.new(0, 1, aPolicy)
31
- expect(multi.to_str).to eq("?#{policy2text[aPolicy]}")
32
- end
33
-
34
- # Case: zero or more
35
- policy2text.each_key do |aPolicy|
36
- multi = Multiplicity.new(0, :more, aPolicy)
37
- expect(multi.to_str).to eq("*#{policy2text[aPolicy]}")
38
- end
39
-
40
- # Case: one or more
41
- policy2text.each_key do |aPolicy|
42
- multi = Multiplicity.new(1, :more, aPolicy)
43
- expect(multi.to_str).to eq("+#{policy2text[aPolicy]}")
44
- end
45
-
46
- # Case: exactly m times
47
- policy2text.each_key do |aPolicy|
48
- samples = [1, 2, 5, 100]
49
- samples.each do |aCount|
50
- multi = Multiplicity.new(aCount, aCount, aPolicy)
51
- expect(multi.to_str).to eq("{#{aCount}}#{policy2text[aPolicy]}")
52
- end
53
- end
54
-
55
- # Case: m, n times
56
- policy2text.each_key do |aPolicy|
57
- samples = [1, 2, 5, 100]
58
- samples.each do |aCount|
59
- upper = aCount + 1 + rand(20)
60
- multi = Multiplicity.new(aCount, upper, aPolicy)
61
- expectation = "{#{aCount},#{upper}}#{policy2text[aPolicy]}"
62
- expect(multi.to_str).to eq(expectation)
63
- end
64
- end
65
-
66
- # Case: m or more
67
- policy2text.each_key do |aPolicy|
68
- samples = [2, 3, 5, 100]
69
- samples.each do |aCount|
70
- multi = Multiplicity.new(aCount, :more, aPolicy)
71
- expect(multi.to_str).to eq("{#{aCount},}#{policy2text[aPolicy]}")
72
- end
73
- end
74
- end
75
- end
76
- end
77
- end # module
78
- end # module
79
- # End of file
@@ -1,25 +0,0 @@
1
- # File: spec_helper.rb
2
- # Purpose: utility file that is loaded by all our RSpec files
3
-
4
- require 'simplecov'
5
-
6
- SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter.new(
7
- [
8
- SimpleCov::Formatter::HTMLFormatter
9
- ]
10
- )
11
-
12
- require 'pp' # Use pretty-print for debugging purposes
13
- require 'rspec' # Use the RSpec framework
14
-
15
- RSpec.configure do |config|
16
- config.expect_with :rspec do |c|
17
- # Disable the `should` syntax...
18
- c.syntax = :expect
19
- end
20
-
21
- # Display stack trace in case of failure
22
- config.full_backtrace = true
23
- end
24
-
25
- # End of file
@@ -1,148 +0,0 @@
1
- require_relative 'spec_helper' # Use the RSpec framework
2
- require_relative '../lib/tokenizer' # Load the class under test
3
-
4
-
5
- module SRL
6
- describe Tokenizer do
7
- def match_expectations(aTokenizer, theExpectations)
8
- aTokenizer.tokens.each_with_index do |token, i|
9
- terminal, lexeme = theExpectations[i]
10
- expect(token.terminal).to eq(terminal)
11
- expect(token.lexeme).to eq(lexeme)
12
- end
13
- end
14
-
15
- subject { Tokenizer.new('') }
16
-
17
- context 'Initialization:' do
18
- it 'should be initialized with a text to tokenize and a grammar' do
19
- expect { Tokenizer.new('anything') }.not_to raise_error
20
- end
21
-
22
- it 'should have its scanner initialized' do
23
- expect(subject.scanner).to be_kind_of(StringScanner)
24
- end
25
- end # context
26
-
27
- context 'Single token recognition:' do
28
- it 'should tokenize delimiters and separators' do
29
- subject.scanner.string = ','
30
- token = subject.tokens.first
31
- expect(token).to be_kind_of(Rley::Lexical::Token)
32
- expect(token.terminal).to eq('COMMA')
33
- expect(token.lexeme).to eq(',')
34
- end
35
-
36
- it 'should tokenize keywords' do
37
- sample = 'between Exactly oncE optional TWICE'
38
- subject.scanner.string = sample
39
- subject.tokens.each do |tok|
40
- expect(tok).to be_kind_of(Rley::Lexical::Token)
41
- expect(tok.terminal).to eq(tok.lexeme.upcase)
42
- end
43
- end
44
-
45
- it 'should tokenize integer values' do
46
- subject.scanner.string = ' 123 '
47
- token = subject.tokens.first
48
- expect(token).to be_kind_of(Rley::Lexical::Token)
49
- expect(token.terminal).to eq('INTEGER')
50
- expect(token.lexeme).to eq('123')
51
- end
52
-
53
- it 'should tokenize single digits' do
54
- subject.scanner.string = ' 1 '
55
- token = subject.tokens.first
56
- expect(token).to be_kind_of(Rley::Lexical::Token)
57
- expect(token.terminal).to eq('DIGIT_LIT')
58
- expect(token.lexeme).to eq('1')
59
- end
60
- end # context
61
-
62
- context 'String literal tokenization:' do
63
- it "should recognize 'literally ...'" do
64
- input = 'literally "hello"'
65
- subject.scanner.string = input
66
- expectations = [
67
- %w[LITERALLY literally],
68
- %w[STRING_LIT hello]
69
- ]
70
- match_expectations(subject, expectations)
71
- end
72
- end # context
73
-
74
- context 'Character range tokenization:' do
75
- it "should recognize 'letter from ... to ...'" do
76
- input = 'letter a to f'
77
- subject.scanner.string = input
78
- expectations = [
79
- %w[LETTER letter],
80
- %w[LETTER_LIT a],
81
- %w[TO to],
82
- %w[LETTER_LIT f]
83
- ]
84
- match_expectations(subject, expectations)
85
- end
86
- end # context
87
-
88
- context 'Quantifier tokenization:' do
89
- it "should recognize 'exactly ... times'" do
90
- input = 'exactly 4 Times'
91
- subject.scanner.string = input
92
- expectations = [
93
- %w[EXACTLY exactly],
94
- %w[DIGIT_LIT 4],
95
- %w[TIMES Times]
96
- ]
97
- match_expectations(subject, expectations)
98
- end
99
-
100
- it "should recognize 'between ... and ... times'" do
101
- input = 'Between 2 AND 4 times'
102
- subject.scanner.string = input
103
- expectations = [
104
- %w[BETWEEN Between],
105
- %w[DIGIT_LIT 2],
106
- %w[AND AND],
107
- %w[DIGIT_LIT 4],
108
- %w[TIMES times]
109
- ]
110
- match_expectations(subject, expectations)
111
- end
112
-
113
- it "should recognize 'once or more'" do
114
- input = 'Once or MORE'
115
- subject.scanner.string = input
116
- expectations = [
117
- %w[ONCE Once],
118
- %w[OR or],
119
- %w[MORE MORE]
120
- ]
121
- match_expectations(subject, expectations)
122
- end
123
-
124
- it "should recognize 'never or more'" do
125
- input = 'never or more'
126
- subject.scanner.string = input
127
- expectations = [
128
- %w[NEVER never],
129
- %w[OR or],
130
- %w[MORE more]
131
- ]
132
- match_expectations(subject, expectations)
133
- end
134
-
135
- it "should recognize 'at least ... times'" do
136
- input = 'at least 10 times'
137
- subject.scanner.string = input
138
- expectations = [
139
- %w[AT at],
140
- %w[LEAST least],
141
- %w[INTEGER 10],
142
- %w[TIMES times]
143
- ]
144
- match_expectations(subject, expectations)
145
- end
146
- end # context
147
- end # describe
148
- end # module
@@ -1,75 +0,0 @@
1
- require_relative './lib/tokenizer'
2
- require_relative './lib/grammar'
3
- require_relative './lib/ast_builder'
4
-
5
- def print_title(aTitle)
6
- puts aTitle
7
- puts '=' * aTitle.size
8
- end
9
-
10
- def print_tree(aTitle, aParseTree)
11
- # Let's create a parse tree visitor
12
- visitor = Rley::ParseTreeVisitor.new(aParseTree)
13
-
14
- # Now output formatted parse tree
15
- print_title(aTitle)
16
- renderer = Rley::Formatter::Asciitree.new($stdout)
17
- renderer.render(visitor)
18
- puts ''
19
- end
20
-
21
- # Parse the input expression given on the command line
22
- if ARGV.empty?
23
- my_name = File.basename(__FILE__)
24
- msg = <<-END_MSG
25
- WORK IN PROGRESS
26
- Simple Regex Language parser:
27
- - Parses a very limited subset of the language and displays the parse tree
28
-
29
- Command-line syntax:
30
- ruby #{my_name} "SRL expression"
31
- where:
32
- the SRL expression is enclosed between double quotes (")
33
-
34
- Examples:
35
- ruby #{my_name} "letter from a to f exactly 4 times"
36
- ruby #{my_name} "uppercase letter between 2 and 3 times"
37
- ruby #{my_name} "digit from 0 to 7 once or more"
38
- END_MSG
39
- puts msg
40
- exit(1)
41
- end
42
- puts ARGV[0]
43
-
44
- # Create a Rley facade object
45
- engine = Rley::Engine.new
46
-
47
- ########################################
48
- # Step 1. Load the SRL grammar
49
- engine.use_grammar(SRL::Grammar)
50
-
51
- lexer = SRL::Tokenizer.new(ARGV[0])
52
- result = engine.parse(lexer.tokens)
53
-
54
- unless result.success?
55
- # Stop if the parse failed...
56
- puts "Parsing of '#{ARGV[0]}' failed"
57
- puts "Reason: #{result.failure_reason.message}"
58
- exit(1)
59
- end
60
-
61
-
62
- # Generate a concrete syntax parse tree from the parse result
63
- cst_ptree = engine.convert(result)
64
- print_tree('Concrete Syntax Tree (CST)', cst_ptree)
65
-
66
- # Generate an abstract syntax tree (AST) from the parse result
67
- engine.configuration.repr_builder = ASTBuilder
68
- ast_ptree = engine.convert(result)
69
-
70
- # Now output the regexp literal
71
- root = ast_ptree.root
72
- print_title('SRL to Regexp representation:')
73
- puts "#{ARGV[0]} => #{root.to_str}" # Output the expression result
74
-
75
- # End of file