rley 0.5.07 → 0.5.08

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/examples/NLP/{benchmark_mini_en.rb → benchmark_pico_en.rb} +0 -0
  4. data/examples/NLP/nano_eng/nano_en_demo.rb +118 -0
  5. data/examples/NLP/nano_eng/nano_grammar.rb +59 -0
  6. data/examples/NLP/{mini_en_demo.rb → pico_en_demo.rb} +2 -2
  7. data/examples/general/SRL/lib/ast_builder.rb +176 -0
  8. data/examples/general/SRL/lib/ast_building.rb +20 -0
  9. data/examples/general/SRL/lib/grammar.rb +32 -0
  10. data/examples/general/SRL/lib/parser.rb +26 -0
  11. data/examples/general/SRL/lib/regex/multiplicity.rb +94 -0
  12. data/examples/general/SRL/lib/regex_repr.rb +1 -0
  13. data/examples/general/SRL/lib/srl_demo.rb +67 -0
  14. data/examples/general/SRL/lib/tokenizer.rb +101 -0
  15. data/examples/general/SRL/spec/integration_spec.rb +103 -0
  16. data/examples/general/SRL/spec/regex/multiplicity_spec.rb +83 -0
  17. data/examples/general/SRL/spec/spec_helper.rb +25 -0
  18. data/examples/general/SRL/spec/tokenizer_spec.rb +125 -0
  19. data/examples/general/SRL/srl_demo.rb +57 -0
  20. data/examples/general/calc_iter1/calc_demo.rb +1 -1
  21. data/examples/general/calc_iter2/ast_building.rb +20 -0
  22. data/examples/general/calc_iter2/calc_ast_builder.rb +3 -23
  23. data/examples/general/calc_iter2/calc_demo.rb +1 -1
  24. data/lib/rley/base/base_parser.rb +1 -1
  25. data/lib/rley/base/grm_items_builder.rb +1 -1
  26. data/lib/rley/constants.rb +1 -1
  27. data/lib/rley/gfg/non_terminal_vertex.rb +1 -1
  28. data/lib/rley/parser/gfg_chart.rb +8 -3
  29. data/lib/rley/parser/gfg_earley_parser.rb +5 -2
  30. data/lib/rley/parser/gfg_parsing.rb +5 -1
  31. data/lib/rley/parser/parse_tree_builder.rb +16 -5
  32. data/lib/rley/ptree/terminal_node.rb +3 -2
  33. data/spec/rley/parser/ast_builder_spec.rb +2 -2
  34. data/spec/rley/parser/cst_builder_spec.rb +2 -3
  35. metadata +20 -4
@@ -0,0 +1,26 @@
1
+ # Purpose: to demonstrate how to build a parser for a subset of SRL
2
+ # (Simple Regex Language)
3
+ require_relative 'tokenizer'
4
+ require_relative 'grammar'
5
module SRL
  # Earley parser specialised for a subset of the Simple Regex Language.
  class Parser < Rley::Parser::GFGEarleyParser
    # Reader for @source_file. Nothing in this class assigns that variable.
    attr_reader :source_file

    # Initialise the inherited Earley parser with the `Grammar` constant.
    def initialize
      super(Grammar)
    end

    # Tokenize the given text and hand the token sequence to `parse`.
    # @param aText [String] the SRL text to analyse
    # @return the value returned by the inherited `parse` method
    def parse_SRL(aText)
      parse(Tokenizer.new(aText, grammar).tokens)
    end
  end
end
25
+
26
+ # End of file
@@ -0,0 +1,94 @@
1
+ # File: Multiplicity.rb
2
+
3
module SRL
  module Regex # This module is used as a namespace
    # The multiplicity specifies by how much a given expression can be repeated.
    class Multiplicity
      # Maps every supported repetition policy to the suffix it contributes
      # to the textual representation.
      POLICY_SUFFIX = { greedy: '', lazy: '?', possessive: '+' }.freeze

      # The lowest acceptable repetition count
      attr_reader :lower_bound

      # The highest possible repetition count (an Integer or :more)
      attr_reader :upper_bound

      # An indicator that specifies how to repeat (:greedy, :lazy, :possessive)
      attr_reader :policy

      # @param aLowerBound [Integer] non-negative repetition count
      # @param anUpperBound [Integer, Symbol] integer (not smaller than the
      #   lower bound) or :more symbol
      # @param aPolicy [Symbol] One of: (:greedy, :lazy, :possessive)
      # @raise [StandardError] when one of the arguments is invalid
      def initialize(aLowerBound, anUpperBound, aPolicy)
        # Order matters: the upper bound is checked against the lower bound.
        @lower_bound = valid_lower_bound(aLowerBound)
        @upper_bound = valid_upper_bound(anUpperBound)
        @policy = valid_policy(aPolicy)
      end

      # Purpose: Return the String representation of the multiplicity.
      # @return [String] the quantifier followed by the policy suffix
      def to_str
        quantifier + POLICY_SUFFIX[policy]
      end

      private

      # Text of the quantifier alone, without the policy suffix.
      def quantifier
        if upper_bound == :more
          case lower_bound
          when 0 then '*'
          when 1 then '+'
          else "{#{lower_bound},}"
          end
        elsif upper_bound == lower_bound
          "{#{lower_bound}}"
        elsif [lower_bound, upper_bound] == [0, 1]
          '?'
        else
          "{#{lower_bound},#{upper_bound}}"
        end
      end

      # Validation method. Return the validated lower bound value.
      # A lower bound must be a non-negative Integer.
      def valid_lower_bound(aLowerBound)
        err_msg = "Invalid lower bound of repetition count #{aLowerBound}"
        valid = aLowerBound.kind_of?(Integer) && aLowerBound >= 0
        raise StandardError, err_msg unless valid

        aLowerBound
      end

      # Validation method. Return the validated upper bound value.
      # An upper bound is either :more or an Integer that isn't smaller than
      # the (already validated) lower bound.
      def valid_upper_bound(anUpperBound)
        return anUpperBound if anUpperBound == :more

        err_msg = "Invalid upper bound of repetition count #{anUpperBound}"
        valid = anUpperBound.kind_of?(Integer) && anUpperBound >= lower_bound
        raise StandardError, err_msg unless valid

        anUpperBound
      end

      # Validation method. Return the validated policy value.
      def valid_policy(aPolicy)
        err_msg = "Invalid repetition policy '#{aPolicy}'."
        raise StandardError, err_msg unless POLICY_SUFFIX.key?(aPolicy)

        aPolicy
      end
    end # class
  end # module
end # module
93
+
94
+ # End of file
@@ -0,0 +1 @@
1
+ require_relative './regex/multiplicity'
@@ -0,0 +1,67 @@
1
+ require_relative 'parser'
2
+ require_relative 'ast_builder'
3
+
4
# Print the given title, underlined with '=' characters.
def print_title(aTitle)
  puts aTitle
  puts '=' * aTitle.size
end

# Print a title, then render the given parse tree as an ASCII tree
# on the standard output.
def print_tree(aTitle, aParseTree)
  # Let's create a parse tree visitor
  visitor = Rley::ParseTreeVisitor.new(aParseTree)

  # Now output formatted parse tree
  print_title(aTitle)
  renderer = Rley::Formatter::Asciitree.new($stdout)
  renderer.render(visitor)
  puts ''
end

# Create a SRL parser object
parser = SRL::Parser.new

# Without command-line argument: display the usage text and stop.
if ARGV.empty?
  my_name = File.basename(__FILE__)
  msg = <<-END_MSG
Demo parser for the SRL, the Simple Regex Language (https://simple-regex.com/).
Ultimately it will support SRL in full, currently it parses only the
SRL quantifiers.
The utility prints the resulting regular expression.

Command-line syntax:
ruby #{my_name} filename
where:
the file name is a SRL source file.

Examples:
ruby #{my_name} sample01.srl
  END_MSG
  puts msg
  exit(1)
end

# NOTE(review): the usage text above announces a file name, but the first
# argument itself is parsed as SRL text -- confirm which one is intended.
puts ARGV[0]
# SRL::Parser exposes parse_SRL (it has no parse_expression method).
result = parser.parse_SRL(ARGV[0])

unless result.success?
  # Stop if the parse failed...
  puts "Parsing of '#{ARGV[0]}' failed"
  puts "Reason: #{result.failure_reason.message}"
  exit(1)
end

# Generate a concrete syntax parse tree from the parse result
cst_ptree = result.parse_tree
print_tree('Concrete Syntax Tree (CST)', cst_ptree)

# Generate an abstract syntax parse tree from the parse result
tree_builder = ASTBuilder
ast_ptree = result.parse_tree(tree_builder)
61
+ # print_tree('Abstract Syntax Tree (AST)', ast_ptree)
62
+
63
+ # # Now perform the computation of math expression
64
+ # root = ast_ptree.root
65
+ # print_title('Result:')
66
+ # puts root.interpret.to_s # Output the expression result
67
+ # End of file
@@ -0,0 +1,101 @@
1
+ # File: tokenizer.rb
2
+ # Tokenizer for SRL (Simple Regex Language)
3
+ require 'strscan'
4
+ require 'rley' # Load the gem
5
+
6
module SRL
  # Tokenizer for a subset of SRL. It recognizes:
  #   Keywords: the words listed in KEYWORDS (case-insensitive)
  #   Integer literals (two digits or more) and single digits
  #   Single character literals
  #   Delimiters: parentheses '(' and ')'
  #   Separators: comma
  # Whitespace between tokens is skipped.
  class Tokenizer
    attr_reader(:scanner)
    # Initialised to 1; not updated by the code of this class.
    attr_reader(:lineno)
    # Not assigned by the code of this class.
    attr_reader(:line_start)
    # Lookup table from a symbol name to the grammar symbol; taken from the
    # grammar passed to the constructor.
    attr_reader(:name2symbol)

    # Mapping from a single-character lexeme to the name of its token type
    LEXEME2NAME = {
      '(' => 'LPAREN',
      ')' => 'RPAREN',
      ',' => 'COMMA'
    }.freeze

    # Here are all the SRL keywords (in uppercase).
    # Each keyword is mapped to the name of its token type (i.e. itself).
    KEYWORDS = %w[
      AND
      AT
      BETWEEN
      EXACTLY
      LEAST
      MORE
      NEVER
      ONCE
      OPTIONAL
      OR
      TIMES
      TWICE
    ].map { |x| [x, x] }.to_h.freeze

    # Raised when the input contains text that cannot be tokenized
    class ScanError < StandardError; end

    # @param source [String] the text to tokenize
    # @param aGrammar an object responding to `name2symbol`
    def initialize(source, aGrammar)
      @scanner = StringScanner.new(source)
      @name2symbol = aGrammar.name2symbol
      @lineno = 1
    end

    # Scan the remaining input and return all tokens found.
    # @return [Array] the sequence of tokens
    # @raise [ScanError] when an unknown token is met
    def tokens
      tok_sequence = []
      until @scanner.eos?
        token = _next_token
        tok_sequence << token unless token.nil?
      end

      tok_sequence
    end

    private

    # Return the next token, or nil when only whitespace was left.
    def _next_token
      skip_whitespaces
      # StringScanner#peek returns an empty string (not nil) at end of input,
      # so exhaustion must be tested explicitly.
      return nil if scanner.eos?

      curr_ch = scanner.peek(1)
      if LEXEME2NAME.key?(curr_ch)
        # Single character token
        build_token(LEXEME2NAME[curr_ch], scanner.getch)
      elsif (lexeme = scanner.scan(/[0-9]{2,}/))
        build_token('INTEGER', lexeme) # An integer has two or more digits
      elsif (lexeme = scanner.scan(/[0-9]/))
        build_token('DIGIT', lexeme)
      elsif (lexeme = scanner.scan(/[a-zA-Z]{2,}/))
        keyword = KEYWORDS[lexeme.upcase]
        # A word that isn't a keyword has no token type: reject it.
        raise ScanError, "Unknown token #{lexeme}" if keyword.nil?

        build_token(keyword, lexeme)
      elsif (lexeme = scanner.scan(/\w/))
        build_token('CHAR', lexeme)
      else # Unknown token
        # Report up to 20 characters starting at the offending one
        erroneous = scanner.scan(/.{1,20}/) || curr_ch
        raise ScanError, "Unknown token #{erroneous}"
      end
    end

    # Create a token with the given lexeme; its type is the grammar symbol
    # registered under the given name.
    def build_token(aSymbolName, aLexeme)
      token_type = name2symbol[aSymbolName]
      Rley::Lexical::Token.new(aLexeme, token_type)
    end

    # Advance the scanner past spaces, tabs, form feeds and line endings.
    def skip_whitespaces
      scanner.scan(/[ \t\f\n\r]+/)
    end
  end # class
end # module
@@ -0,0 +1,103 @@
1
+ require_relative 'spec_helper' # Use the RSpec framework
2
+ require_relative '../lib/parser'
3
+ require_relative '../lib/ast_builder'
4
+
5
describe 'Integration tests:' do
  # Parse the given SRL text with a fresh parser and return the parse result.
  def parse(someSRL)
    SRL::Parser.new.parse_SRL(someSRL)
  end

  # Build a parse tree from the parse result with ASTBuilder and return the
  # root of that tree.
  def regexp_repr(aResult)
    aResult.parse_tree(ASTBuilder).root
  end

  context 'Parsing quantifiers:' do
    it "should parse 'once' syntax" do
      result = parse('once')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('{1}')
    end

    it "should parse 'twice' syntax" do
      result = parse('twice')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('{2}')
    end

    it "should parse 'optional' syntax" do
      result = parse('optional')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('?')
    end

    it "should parse 'exactly ... times' syntax" do
      result = parse('exactly 4 times')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('{4}')
    end

    it "should parse 'between ... and ... times' syntax" do
      result = parse('between 2 and 4 times')
      expect(result).to be_success

      # Dropping 'times' keyword is shorter syntax
      expect(parse('between 2 and 4')).to be_success

      regexp = regexp_repr(result)
      # No blank after the comma, as in the other quantifier texts.
      # NOTE(review): assumes the tree root is a Regex::Multiplicity, whose
      # text representation of a range is "{m,n}" -- confirm with ASTBuilder.
      expect(regexp.to_str).to eq('{2,4}')
    end

    it "should parse 'once or more' syntax" do
      result = parse('once or more')
      expect(result).to be_success
    end

    it "should parse 'never or more' syntax" do
      result = parse('never or more')
      expect(result).to be_success
    end

    it "should parse 'at least ... times' syntax" do
      result = parse('at least 10 times')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('{10,}')
    end
  end # context
end # describe
83
+
84
+
85
+ =begin
86
+
87
+ unless result.success?
88
+ # Stop if the parse failed...
89
+ puts "Parsing of '#{ARGV[0]}' failed"
90
+ puts "Reason: #{result.failure_reason.message}"
91
+ exit(1)
92
+ end
93
+
94
+
95
+ # Generate a concrete syntax parse tree from the parse result
96
+ cst_ptree = result.parse_tree
97
+ print_tree('Concrete Syntax Tree (CST)', cst_ptree)
98
+
99
+ # Generate an abstract syntax parse tree from the parse result
100
+ tree_builder = ASTBuilder
101
+ ast_ptree = result.parse_tree(tree_builder)
102
+ =end
103
+
@@ -0,0 +1,83 @@
1
+ # File: Multiplicity_spec.rb
2
+
3
+ require 'pp'
4
+ require_relative '../spec_helper' # Use the RSpec test framework
5
+ require_relative '../../lib/regex/multiplicity'
6
+
7
module SRL
  # The modules are reopened so that the examples can refer to Multiplicity
  # without a fully qualified name.
  module Regex
    describe Multiplicity do
      context "Creation & initialisation" do
        it "should be created with 3 arguments" do
          # Every supported policy is accepted together with two integers
          %i[greedy lazy possessive].each do |policy|
            expect { Multiplicity.new(0, 1, policy) }.not_to raise_error
          end

          # A policy outside the supported set is rejected
          expected_msg = "Invalid repetition policy 'wrong'."
          creation = -> { Multiplicity.new(0, :more, 'wrong') }
          expect(&creation).to raise_error(StandardError, expected_msg)
        end
      end

      context "Provided services" do
        it 'should know its text representation' do
          # Suffix contributed by each policy to the text representation
          suffix_of = { greedy: '', lazy: '?', possessive: '+' }

          # Case: zero or one
          suffix_of.each do |policy, suffix|
            instance = Multiplicity.new(0, 1, policy)
            expect(instance.to_str).to eq("?#{suffix}")
          end

          # Case: zero or more
          suffix_of.each do |policy, suffix|
            instance = Multiplicity.new(0, :more, policy)
            expect(instance.to_str).to eq("*#{suffix}")
          end

          # Case: one or more
          suffix_of.each do |policy, suffix|
            instance = Multiplicity.new(1, :more, policy)
            expect(instance.to_str).to eq("+#{suffix}")
          end

          # Case: exactly m times
          suffix_of.each do |policy, suffix|
            [1, 2, 5, 100].each do |count|
              instance = Multiplicity.new(count, count, policy)
              expect(instance.to_str).to eq("{#{count}}#{suffix}")
            end
          end

          # Case: m, n times (n is picked at random above m)
          suffix_of.each do |policy, suffix|
            [1, 2, 5, 100].each do |count|
              maximum = count + 1 + rand(20)
              instance = Multiplicity.new(count, maximum, policy)
              expect(instance.to_str).to eq("{#{count},#{maximum}}#{suffix}")
            end
          end

          # Case: m or more
          suffix_of.each do |policy, suffix|
            [2, 3, 5, 100].each do |count|
              instance = Multiplicity.new(count, :more, policy)
              expect(instance.to_str).to eq("{#{count},}#{suffix}")
            end
          end
        end
      end
    end
  end # module
end # module
83
+ # End of file