rley 0.5.07 → 0.5.08

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/examples/NLP/{benchmark_mini_en.rb → benchmark_pico_en.rb} +0 -0
  4. data/examples/NLP/nano_eng/nano_en_demo.rb +118 -0
  5. data/examples/NLP/nano_eng/nano_grammar.rb +59 -0
  6. data/examples/NLP/{mini_en_demo.rb → pico_en_demo.rb} +2 -2
  7. data/examples/general/SRL/lib/ast_builder.rb +176 -0
  8. data/examples/general/SRL/lib/ast_building.rb +20 -0
  9. data/examples/general/SRL/lib/grammar.rb +32 -0
  10. data/examples/general/SRL/lib/parser.rb +26 -0
  11. data/examples/general/SRL/lib/regex/multiplicity.rb +94 -0
  12. data/examples/general/SRL/lib/regex_repr.rb +1 -0
  13. data/examples/general/SRL/lib/srl_demo.rb +67 -0
  14. data/examples/general/SRL/lib/tokenizer.rb +101 -0
  15. data/examples/general/SRL/spec/integration_spec.rb +103 -0
  16. data/examples/general/SRL/spec/regex/multiplicity_spec.rb +83 -0
  17. data/examples/general/SRL/spec/spec_helper.rb +25 -0
  18. data/examples/general/SRL/spec/tokenizer_spec.rb +125 -0
  19. data/examples/general/SRL/srl_demo.rb +57 -0
  20. data/examples/general/calc_iter1/calc_demo.rb +1 -1
  21. data/examples/general/calc_iter2/ast_building.rb +20 -0
  22. data/examples/general/calc_iter2/calc_ast_builder.rb +3 -23
  23. data/examples/general/calc_iter2/calc_demo.rb +1 -1
  24. data/lib/rley/base/base_parser.rb +1 -1
  25. data/lib/rley/base/grm_items_builder.rb +1 -1
  26. data/lib/rley/constants.rb +1 -1
  27. data/lib/rley/gfg/non_terminal_vertex.rb +1 -1
  28. data/lib/rley/parser/gfg_chart.rb +8 -3
  29. data/lib/rley/parser/gfg_earley_parser.rb +5 -2
  30. data/lib/rley/parser/gfg_parsing.rb +5 -1
  31. data/lib/rley/parser/parse_tree_builder.rb +16 -5
  32. data/lib/rley/ptree/terminal_node.rb +3 -2
  33. data/spec/rley/parser/ast_builder_spec.rb +2 -2
  34. data/spec/rley/parser/cst_builder_spec.rb +2 -3
  35. metadata +20 -4
@@ -0,0 +1,26 @@
1
+ # Purpose: to define a parser for a subset of the Simple Regex Language
2
+ # (SRL)
3
+ require_relative 'tokenizer'
4
+ require_relative 'grammar'
5
+ module SRL
6
+ # A parser for a subset of Simple Regex Language
7
# Earley parser preconfigured with the SRL grammar (the `Grammar` constant
# loaded from the sibling 'grammar' file).
class Parser < Rley::Parser::GFGEarleyParser
  # NOTE(review): nothing in this class assigns @source_file, so this reader
  # returns nil unless a subclass or caller sets the instance variable.
  attr_reader(:source_file)

  # Constructor.
  # Hands the SRL grammar over to the Earley parser superclass.
  def initialize
    super(Grammar)
  end

  # Tokenize the given SRL text, then parse the resulting token sequence.
  # @param aText [String] the SRL source text
  # @return whatever the inherited `parse` method returns for the tokens
  def parse_SRL(aText)
    # `grammar` is not defined here; presumably an accessor inherited from
    # the Rley parser superclass.
    tokenizer = Tokenizer.new(aText, grammar)
    parse(tokenizer.tokens)
  end
end # class
24
+ end # module
25
+
26
+ # End of file
@@ -0,0 +1,94 @@
1
+ # File: Multiplicity.rb
2
+
3
+ module SRL
4
+ module Regex # This module is used as a namespace
5
+ # The multiplicity specifies by how much a given expression can be repeated.
6
# A multiplicity tells how many times an expression may be repeated
# (lower and upper repetition counts) and with which matching policy.
class Multiplicity
  # Regex suffix appended to the quantifier for each repetition policy.
  POLICY_SUFFIX = { greedy: '', lazy: '?', possessive: '+' }.freeze

  # The lowest acceptable repetition count
  attr_reader(:lower_bound)

  # The highest possible repetition count (an Integer or the :more symbol)
  attr_reader(:upper_bound)

  # An indicator that specifies how to repeat (:greedy, :lazy, :possessive)
  attr_reader(:policy)

  # @param aLowerBound [Integer] a non-negative repetition count
  # @param anUpperBound [Integer, Symbol] an integer not lower than
  #   aLowerBound, or the :more symbol (no upper limit)
  # @param aPolicy [Symbol] One of: (:greedy, :lazy, :possessive)
  # @raise [StandardError] when one of the arguments is invalid
  def initialize(aLowerBound, anUpperBound, aPolicy)
    # Order matters: the upper bound is validated against @lower_bound.
    @lower_bound = valid_lower_bound(aLowerBound)
    @upper_bound = valid_upper_bound(anUpperBound)
    @policy = valid_policy(aPolicy)
  end

  # Purpose: Return the String representation of the multiplicity.
  # @return [String] a regex quantifier ('?', '*', '+', '{m}', '{m,}' or
  #   '{m,n}') followed by the policy suffix ('' , '?' or '+')
  def to_str
    quantifier_text + POLICY_SUFFIX.fetch(policy)
  end

  private

  # Return the quantifier part (without policy suffix) of the text
  # representation.
  def quantifier_text
    if upper_bound == :more
      case lower_bound
      when 0 then '*'
      when 1 then '+'
      else "{#{lower_bound},}"
      end
    elsif upper_bound == lower_bound
      "{#{lower_bound}}"
    elsif lower_bound.zero? && upper_bound == 1
      '?'
    else
      "{#{lower_bound},#{upper_bound}}"
    end
  end

  # Validation method. Return the validated lower bound value.
  # A valid lower bound is a non-negative Integer.
  def valid_lower_bound(aLowerBound)
    err_msg = "Invalid lower bound of repetition count #{aLowerBound}"
    valid = aLowerBound.kind_of?(Integer) && !aLowerBound.negative?
    raise StandardError, err_msg unless valid

    aLowerBound
  end

  # Validation method. Return the validated upper bound value.
  # A valid upper bound is :more or an Integer that is not lower than the
  # (already validated) lower bound.
  def valid_upper_bound(anUpperBound)
    return anUpperBound if anUpperBound == :more

    err_msg = "Invalid upper bound of repetition count #{anUpperBound}"
    valid = anUpperBound.kind_of?(Integer) && anUpperBound >= lower_bound
    raise StandardError, err_msg unless valid

    anUpperBound
  end

  # Validation method. Return the validated policy value.
  def valid_policy(aPolicy)
    err_msg = "Invalid repetition policy '#{aPolicy}'."
    raise StandardError, err_msg unless POLICY_SUFFIX.key?(aPolicy)

    aPolicy
  end
end # class
90
+
91
+ end # module
92
+ end # module
93
+
94
+ # End of file
@@ -0,0 +1 @@
1
+ require_relative './regex/multiplicity'
@@ -0,0 +1,67 @@
1
+ require_relative 'parser'
2
+ require_relative 'ast_builder'
3
+
4
# Print the given title on standard output, underlined with a row of '='
# characters of the same length.
# @param aTitle [String] the title text
def print_title(aTitle)
  underline = '=' * aTitle.size
  puts aTitle, underline
end
8
+
9
# Print a title, then an ASCII-art rendering of the given parse tree on
# standard output, followed by an empty line.
# @param aTitle [String] heading printed above the tree
# @param aParseTree the parse tree handed to Rley::ParseTreeVisitor
def print_tree(aTitle, aParseTree)
  # The visitor walks the tree on behalf of the formatter
  tree_walker = Rley::ParseTreeVisitor.new(aParseTree)

  print_title(aTitle)
  Rley::Formatter::Asciitree.new($stdout).render(tree_walker)
  puts ''
end
19
+
20
# Create an SRL parser object
parser = SRL::Parser.new

# Without any command-line argument: display the usage text and stop.
if ARGV.empty?
  my_name = File.basename(__FILE__)
  msg = <<-END_MSG
Demo parser for the SRL, the Simple Regex Language (https://simple-regex.com/).
Ultimately it will support SRL in full, currently it parses only the
SRL quantifiers.
The utility prints the resulting regular expression.

Command-line syntax:
ruby #{my_name} filename
where:
the file name is a SRL source file.

Examples:
ruby #{my_name} sample01.srl
END_MSG
  puts msg
  exit(1)
end

# NOTE(review): the usage text above announces a file name, but the first
# command-line argument is parsed as SRL text itself (no file is read).
# Confirm which of the two is intended.
puts ARGV[0]

# SRL::Parser defines `parse_SRL`; the `parse_expression` method called here
# before does not exist on that class and raised NoMethodError.
result = parser.parse_SRL(ARGV[0])

unless result.success?
  # Stop if the parse failed...
  puts "Parsing of '#{ARGV[0]}' failed"
  puts "Reason: #{result.failure_reason.message}"
  exit(1)
end

# Generate a concrete syntax parse tree from the parse result
cst_ptree = result.parse_tree
print_tree('Concrete Syntax Tree (CST)', cst_ptree)

# Generate an abstract syntax parse tree from the parse result
tree_builder = ASTBuilder
ast_ptree = result.parse_tree(tree_builder)
# print_tree('Abstract Syntax Tree (AST)', ast_ptree)

# TODO: output the resulting regular expression, as announced in the usage
# text; for now the AST is built but not displayed.
67
+ # End of file
@@ -0,0 +1,101 @@
1
+ # File: tokenizer.rb
2
+ # Tokenizer for SRL (Simple Regex Language)
3
+ require 'strscan'
4
+ require 'rley' # Load the gem
5
+
6
+ module SRL
7
+ # The tokenizer should recognize:
8
+ # Keywords: as, capture, letter
9
+ # Integer literals including single digit
10
+ # String literals (quote delimited)
11
+ # Single character literal
12
+ # Delimiters: parentheses '(' and ')'
13
+ # Separators: comma (optional)
14
# Converts SRL source text into a sequence of Rley tokens.
# Recognized lexemes: the delimiters '(' ')' ',', integer literals
# (two or more digits), single digits, SRL keywords (case-insensitive)
# and single word characters.
class Tokenizer
  # The StringScanner that walks through the source text
  attr_reader(:scanner)

  # Line number; set to 1 at creation and not updated by this class
  attr_reader(:lineno)

  # NOTE(review): never assigned in this class, so it always returns nil.
  attr_reader(:line_start)

  # Mapping obtained from the grammar: terminal name => terminal symbol
  attr_reader(:name2symbol)

  @@lexeme2name = {
    '(' => 'LPAREN',
    ')' => 'RPAREN',
    ',' => 'COMMA'
  }.freeze

  # Here are all the SRL keywords (in uppercase)
  @@keywords = %w[
    AND
    AT
    BETWEEN
    EXACTLY
    LEAST
    MORE
    NEVER
    ONCE
    OPTIONAL
    OR
    TIMES
    TWICE
  ].map { |x| [x, x] }.to_h.freeze

  # Raised when the source text contains an unrecognized lexeme
  class ScanError < StandardError; end

  # @param source [String] the SRL text to tokenize
  # @param aGrammar an object that responds to `name2symbol`
  def initialize(source, aGrammar)
    @scanner = StringScanner.new(source)
    @name2symbol = aGrammar.name2symbol
    @lineno = 1
  end

  # Scan the whole source text.
  # @return [Array] the tokens in their order of occurrence
  # @raise [ScanError] when an unknown lexeme is found
  def tokens
    tok_sequence = []
    until scanner.eos?
      token = _next_token
      tok_sequence << token unless token.nil?
    end

    tok_sequence
  end

  private

  # Return the next token, or nil when only whitespace was left.
  def _next_token
    skip_whitespaces
    # StringScanner#peek returns "" (never nil) at end of input, and every
    # String includes "": without this guard, trailing whitespace would be
    # mistaken for a single character token.
    return nil if scanner.eos?

    curr_ch = scanner.peek(1)

    if '(),'.include? curr_ch
      # Single character token
      build_token(@@lexeme2name[curr_ch], scanner.getch)
    elsif (lexeme = scanner.scan(/[0-9]{2,}/))
      build_token('INTEGER', lexeme) # An integer has two or more digits
    elsif (lexeme = scanner.scan(/[0-9]/))
      build_token('DIGIT', lexeme)
    elsif (lexeme = scanner.scan(/[a-zA-Z]{2,}/))
      keyword = @@keywords[lexeme.upcase]
      # A word that is not a keyword has no terminal symbol: report it
      # instead of emitting a token without type.
      raise ScanError, "Unknown token #{lexeme}" if keyword.nil?

      build_token(keyword, lexeme)
    elsif (lexeme = scanner.scan(/\w/))
      build_token('CHAR', lexeme)
    else # Unknown token
      # The scan starts at the offending character (peek did not consume
      # it); show at most 20 characters of the remaining line.
      erroneous = scanner.scan(/.{1,20}/) || curr_ch
      raise ScanError, "Unknown token #{erroneous}"
    end
  end

  # Create a token for the given lexeme; its type is the grammar symbol
  # registered under the given terminal name.
  def build_token(aSymbolName, aLexeme)
    token_type = name2symbol[aSymbolName]
    Rley::Lexical::Token.new(aLexeme, token_type)
  end

  # Move the scanner past any run of blanks, tabs, form feeds and newlines
  def skip_whitespaces
    scanner.scan(/[ \t\f\n\r]+/)
  end
end # class
101
+ end # module
@@ -0,0 +1,103 @@
1
+ require_relative 'spec_helper' # Use the RSpec framework
2
+ require_relative '../lib/parser'
3
+ require_relative '../lib/ast_builder'
4
+
5
# End-to-end checks: SRL text -> tokens -> parse result -> AST root, whose
# text representation is compared with the expected regex quantifier.
describe 'Integration tests:' do
  # Parse the given SRL text with a fresh parser.
  # @return the parse result
  def parse(someSRL)
    SRL::Parser.new.parse_SRL(someSRL)
  end

  # Generate an abstract syntax parse tree from the parse result
  # @return the root of that tree
  def regexp_repr(aResult)
    aResult.parse_tree(ASTBuilder).root
  end

  context 'Parsing quantifiers:' do
    it "should parse 'once' syntax" do
      result = parse('once')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('{1}')
    end

    it "should parse 'twice' syntax" do
      result = parse('twice')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('{2}')
    end

    it "should parse 'optional' syntax" do
      result = parse('optional')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('?')
    end

    it "should parse 'exactly ... times' syntax" do
      result = parse('exactly 4 times')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('{4}')
    end

    it "should parse 'between ... and ... times' syntax" do
      result = parse('between 2 and 4 times')
      expect(result).to be_success

      # Dropping 'times' keyword is shorter syntax
      expect(parse('between 2 and 4')).to be_success

      regexp = regexp_repr(result)
      # Multiplicity#to_str renders a range without blank ('{m,n}'), as
      # its own spec states.
      # NOTE(review): assumes the AST root is a Multiplicity - confirm
      # against ASTBuilder.
      expect(regexp.to_str).to eq('{2,4}')
    end

    it "should parse 'once or more' syntax" do
      result = parse('once or more')
      expect(result).to be_success
    end

    it "should parse 'never or more' syntax" do
      result = parse('never or more')
      expect(result).to be_success
    end

    it "should parse 'at least ... times' syntax" do
      result = parse('at least 10 times')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('{10,}')
    end
  end # context
end # describe
83
+
84
+
85
+ =begin
86
+
87
+ unless result.success?
88
+ # Stop if the parse failed...
89
+ puts "Parsing of '#{ARGV[0]}' failed"
90
+ puts "Reason: #{result.failure_reason.message}"
91
+ exit(1)
92
+ end
93
+
94
+
95
+ # Generate a concrete syntax parse tree from the parse result
96
+ cst_ptree = result.parse_tree
97
+ print_tree('Concrete Syntax Tree (CST)', cst_ptree)
98
+
99
+ # Generate an abstract syntax parse tree from the parse result
100
+ tree_builder = ASTBuilder
101
+ ast_ptree = result.parse_tree(tree_builder)
102
+ =end
103
+
@@ -0,0 +1,83 @@
1
+ # File: Multiplicity_spec.rb
2
+
3
+ require 'pp'
4
+ require_relative '../spec_helper' # Use the RSpec test framework
5
+ require_relative '../../lib/regex/multiplicity'
6
+
7
+ module SRL
8
+ # Reopen the module, in order to get rid of fully qualified names
9
+ module Regex # This module is used as a namespace
10
+
11
# Unit checks of Multiplicity: argument validation at creation and the
# text representation for every bound/policy combination.
describe Multiplicity do
  context 'Creation & initialisation' do
    it 'should be created with 3 arguments' do
      # Valid cases: initialized with two integer values and a policy symbol
      %i[greedy lazy possessive].each do |aPolicy|
        expect { Multiplicity.new(0, 1, aPolicy) }.not_to raise_error
      end

      # Invalid case: initialized with invalid policy value
      err = "Invalid repetition policy 'wrong'."
      expect { Multiplicity.new(0, :more, 'wrong') }.to raise_error(StandardError, err)
    end
  end

  context 'Provided services' do
    it 'should know its text representation' do
      policy2text = { :greedy => '', :lazy => '?', :possessive => '+' }

      # Case: zero or one
      policy2text.each do |aPolicy, suffix|
        multi = Multiplicity.new(0, 1, aPolicy)
        expect(multi.to_str).to eq("?#{suffix}")
      end

      # Case: zero or more
      policy2text.each do |aPolicy, suffix|
        multi = Multiplicity.new(0, :more, aPolicy)
        expect(multi.to_str).to eq("*#{suffix}")
      end

      # Case: one or more
      policy2text.each do |aPolicy, suffix|
        multi = Multiplicity.new(1, :more, aPolicy)
        expect(multi.to_str).to eq("+#{suffix}")
      end

      # Case: exactly m times
      policy2text.each do |aPolicy, suffix|
        [1, 2, 5, 100].each do |aCount|
          multi = Multiplicity.new(aCount, aCount, aPolicy)
          expect(multi.to_str).to eq("{#{aCount}}#{suffix}")
        end
      end

      # Case: m, n times
      # Fixed offsets (instead of random ones) keep every run identical
      # and any failure reproducible.
      policy2text.each do |aPolicy, suffix|
        [1, 2, 5, 100].product([1, 10, 20]).each do |aCount, anOffset|
          upper = aCount + anOffset
          multi = Multiplicity.new(aCount, upper, aPolicy)
          expect(multi.to_str).to eq("{#{aCount},#{upper}}#{suffix}")
        end
      end

      # Case: m or more
      policy2text.each do |aPolicy, suffix|
        [2, 3, 5, 100].each do |aCount|
          multi = Multiplicity.new(aCount, :more, aPolicy)
          expect(multi.to_str).to eq("{#{aCount},}#{suffix}")
        end
      end
    end
  end
end
80
+
81
+ end # module
82
+ end # module
83
+ # End of file