rley 0.5.07 → 0.5.08
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/examples/NLP/{benchmark_mini_en.rb → benchmark_pico_en.rb} +0 -0
- data/examples/NLP/nano_eng/nano_en_demo.rb +118 -0
- data/examples/NLP/nano_eng/nano_grammar.rb +59 -0
- data/examples/NLP/{mini_en_demo.rb → pico_en_demo.rb} +2 -2
- data/examples/general/SRL/lib/ast_builder.rb +176 -0
- data/examples/general/SRL/lib/ast_building.rb +20 -0
- data/examples/general/SRL/lib/grammar.rb +32 -0
- data/examples/general/SRL/lib/parser.rb +26 -0
- data/examples/general/SRL/lib/regex/multiplicity.rb +94 -0
- data/examples/general/SRL/lib/regex_repr.rb +1 -0
- data/examples/general/SRL/lib/srl_demo.rb +67 -0
- data/examples/general/SRL/lib/tokenizer.rb +101 -0
- data/examples/general/SRL/spec/integration_spec.rb +103 -0
- data/examples/general/SRL/spec/regex/multiplicity_spec.rb +83 -0
- data/examples/general/SRL/spec/spec_helper.rb +25 -0
- data/examples/general/SRL/spec/tokenizer_spec.rb +125 -0
- data/examples/general/SRL/srl_demo.rb +57 -0
- data/examples/general/calc_iter1/calc_demo.rb +1 -1
- data/examples/general/calc_iter2/ast_building.rb +20 -0
- data/examples/general/calc_iter2/calc_ast_builder.rb +3 -23
- data/examples/general/calc_iter2/calc_demo.rb +1 -1
- data/lib/rley/base/base_parser.rb +1 -1
- data/lib/rley/base/grm_items_builder.rb +1 -1
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/gfg/non_terminal_vertex.rb +1 -1
- data/lib/rley/parser/gfg_chart.rb +8 -3
- data/lib/rley/parser/gfg_earley_parser.rb +5 -2
- data/lib/rley/parser/gfg_parsing.rb +5 -1
- data/lib/rley/parser/parse_tree_builder.rb +16 -5
- data/lib/rley/ptree/terminal_node.rb +3 -2
- data/spec/rley/parser/ast_builder_spec.rb +2 -2
- data/spec/rley/parser/cst_builder_spec.rb +2 -3
- metadata +20 -4
@@ -0,0 +1,26 @@
|
|
1
|
+
# Purpose: to define a parser for a subset of the SRL (Simple Regex
|
2
|
+
# Language)
|
3
|
+
require_relative 'tokenizer'
|
4
|
+
require_relative 'grammar'
|
5
|
+
module SRL
  # An Earley parser specialised for a subset of the Simple Regex Language.
  # All the parsing machinery is inherited from Rley's GFGEarleyParser;
  # this class merely plugs in the grammar and the tokenizer.
  class Parser < Rley::Parser::GFGEarleyParser
    # NOTE(review): nothing in this class assigns @source_file, so this
    # reader returns nil unless it is set elsewhere.
    attr_reader :source_file

    # Constructor.
    # Initialise the inherited Earley parser with the `Grammar` constant
    # (presumably defined in grammar.rb, which this file requires).
    def initialize
      super(Grammar)
    end

    # Tokenize the given SRL text, then parse the resulting token sequence.
    # @param aText [String] SRL text to analyse
    # @return the value returned by the inherited `parse` method
    def parse_SRL(aText)
      token_stream = Tokenizer.new(aText, grammar).tokens
      parse(token_stream)
    end
  end # class
end # module
|
25
|
+
|
26
|
+
# End of file
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# File: Multiplicity.rb
|
2
|
+
|
3
|
+
module SRL
  module Regex # This module is used as a namespace
    # The multiplicity specifies by how much a given expression can be repeated.
    # It knows how to render itself as a regular expression quantifier
    # (e.g. '?', '*', '+', '{2}', '{2,4}', '{3,}'), followed by an optional
    # suffix that depends on the repetition policy.
    class Multiplicity
      # Mapping from a repetition policy to the suffix appended to the
      # quantifier text.
      POLICY_SUFFIX = { greedy: '', lazy: '?', possessive: '+' }.freeze

      # The lowest acceptable repetition count
      attr_reader(:lower_bound)

      # The highest possible repetition count
      attr_reader(:upper_bound)

      # An indicator that specifies how to repeat (:greedy, :lazy, :possessive)
      attr_reader(:policy)

      # @param aLowerBound [Integer] a non-negative integer
      # @param anUpperBound [Integer, Symbol] integer (not smaller than the
      #   lower bound) or :more symbol
      # @param aPolicy [Symbol] One of: (:greedy, :lazy, :possessive)
      # @raise [StandardError] when one of the arguments is invalid
      def initialize(aLowerBound, anUpperBound, aPolicy)
        # The lower bound must be validated first: the upper bound check
        # compares against it.
        @lower_bound = valid_lower_bound(aLowerBound)
        @upper_bound = valid_upper_bound(anUpperBound)
        @policy = valid_policy(aPolicy)
      end

      # Purpose: Return the String representation of the multiplicity.
      # @return [String] the quantifier text followed by the policy suffix
      def to_str
        quantifier_text + POLICY_SUFFIX.fetch(policy)
      end

      private

      # Return the quantifier text without the policy suffix.
      def quantifier_text
        if upper_bound == :more
          case lower_bound
          when 0 then '*'
          when 1 then '+'
          else "{#{lower_bound},}"
          end
        elsif upper_bound == lower_bound
          "{#{lower_bound}}"
        elsif [lower_bound, upper_bound] == [0, 1]
          '?'
        else
          "{#{lower_bound},#{upper_bound}}"
        end
      end

      # Validation method. Return the validated lower bound value.
      # A repetition count must be a non-negative integer.
      def valid_lower_bound(aLowerBound)
        err_msg = "Invalid lower bound of repetition count #{aLowerBound}"
        unless aLowerBound.kind_of?(Integer) && aLowerBound >= 0
          raise StandardError, err_msg
        end

        return aLowerBound
      end

      # Validation method. Return the validated upper bound value.
      # The upper bound is either the :more symbol (i.e. unbounded) or
      # an integer that isn't smaller than the lower bound.
      def valid_upper_bound(anUpperBound)
        err_msg = "Invalid upper bound of repetition count #{anUpperBound}"
        return anUpperBound if anUpperBound == :more

        unless anUpperBound.kind_of?(Integer) && anUpperBound >= lower_bound
          raise StandardError, err_msg
        end

        return anUpperBound
      end

      # Validation method. Return the validated policy value.
      def valid_policy(aPolicy)
        err_msg = "Invalid repetition policy '#{aPolicy}'."
        raise StandardError, err_msg unless POLICY_SUFFIX.key?(aPolicy)

        return aPolicy
      end
    end # class
  end # module
end # module
|
93
|
+
|
94
|
+
# End of file
|
@@ -0,0 +1 @@
|
|
1
|
+
require_relative './regex/multiplicity'
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require_relative 'parser'
|
2
|
+
require_relative 'ast_builder'
|
3
|
+
|
4
|
+
# Print the given title on the standard output, then underline it with
# a row of '=' characters of the same length.
# @param aTitle [String] the text to display
def print_title(aTitle)
  underline = '=' * aTitle.size
  puts(aTitle, underline)
end
|
8
|
+
|
9
|
+
# Display a title followed by an ASCII rendition of the given parse tree
# and an empty line.
# @param aTitle [String] the heading to print before the tree
# @param aParseTree the parse tree handed over to the Rley visitor
def print_tree(aTitle, aParseTree)
  # The visitor walks the tree on behalf of the formatter
  tree_walker = Rley::ParseTreeVisitor.new(aParseTree)

  print_title(aTitle)

  # The formatter writes its output on the standard output
  Rley::Formatter::Asciitree.new($stdout).render(tree_walker)
  puts ''
end
|
19
|
+
|
20
|
+
# Create a SRL parser object
parser = SRL::Parser.new

# The SRL expression to parse is expected as the first command-line argument.
# Without it, display the usage text and quit.
if ARGV.empty?
  my_name = File.basename(__FILE__)
  msg = <<-END_MSG
Demo parser for the SRL, the Simple Regex Language (https://simple-regex.com/).
Ultimately it will support SRL in full, currently it parses only the
SRL quantifiers.
The utility prints the resulting regular expression.

Command-line syntax:
  ruby #{my_name} "SRL expression"
  where:
    the SRL expression is enclosed between double quotes (")

  Examples:
  ruby #{my_name} "exactly 4 times"
END_MSG
  puts msg
  exit(1)
end
puts ARGV[0]

# SRL::Parser exposes parse_SRL (it has no parse_expression method)
result = parser.parse_SRL(ARGV[0])

unless result.success?
  # Stop if the parse failed...
  puts "Parsing of '#{ARGV[0]}' failed"
  puts "Reason: #{result.failure_reason.message}"
  exit(1)
end

# Generate a concrete syntax parse tree from the parse result
cst_ptree = result.parse_tree
print_tree('Concrete Syntax Tree (CST)', cst_ptree)

# Generate an abstract syntax parse tree from the parse result
tree_builder = ASTBuilder
ast_ptree = result.parse_tree(tree_builder)
# print_tree('Abstract Syntax Tree (AST)', ast_ptree)
|
62
|
+
|
63
|
+
# # Now perform the computation of math expression
|
64
|
+
# root = ast_ptree.root
|
65
|
+
# print_title('Result:')
|
66
|
+
# puts root.interpret.to_s # Output the expression result
|
67
|
+
# End of file
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# File: tokenizer.rb
|
2
|
+
# Tokenizer for SRL (Simple Regex Language)
|
3
|
+
require 'strscan'
|
4
|
+
require 'rley' # Load the gem
|
5
|
+
|
6
|
+
module SRL
  # Tokenizer for a subset of SRL (Simple Regex Language).
  # The tokenizer recognizes:
  # Keywords (case-insensitive): and, at, between, exactly, least, more,
  #   never, once, optional, or, times, twice
  # Integer literals (two or more digits) and single digits
  # Single character literals
  # Delimiters: parentheses '(' and ')'
  # Separators: comma
  class Tokenizer
    # The StringScanner that walks over the source text
    attr_reader(:scanner)

    # NOTE(review): initialised to 1 and never updated by this class
    attr_reader(:lineno)

    # NOTE(review): never assigned by this class
    attr_reader(:line_start)

    # Mapping from a symbol name to a grammar symbol, as provided by
    # the grammar passed to the constructor
    attr_reader(:name2symbol)

    @@lexeme2name = {
      '(' => 'LPAREN',
      ')' => 'RPAREN',
      ',' => 'COMMA'
    }.freeze

    # Here are all the SRL keywords (in uppercase)
    @@keywords = %w[
      AND
      AT
      BETWEEN
      EXACTLY
      LEAST
      MORE
      NEVER
      ONCE
      OPTIONAL
      OR
      TIMES
      TWICE
    ].map { |x| [x, x] } .to_h.freeze

    # Error raised when the text contains something that isn't a known token
    class ScanError < StandardError; end

    # @param source [String] the SRL text to tokenize
    # @param aGrammar an object that responds to `name2symbol`
    def initialize(source, aGrammar)
      @scanner = StringScanner.new(source)
      @name2symbol = aGrammar.name2symbol
      @lineno = 1
    end

    # Scan the whole source text.
    # @return [Array] the sequence of tokens found in the text
    # @raise [ScanError] when an unknown token is met
    def tokens
      tok_sequence = []
      until @scanner.eos?
        token = _next_token
        tok_sequence << token unless token.nil?
      end

      return tok_sequence
    end

    private

    # Return the next token from the text, or nil when only whitespace
    # remains.
    def _next_token
      skip_whitespaces

      # At end of input, peek(1) returns an empty string (not nil) and any
      # string "includes" the empty string; so bail out explicitly, otherwise
      # trailing whitespace would result in a bogus delimiter token.
      return nil if scanner.eos?

      curr_ch = scanner.peek(1)

      if '(),'.include? curr_ch
        # Single character token
        build_token(@@lexeme2name[curr_ch], scanner.getch)
      elsif (lexeme = scanner.scan(/[0-9]{2,}/))
        build_token('INTEGER', lexeme) # An integer has two or more digits
      elsif (lexeme = scanner.scan(/[0-9]/))
        build_token('DIGIT', lexeme)
      elsif (lexeme = scanner.scan(/[a-zA-Z]{2,}/))
        keyword = @@keywords[lexeme.upcase]
        # A word that isn't a keyword cannot be mapped to a grammar symbol
        raise ScanError, "Unknown token #{lexeme}" if keyword.nil?

        build_token(keyword, lexeme)
      elsif (lexeme = scanner.scan(/\w/))
        build_token('CHAR', lexeme)
      else # Unknown token
        # curr_ch was only peeked at: the scan below consumes it together
        # with (at most) the next 19 characters on the same line.
        erroneous = scanner.scan(/.{1,20}/) || curr_ch
        raise ScanError, "Unknown token #{erroneous}"
      end
    end

    # Create a token for the given lexeme; its type is the grammar symbol
    # registered under the given name.
    def build_token(aSymbolName, aLexeme)
      token_type = name2symbol[aSymbolName]
      return Rley::Lexical::Token.new(aLexeme, token_type)
    end

    # Advance the scanner past any run of blanks, tabs, form feeds and
    # line terminators.
    def skip_whitespaces
      scanner.scan(/[ \t\f\n\r]+/)
    end
  end # class
end # module
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require_relative 'spec_helper' # Use the RSpec framework
|
2
|
+
require_relative '../lib/parser'
|
3
|
+
require_relative '../lib/ast_builder'
|
4
|
+
|
5
|
+
describe 'Integration tests:' do
  # Helper: parse the given SRL text with a fresh parser.
  # @return the parse result returned by SRL::Parser#parse_SRL
  def parse(someSRL)
    SRL::Parser.new.parse_SRL(someSRL)
  end

  # Helper: build the abstract syntax tree for the given parse result
  # and return its root node.
  def regexp_repr(aResult)
    aResult.parse_tree(ASTBuilder).root
  end

  context 'Parsing quantifiers:' do
    it "should parse 'once' syntax" do
      result = parse('once')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('{1}')
    end

    it "should parse 'twice' syntax" do
      result = parse('twice')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('{2}')
    end

    it "should parse 'optional' syntax" do
      result = parse('optional')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('?')
    end

    it "should parse 'exactly ... times' syntax" do
      result = parse('exactly 4 times')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('{4}')
    end

    it "should parse 'between ... and ... times' syntax" do
      result = parse('between 2 and 4 times')
      expect(result).to be_success

      # Dropping 'times' keyword is shorter syntax
      expect(parse('between 2 and 4')).to be_success

      regexp = regexp_repr(result)
      # A regex range quantifier has no blank after the comma; this is
      # also how SRL::Regex::Multiplicity#to_str renders it.
      # NOTE(review): assumes the AST root is a Multiplicity - confirm
      # against ast_builder.rb.
      expect(regexp.to_str).to eq('{2,4}')
    end

    it "should parse 'once or more' syntax" do
      result = parse('once or more')
      expect(result).to be_success
    end

    it "should parse 'never or more' syntax" do
      result = parse('never or more')
      expect(result).to be_success
    end

    it "should parse 'at least ... times' syntax" do
      result = parse('at least 10 times')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('{10,}')
    end
  end # context
end # describe
|
83
|
+
|
84
|
+
|
85
|
+
=begin
|
86
|
+
|
87
|
+
unless result.success?
|
88
|
+
# Stop if the parse failed...
|
89
|
+
puts "Parsing of '#{ARGV[0]}' failed"
|
90
|
+
puts "Reason: #{result.failure_reason.message}"
|
91
|
+
exit(1)
|
92
|
+
end
|
93
|
+
|
94
|
+
|
95
|
+
# Generate a concrete syntax parse tree from the parse result
|
96
|
+
cst_ptree = result.parse_tree
|
97
|
+
print_tree('Concrete Syntax Tree (CST)', cst_ptree)
|
98
|
+
|
99
|
+
# Generate an abstract syntax parse tree from the parse result
|
100
|
+
tree_builder = ASTBuilder
|
101
|
+
ast_ptree = result.parse_tree(tree_builder)
|
102
|
+
=end
|
103
|
+
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# File: Multiplicity_spec.rb
|
2
|
+
|
3
|
+
require 'pp'
|
4
|
+
require_relative '../spec_helper' # Use the RSpec test framework
|
5
|
+
require_relative '../../lib/regex/multiplicity'
|
6
|
+
|
7
|
+
module SRL
  # Reopen the module, in order to get rid of fully qualified names
  module Regex # This module is used as a namespace
    describe Multiplicity do
      context "Creation & initialisation" do
        it "should be created with 3 arguments" do
          # Every supported policy is accepted together with two integer bounds
          %i[greedy lazy possessive].each do |aPolicy|
            expect { Multiplicity.new(0, 1, aPolicy) }.not_to raise_error
          end

          # A policy outside the supported set is rejected
          err = "Invalid repetition policy 'wrong'."
          expect { Multiplicity.new(0, :more, 'wrong') }.to raise_error(StandardError, err)
        end
      end

      context "Provided services" do
        it 'should know its text representation' do
          # For each policy, the suffix expected after the quantifier text
          policy2text = { greedy: '', lazy: '?', possessive: '+' }

          # Case: zero or one
          policy2text.each do |aPolicy, suffix|
            expect(Multiplicity.new(0, 1, aPolicy).to_str).to eq("?#{suffix}")
          end

          # Case: zero or more
          policy2text.each do |aPolicy, suffix|
            expect(Multiplicity.new(0, :more, aPolicy).to_str).to eq("*#{suffix}")
          end

          # Case: one or more
          policy2text.each do |aPolicy, suffix|
            expect(Multiplicity.new(1, :more, aPolicy).to_str).to eq("+#{suffix}")
          end

          # Case: exactly m times
          policy2text.each do |aPolicy, suffix|
            [1, 2, 5, 100].each do |aCount|
              multi = Multiplicity.new(aCount, aCount, aPolicy)
              expect(multi.to_str).to eq("{#{aCount}}#{suffix}")
            end
          end

          # Case: m, n times (the upper bound is picked at random above m)
          policy2text.each do |aPolicy, suffix|
            [1, 2, 5, 100].each do |aCount|
              upper = aCount + 1 + rand(20)
              multi = Multiplicity.new(aCount, upper, aPolicy)
              expect(multi.to_str).to eq("{#{aCount},#{upper}}#{suffix}")
            end
          end

          # Case: m or more
          policy2text.each do |aPolicy, suffix|
            [2, 3, 5, 100].each do |aCount|
              multi = Multiplicity.new(aCount, :more, aPolicy)
              expect(multi.to_str).to eq("{#{aCount},}#{suffix}")
            end
          end
        end
      end
    end
  end # module
end # module
|
83
|
+
# End of file
|