rley 0.5.07 → 0.5.08
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/examples/NLP/{benchmark_mini_en.rb → benchmark_pico_en.rb} +0 -0
- data/examples/NLP/nano_eng/nano_en_demo.rb +118 -0
- data/examples/NLP/nano_eng/nano_grammar.rb +59 -0
- data/examples/NLP/{mini_en_demo.rb → pico_en_demo.rb} +2 -2
- data/examples/general/SRL/lib/ast_builder.rb +176 -0
- data/examples/general/SRL/lib/ast_building.rb +20 -0
- data/examples/general/SRL/lib/grammar.rb +32 -0
- data/examples/general/SRL/lib/parser.rb +26 -0
- data/examples/general/SRL/lib/regex/multiplicity.rb +94 -0
- data/examples/general/SRL/lib/regex_repr.rb +1 -0
- data/examples/general/SRL/lib/srl_demo.rb +67 -0
- data/examples/general/SRL/lib/tokenizer.rb +101 -0
- data/examples/general/SRL/spec/integration_spec.rb +103 -0
- data/examples/general/SRL/spec/regex/multiplicity_spec.rb +83 -0
- data/examples/general/SRL/spec/spec_helper.rb +25 -0
- data/examples/general/SRL/spec/tokenizer_spec.rb +125 -0
- data/examples/general/SRL/srl_demo.rb +57 -0
- data/examples/general/calc_iter1/calc_demo.rb +1 -1
- data/examples/general/calc_iter2/ast_building.rb +20 -0
- data/examples/general/calc_iter2/calc_ast_builder.rb +3 -23
- data/examples/general/calc_iter2/calc_demo.rb +1 -1
- data/lib/rley/base/base_parser.rb +1 -1
- data/lib/rley/base/grm_items_builder.rb +1 -1
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/gfg/non_terminal_vertex.rb +1 -1
- data/lib/rley/parser/gfg_chart.rb +8 -3
- data/lib/rley/parser/gfg_earley_parser.rb +5 -2
- data/lib/rley/parser/gfg_parsing.rb +5 -1
- data/lib/rley/parser/parse_tree_builder.rb +16 -5
- data/lib/rley/ptree/terminal_node.rb +3 -2
- data/spec/rley/parser/ast_builder_spec.rb +2 -2
- data/spec/rley/parser/cst_builder_spec.rb +2 -3
- metadata +20 -4
@@ -0,0 +1,26 @@
|
|
1
|
+
# Purpose: to define a parser for a subset of the Simple Regex
|
2
|
+
# language
|
3
|
+
require_relative 'tokenizer'
|
4
|
+
require_relative 'grammar'
|
5
|
+
module SRL
|
6
|
+
# Earley parser specialised for a subset of the Simple Regex Language (SRL).
class Parser < Rley::Parser::GFGEarleyParser
  # NOTE(review): nothing in this class assigns @source_file, so this reader
  # returns nil unless the variable is set elsewhere -- confirm it is needed.
  attr_reader :source_file

  # Constructor.
  # Set up the inherited Earley parser with the SRL grammar, i.e. the
  # Grammar constant made available by the 'grammar' file.
  def initialize
    super(Grammar)
  end

  # Tokenize the given SRL text, then run the inherited #parse on the tokens.
  # @param aText [String] the SRL text to analyse
  # @return the value returned by the inherited #parse method
  def parse_SRL(aText)
    token_stream = Tokenizer.new(aText, grammar).tokens
    parse(token_stream)
  end
end # class
|
24
|
+
end # module
|
25
|
+
|
26
|
+
# End of file
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# File: Multiplicity.rb
|
2
|
+
|
3
|
+
module SRL
|
4
|
+
module Regex # This module is used as a namespace
|
5
|
+
# The multiplicity specifies by how much a given expression can be repeated.
class Multiplicity
  # The lowest acceptable repetition count
  attr_reader(:lower_bound)

  # The highest possible repetition count (an Integer or the :more symbol)
  attr_reader(:upper_bound)

  # An indicator that specifies how to repeat (:greedy, :lazy, :possessive)
  attr_reader(:policy)

  # @param aLowerBound [Integer] a non-negative repetition count
  # @param anUpperBound [Integer, Symbol] non-negative integer or :more symbol
  # @param aPolicy [Symbol] One of: (:greedy, :lazy, :possessive)
  # @raise [StandardError] when one of the arguments is invalid
  def initialize(aLowerBound, anUpperBound, aPolicy)
    @lower_bound = valid_lower_bound(aLowerBound)
    @upper_bound = valid_upper_bound(anUpperBound)
    @policy = valid_policy(aPolicy)
  end

  public

  # Purpose: Return the String representation of the multiplicity.
  # The text is a regular expression quantifier ('?', '*', '+', '{m}',
  # '{m,}' or '{m,n}') followed by a policy suffix: nothing for :greedy,
  # '?' for :lazy and '+' for :possessive.
  # @return [String]
  def to_str
    subresult =
      case upper_bound
      when :more
        # No upper limit
        case lower_bound
        when 0 then '*'
        when 1 then '+'
        else "{#{lower_bound},}"
        end
      when lower_bound
        # Both bounds are equal: exact repetition count
        "{#{lower_bound}}"
      else
        if [lower_bound, upper_bound] == [0, 1]
          '?'
        else
          "{#{lower_bound},#{upper_bound}}"
        end
      end

    suffix = case policy
             when :greedy then ''
             when :lazy then '?'
             when :possessive then '+'
             end

    return subresult + suffix
  end

  private

  # Validation method. Return the validated lower bound value.
  # A repetition count must be an Integer and cannot be negative.
  def valid_lower_bound(aLowerBound)
    err_msg = "Invalid lower bound of repetition count #{aLowerBound}"
    raise StandardError, err_msg unless aLowerBound.kind_of?(Integer)
    raise StandardError, err_msg if aLowerBound < 0

    return aLowerBound
  end

  # Validation method. Return the validated upper bound value.
  # The upper bound is either the :more symbol (no limit) or
  # a non-negative Integer.
  def valid_upper_bound(anUpperBound)
    return anUpperBound if anUpperBound == :more

    err_msg = "Invalid upper bound of repetition count #{anUpperBound}"
    raise StandardError, err_msg unless anUpperBound.kind_of?(Integer)
    raise StandardError, err_msg if anUpperBound < 0

    return anUpperBound
  end

  # Validation method. Return the validated policy value.
  def valid_policy(aPolicy)
    err_msg = "Invalid repetition policy '#{aPolicy}'."
    valid_policies = [:greedy, :lazy, :possessive]
    raise StandardError, err_msg unless valid_policies.include? aPolicy

    return aPolicy
  end
end # class
|
90
|
+
|
91
|
+
end # module
|
92
|
+
end # module
|
93
|
+
|
94
|
+
# End of file
|
@@ -0,0 +1 @@
|
|
1
|
+
require_relative './regex/multiplicity'
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require_relative 'parser'
|
2
|
+
require_relative 'ast_builder'
|
3
|
+
|
4
|
+
# Write the given title to the standard output, followed by a line of
# '=' characters that has the same length as the title.
# @param aTitle [String] the title text
def print_title(aTitle)
  underline = '=' * aTitle.size
  puts aTitle, underline
end
|
8
|
+
|
9
|
+
# Display a parse tree on the standard output: an underlined title, then
# the tree drawn by the Asciitree formatter, then an empty line.
# @param aTitle [String] the heading printed above the tree
# @param aParseTree the parse tree handed over to Rley::ParseTreeVisitor
def print_tree(aTitle, aParseTree)
  # The visitor is created first, before anything is printed
  tree_walker = Rley::ParseTreeVisitor.new(aParseTree)

  print_title(aTitle)
  Rley::Formatter::Asciitree.new($stdout).render(tree_walker)
  puts ''
end
|
19
|
+
|
20
|
+
# Create a SRL parser object
parser = SRL::Parser.new

# Without command-line argument: display the usage text and quit.
# NOTE(review): the usage text describes the argument as a file name,
# whereas the code below parses ARGV[0] itself as SRL text -- confirm
# which behaviour is intended.
if ARGV.empty?
  my_name = File.basename(__FILE__)
  msg = <<-END_MSG
Demo parser for the SRL, the Simple Regex Language (https://simple-regex.com/).
Ultimately it will support SRL in full, currently it parses only the
SRL quantifiers.
The utility prints the resulting regular expression.

Command-line syntax:
ruby #{my_name} filename
where:
the file name is a SRL source file.

Examples:
ruby #{my_name} sample01.srl
  END_MSG
  puts msg
  exit(1)
end
puts ARGV[0]

# SRL::Parser defines #parse_SRL (it has no #parse_expression method)
result = parser.parse_SRL(ARGV[0])

unless result.success?
  # Stop if the parse failed...
  puts "Parsing of '#{ARGV[0]}' failed"
  puts "Reason: #{result.failure_reason.message}"
  exit(1)
end

# Generate a concrete syntax parse tree from the parse result
cst_ptree = result.parse_tree
print_tree('Concrete Syntax Tree (CST)', cst_ptree)

# Generate an abstract syntax parse tree from the parse result
tree_builder = ASTBuilder
ast_ptree = result.parse_tree(tree_builder)
|
61
|
+
# print_tree('Abstract Syntax Tree (AST)', ast_ptree)
|
62
|
+
|
63
|
+
# # Now perform the computation of math expression
|
64
|
+
# root = ast_ptree.root
|
65
|
+
# print_title('Result:')
|
66
|
+
# puts root.interpret.to_s # Output the expression result
|
67
|
+
# End of file
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# File: tokenizer.rb
|
2
|
+
# Tokenizer for SRL (Simple Regex Language)
|
3
|
+
require 'strscan'
|
4
|
+
require 'rley' # Load the gem
|
5
|
+
|
6
|
+
module SRL
|
7
|
+
# The tokenizer converts SRL text into a sequence of tokens.
# It recognizes:
# Keywords: the SRL keywords listed in @@keywords (in any letter case)
# Integer literals (two digits or more) and single digits
# Single character literals
# Delimiters: parentheses '(' and ')'
# Separators: comma
# Quote-delimited string literals are not handled yet.
class Tokenizer
  attr_reader(:scanner)
  attr_reader(:lineno)
  # NOTE(review): @line_start is never assigned in this class
  attr_reader(:line_start)
  attr_reader(:name2symbol)

  # Mapping from a single-character lexeme to its terminal symbol name
  @@lexeme2name = {
    '(' => 'LPAREN',
    ')' => 'RPAREN',
    ',' => 'COMMA'
  }.freeze

  # Here are all the SRL keywords (in uppercase)
  @@keywords = %w[
    AND
    AT
    BETWEEN
    EXACTLY
    LEAST
    MORE
    NEVER
    ONCE
    OPTIONAL
    OR
    TIMES
    TWICE
  ].map { |x| [x, x] } .to_h.freeze

  # Error raised when the input contains text that isn't a valid token
  class ScanError < StandardError; end

  # @param source [String] the SRL text to tokenize
  # @param aGrammar an object answering to #name2symbol; the returned
  #   mapping is indexed with terminal names to get the token types
  def initialize(source, aGrammar)
    @scanner = StringScanner.new(source)
    @name2symbol = aGrammar.name2symbol
    @lineno = 1
  end

  # Scan the remaining input and return all the tokens found.
  # @return [Array] the sequence of tokens
  # @raise [ScanError] when an unknown token is met
  def tokens
    tok_sequence = []
    until @scanner.eos?
      token = _next_token
      tok_sequence << token unless token.nil?
    end

    return tok_sequence
  end

  private

  # Return the next token from the input, or nil when only whitespace
  # was left before the end of the input.
  def _next_token
    skip_whitespaces
    curr_ch = scanner.peek(1)
    # At end of input, StringScanner#peek returns an empty string (not nil)
    return nil if curr_ch.nil? || curr_ch.empty?

    token = nil

    if '(),'.include? curr_ch
      # Single character token
      token = build_token(@@lexeme2name[curr_ch], scanner.getch)
    elsif (lexeme = scanner.scan(/[0-9]{2,}/))
      token = build_token('INTEGER', lexeme) # An integer has two or more digits
    elsif (lexeme = scanner.scan(/[0-9]/))
      token = build_token('DIGIT', lexeme)
    elsif (lexeme = scanner.scan(/[a-zA-Z]{2,}/))
      token = build_token(@@keywords[lexeme.upcase], lexeme)
      # TODO: handle case unknown identifier
    elsif (lexeme = scanner.scan(/\w/))
      token = build_token('CHAR', lexeme)
    else # Unknown token
      # peek doesn't consume the offending character: the scanned text
      # below already starts with it.
      erroneous = scanner.scan(/.{1,20}/) || curr_ch
      raise ScanError, "Unknown token #{erroneous}"
    end

    return token
  end

  # Create a token for the given lexeme; its type is looked up by name
  # in the name2symbol mapping.
  def build_token(aSymbolName, aLexeme)
    token_type = name2symbol[aSymbolName]
    return Rley::Lexical::Token.new(aLexeme, token_type)
  end

  # Move the scanner past any run of blanks, tabs, form feeds and newlines
  def skip_whitespaces
    scanner.scan(/[ \t\f\n\r]+/)
  end
end # class
|
101
|
+
end # module
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require_relative 'spec_helper' # Use the RSpec framework
|
2
|
+
require_relative '../lib/parser'
|
3
|
+
require_relative '../lib/ast_builder'
|
4
|
+
|
5
|
+
describe 'Integration tests:' do
  # Parse the given SRL text with a new parser.
  # @param someSRL [String] the SRL text
  # @return the value returned by SRL::Parser#parse_SRL
  def parse(someSRL)
    SRL::Parser.new.parse_SRL(someSRL)
  end

  # Generate an abstract syntax parse tree from the parse result
  # and return the root of that tree.
  def regexp_repr(aResult)
    aResult.parse_tree(ASTBuilder).root
  end

  context 'Parsing quantifiers:' do
    it "should parse 'once' syntax" do
      result = parse('once')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('{1}')
    end

    it "should parse 'twice' syntax" do
      result = parse('twice')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('{2}')
    end

    it "should parse 'optional' syntax" do
      result = parse('optional')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('?')
    end

    it "should parse 'exactly ... times' syntax" do
      result = parse('exactly 4 times')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('{4}')
    end

    it "should parse 'between ... and ... times' syntax" do
      result = parse('between 2 and 4 times')
      expect(result).to be_success

      # Dropping 'times' keyword is shorter syntax
      expect(parse('between 2 and 4')).to be_success

      regexp = regexp_repr(result)
      # Multiplicity#to_str puts no space after the comma.
      # NOTE(review): assumes the AST root is a Regex::Multiplicity, as the
      # other expectations of this file suggest -- confirm.
      expect(regexp.to_str).to eq('{2,4}')
    end

    it "should parse 'once or more' syntax" do
      result = parse('once or more')
      expect(result).to be_success
    end

    it "should parse 'never or more' syntax" do
      result = parse('never or more')
      expect(result).to be_success
    end

    it "should parse 'at least ... times' syntax" do
      result = parse('at least 10 times')
      expect(result).to be_success

      regexp = regexp_repr(result)
      expect(regexp.to_str).to eq('{10,}')
    end
  end # context
end # describe
|
83
|
+
|
84
|
+
|
85
|
+
=begin
|
86
|
+
|
87
|
+
unless result.success?
|
88
|
+
# Stop if the parse failed...
|
89
|
+
puts "Parsing of '#{ARGV[0]}' failed"
|
90
|
+
puts "Reason: #{result.failure_reason.message}"
|
91
|
+
exit(1)
|
92
|
+
end
|
93
|
+
|
94
|
+
|
95
|
+
# Generate a concrete syntax parse tree from the parse result
|
96
|
+
cst_ptree = result.parse_tree
|
97
|
+
print_tree('Concrete Syntax Tree (CST)', cst_ptree)
|
98
|
+
|
99
|
+
# Generate an abstract syntax parse tree from the parse result
|
100
|
+
tree_builder = ASTBuilder
|
101
|
+
ast_ptree = result.parse_tree(tree_builder)
|
102
|
+
=end
|
103
|
+
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# File: Multiplicity_spec.rb
|
2
|
+
|
3
|
+
require 'pp'
|
4
|
+
require_relative '../spec_helper' # Use the RSpec test framework
|
5
|
+
require_relative '../../lib/regex/multiplicity'
|
6
|
+
|
7
|
+
module SRL
|
8
|
+
# Reopen the module, in order to get rid of fully qualified names
|
9
|
+
module Regex # This module is used as a namespace
|
10
|
+
|
11
|
+
describe Multiplicity do
  context "Creation & initialisation" do
    it "should be created with 3 arguments" do
      # Every supported policy is accepted together with two integer bounds
      %i[greedy lazy possessive].each do |policy|
        expect { Multiplicity.new(0, 1, policy) }.not_to raise_error
      end

      # An unknown policy value is refused
      expected_msg = "Invalid repetition policy 'wrong'."
      expect { Multiplicity.new(0, :more, 'wrong') }.to raise_error(StandardError, expected_msg)
    end
  end

  context "Provided services" do
    it 'should know its text representation' do
      # Policy => suffix appended to the quantifier text
      policy2text = { greedy: '', lazy: '?', possessive: '+' }

      # Zero or one
      policy2text.each_pair do |policy, suffix|
        expect(Multiplicity.new(0, 1, policy).to_str).to eq("?#{suffix}")
      end

      # Zero or more
      policy2text.each_pair do |policy, suffix|
        expect(Multiplicity.new(0, :more, policy).to_str).to eq("*#{suffix}")
      end

      # One or more
      policy2text.each_pair do |policy, suffix|
        expect(Multiplicity.new(1, :more, policy).to_str).to eq("+#{suffix}")
      end

      # Exactly m times
      policy2text.each_pair do |policy, suffix|
        [1, 2, 5, 100].each do |count|
          exact = Multiplicity.new(count, count, policy)
          expect(exact.to_str).to eq("{#{count}}#{suffix}")
        end
      end

      # Between m and n times (n is picked at random above m)
      policy2text.each_pair do |policy, suffix|
        [1, 2, 5, 100].each do |count|
          upper = count + 1 + rand(20)
          ranged = Multiplicity.new(count, upper, policy)
          expect(ranged.to_str).to eq("{#{count},#{upper}}#{suffix}")
        end
      end

      # m times or more
      policy2text.each_pair do |policy, suffix|
        [2, 3, 5, 100].each do |count|
          unbounded = Multiplicity.new(count, :more, policy)
          expect(unbounded.to_str).to eq("{#{count},}#{suffix}")
        end
      end
    end
  end
end
|
80
|
+
|
81
|
+
end # module
|
82
|
+
end # module
|
83
|
+
# End of file
|