rley 0.5.07 → 0.5.08
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/examples/NLP/{benchmark_mini_en.rb → benchmark_pico_en.rb} +0 -0
- data/examples/NLP/nano_eng/nano_en_demo.rb +118 -0
- data/examples/NLP/nano_eng/nano_grammar.rb +59 -0
- data/examples/NLP/{mini_en_demo.rb → pico_en_demo.rb} +2 -2
- data/examples/general/SRL/lib/ast_builder.rb +176 -0
- data/examples/general/SRL/lib/ast_building.rb +20 -0
- data/examples/general/SRL/lib/grammar.rb +32 -0
- data/examples/general/SRL/lib/parser.rb +26 -0
- data/examples/general/SRL/lib/regex/multiplicity.rb +94 -0
- data/examples/general/SRL/lib/regex_repr.rb +1 -0
- data/examples/general/SRL/lib/srl_demo.rb +67 -0
- data/examples/general/SRL/lib/tokenizer.rb +101 -0
- data/examples/general/SRL/spec/integration_spec.rb +103 -0
- data/examples/general/SRL/spec/regex/multiplicity_spec.rb +83 -0
- data/examples/general/SRL/spec/spec_helper.rb +25 -0
- data/examples/general/SRL/spec/tokenizer_spec.rb +125 -0
- data/examples/general/SRL/srl_demo.rb +57 -0
- data/examples/general/calc_iter1/calc_demo.rb +1 -1
- data/examples/general/calc_iter2/ast_building.rb +20 -0
- data/examples/general/calc_iter2/calc_ast_builder.rb +3 -23
- data/examples/general/calc_iter2/calc_demo.rb +1 -1
- data/lib/rley/base/base_parser.rb +1 -1
- data/lib/rley/base/grm_items_builder.rb +1 -1
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/gfg/non_terminal_vertex.rb +1 -1
- data/lib/rley/parser/gfg_chart.rb +8 -3
- data/lib/rley/parser/gfg_earley_parser.rb +5 -2
- data/lib/rley/parser/gfg_parsing.rb +5 -1
- data/lib/rley/parser/parse_tree_builder.rb +16 -5
- data/lib/rley/ptree/terminal_node.rb +3 -2
- data/spec/rley/parser/ast_builder_spec.rb +2 -2
- data/spec/rley/parser/cst_builder_spec.rb +2 -3
- metadata +20 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3c616b691fb51ba2eb00a25fee75ff4a80093990
|
4
|
+
data.tar.gz: 1039cfe8f29c8d1ec7c88fa83c18f9173763b8f2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: df7412344421bd421fb459fe5cf8053618dea1212c4da27e83cf41225dbaf664d9b143499978e6bcef2ae293a7bf9378d3ecb4867f989553f798e9723ba8344b
|
7
|
+
data.tar.gz: 436474ceafd2689137fab890b19ca24715ebe72dd1311b3ad64313bc130cf8bbce12fe35049008d20a89634309cbac882da70bc891522d45a58e8ce310b466a7
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
### 0.5.08 / 2017-11-xx
|
2
|
+
* [FIX] Method `BaseParser::initialize` missing parameter name in doc caused a YARD warning.
|
3
|
+
* [FIX] Method `GrmItemsBuilder::build_dotted_items` missing parameter name in doc caused a YARD warning.
|
4
|
+
* [FIX] Method `NonTerminalVertex::initialize` missing parameter name in doc caused a YARD warning.
|
5
|
+
|
6
|
+
|
1
7
|
### 0.5.07 / 2017-11-11
|
2
8
|
* [NEW] File `benchmark_mini_en.rb` added in `examples/NLP` folder for parsing performance measurements.
|
3
9
|
* [CHANGE] Demo calculator in `examples/general/calc_iter2`: added support for log10 and cbrt functions. README.md slightly reworked.
|
File without changes
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'rley' # Load Rley library
|
2
|
+
|
3
|
+
########################################
|
4
|
+
# Step 1. Define a grammar for a nano English-like language
|
5
|
+
# based on example from Jurafsky & Martin book (chapter 8 of the book).
|
6
|
+
# Daniel Jurafsky, James H. Martin: "Speech and Language Processing";
|
7
|
+
# 2009, Pearson Education, Inc., ISBN 978-0135041963
|
8
|
+
# It defines the syntax of a sentence in a mini English-like language
|
9
|
+
# with a very simplified syntax and vocabulary
|
10
|
+
|
11
|
+
# Instantiate a builder object that will build the grammar for us
|
12
|
+
builder = Rley::Syntax::GrammarBuilder.new do
|
13
|
+
# Next 2 lines we define the terminal symbols
|
14
|
+
# (= word categories in the lexicon)
|
15
|
+
add_terminals('Noun', 'Proper-Noun', 'Pronoun', 'Verb')
|
16
|
+
add_terminals('Aux', 'Det', 'Preposition')
|
17
|
+
|
18
|
+
# Here we define the productions (= grammar rules)
|
19
|
+
rule 'Start' => 'S'
|
20
|
+
rule 'S' => %w[NP VP]
|
21
|
+
rule 'S' => %w[Aux NP VP]
|
22
|
+
rule 'S' => 'VP'
|
23
|
+
rule 'NP' => 'Pronoun'
|
24
|
+
rule 'NP' => 'Proper-Noun'
|
25
|
+
rule 'NP' => %w[Det Nominal]
|
26
|
+
# A Nominal is a sequence of one or more nouns (left-recursive rule).
# Use a plain string for a single symbol and %w[] for a sequence of symbols,
# like the other rules do (%[...] is a String literal, not an Array).
rule 'Nominal' => 'Noun'
rule 'Nominal' => %w[Nominal Noun]
|
28
|
+
rule 'VP' => 'Verb'
|
29
|
+
rule 'VP' => %w[Verb NP]
|
30
|
+
rule 'VP' => %w[Verb NP PP]
|
31
|
+
rule 'VP' => %w[Verb PP]
|
32
|
+
rule 'VP' => %w[VP PP]
|
33
|
+
rule 'PP' => %w[Preposition NP]
|
34
|
+
end
|
35
|
+
|
36
|
+
# And now, let's build the grammar...
|
37
|
+
grammar = builder.grammar
|
38
|
+
|
39
|
+
########################################
|
40
|
+
# Step 2. Creating a lexicon
|
41
|
+
# To simplify things, lexicon is implemented as a Hash with pairs of the form:
|
42
|
+
# word => terminal symbol name
|
43
|
+
# Category names must match the terminal symbols declared in the grammar
# above; determiners are therefore mapped to 'Det' (the grammar has no
# 'Determiner' terminal).
Lexicon = {
  'man' => 'Noun',
  'dog' => 'Noun',
  'cat' => 'Noun',
  'telescope' => 'Noun',
  'park' => 'Noun',
  'saw' => 'Verb',
  'ate' => 'Verb',
  'walked' => 'Verb',
  'John' => 'Proper-Noun',
  'Mary' => 'Proper-Noun',
  'Bob' => 'Proper-Noun',
  'a' => 'Det',
  'an' => 'Det',
  'the' => 'Det',
  'my' => 'Det',
  'in' => 'Preposition',
  'on' => 'Preposition',
  'by' => 'Preposition',
  'with' => 'Preposition'
}.freeze
|
64
|
+
|
65
|
+
########################################
|
66
|
+
# Step 3. Creating a tokenizer
|
67
|
+
# A tokenizer reads the input string and converts it into a sequence of tokens
|
68
|
+
# Highly simplified tokenizer implementation.
|
69
|
+
# Convert the input text into a sequence of tokens.
# The text is split on whitespace; each word is looked up in Lexicon to find
# the name of its terminal symbol, which is then resolved through the grammar.
# @param aTextToParse [String] The text to tokenize
# @param aGrammar [#name2symbol] Grammar giving access to its symbols by name
# @return [Array<Rley::Lexical::Token>] One token per word, in input order
# @raise [StandardError] If a word is not in the lexicon, or if its terminal
#   name is unknown to the grammar
def tokenizer(aTextToParse, aGrammar)
  aTextToParse.scan(/\S+/).map do |word|
    term_name = Lexicon[word]
    raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?

    # Fail early instead of creating a token without terminal symbol
    terminal = aGrammar.name2symbol[term_name]
    if terminal.nil?
      raise StandardError, "Terminal '#{term_name}' of word '#{word}' is not defined in grammar"
    end

    Rley::Lexical::Token.new(word, terminal)
  end
end
|
79
|
+
|
80
|
+
########################################
|
81
|
+
# Step 4. Create a parser for that grammar
|
82
|
+
# Easy with Rley...
|
83
|
+
parser = Rley::Parser::GFGEarleyParser.new(grammar)
|
84
|
+
|
85
|
+
########################################
|
86
|
+
# Step 5. Parsing the input
|
87
|
+
input_to_parse = 'John saw Mary with a telescope'
|
88
|
+
# input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
|
89
|
+
# Convert input text into a sequence of token objects...
|
90
|
+
tokens = tokenizer(input_to_parse, grammar)
|
91
|
+
result = parser.parse(tokens)
|
92
|
+
|
93
|
+
puts "Parsing successful? #{result.success?}"
|
94
|
+
unless result.success?
|
95
|
+
puts result.failure_reason.message
|
96
|
+
exit(1)
|
97
|
+
end
|
98
|
+
|
99
|
+
########################################
|
100
|
+
# Step 6. Generating a parse tree from parse result
|
101
|
+
ptree = result.parse_tree
|
102
|
+
|
103
|
+
# Let's create a parse tree visitor
|
104
|
+
visitor = Rley::ParseTreeVisitor.new(ptree)
|
105
|
+
|
106
|
+
# Let's create a formatter (i.e. visit event listener)
|
107
|
+
# renderer = Rley::Formatter::Debug.new($stdout)
|
108
|
+
|
109
|
+
# Let's create a formatter that will render the parse tree with characters
|
110
|
+
renderer = Rley::Formatter::Asciitree.new($stdout)
|
111
|
+
|
112
|
+
# Let's create a formatter that will render the parse tree in labelled
|
113
|
+
# bracket notation
|
114
|
+
# renderer = Rley::Formatter::BracketNotation.new($stdout)
|
115
|
+
|
116
|
+
# Subscribe the formatter to the visitor's event and launch the visit
|
117
|
+
renderer.render(visitor)
|
118
|
+
# End of file
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# Grammar for a simple subset of English language
|
2
|
+
# It is called nano-English because it has a more elaborate
|
3
|
+
# grammar than pico-English but remains still tiny compared to "real" English
|
4
|
+
require 'rley' # Load the gem
|
5
|
+
|
6
|
+
|
7
|
+
########################################
|
8
|
+
# Define a grammar for a nano English-like language
|
9
|
+
# based on chapter 12 from Jurafsky & Martin book.
|
10
|
+
# Daniel Jurafsky, James H. Martin: "Speech and Language Processing";
|
11
|
+
# 2009, Pearson Education, Inc., ISBN 978-0135041963
|
12
|
+
# It defines the syntax of a sentence in a mini English-like language
|
13
|
+
builder = Rley::Syntax::GrammarBuilder.new do
|
14
|
+
add_terminals('Pronoun', 'Proper-Noun')
|
15
|
+
add_terminals('Determiner', 'Noun')
|
16
|
+
add_terminals('Cardinal_number', 'Ordinal_number', 'Quant')
|
17
|
+
add_terminals('Verb', 'GerundV', 'Aux')
|
18
|
+
add_terminals('Predeterminer', 'Preposition')
|
19
|
+
|
20
|
+
rule 'language' => 'sentence'
|
21
|
+
rule 'sentence' => 'declarative'
|
22
|
+
rule 'sentence' => 'imperative'
|
23
|
+
rule 'sentence' => 'yes_no_question'
|
24
|
+
rule 'sentence' => 'wh_subject_question'
|
25
|
+
rule 'sentence' => 'wh_non_subject_question'
|
26
|
+
rule 'declarative' => %w[NP VP]
|
27
|
+
rule 'imperative' => 'VP'
|
28
|
+
rule 'yes_no_question' => %w[Aux NP VP]
|
29
|
+
rule 'wh_subject_question' => %w[Wh_NP NP VP]
|
30
|
+
rule 'wh_non_subject_question' => %w[Wh_NP Aux NP VP]
|
31
|
+
rule 'NP' => %w[Predeterminer NP] # %w[] (Array of names), as in the other rules
|
32
|
+
rule 'NP' => 'Pronoun'
|
33
|
+
rule 'NP' => 'Proper-Noun'
|
34
|
+
rule 'NP' => %w[Det Card Ord Quant Nominal]
|
35
|
+
rule 'VP' => 'Verb'
|
36
|
+
rule 'VP' => %w[Verb NP]
|
37
|
+
rule 'VP' => %w[Verb NP PP]
|
38
|
+
rule 'VP' => %w[Verb PP]
|
39
|
+
rule 'Det' => 'Determiner'
|
40
|
+
rule 'Det' => []
|
41
|
+
rule 'Card' => 'Cardinal_number'
|
42
|
+
rule 'Card' => []
|
43
|
+
rule 'Ord' => 'Ordinal_number'
|
44
|
+
rule 'Ord' => []
|
45
|
+
rule 'Nominal' => 'Noun'
|
46
|
+
rule 'Nominal' => %w[Nominal Noun] # %w[] (Array of names), as in the other rules
|
47
|
+
rule 'Nominal' => %w[Nominal GerundVP]
|
48
|
+
rule 'Nominal' => %w[Nominal RelClause]
|
49
|
+
rule 'PP' => %w[Preposition NP]
|
50
|
+
rule 'GerundVP' => 'GerundV'
|
51
|
+
rule 'GerundVP' => %w[GerundV NP]
|
52
|
+
rule 'GerundVP' => %w[GerundV NP PP]
|
53
|
+
rule 'GerundVP' => %w[GerundV PP]
|
54
|
+
rule 'RelClause' => %w[Relative_pronoun VP]
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
# And now build the grammar...
|
59
|
+
NanoGrammar = builder.grammar
|
@@ -1,12 +1,12 @@
|
|
1
1
|
require 'rley' # Load Rley library
|
2
2
|
|
3
3
|
########################################
|
4
|
-
# Step 1. Define a grammar for a
|
4
|
+
# Step 1. Define a grammar for a pico English-like language
|
5
5
|
# based on example from NLTK book (chapter 8 of the book).
|
6
6
|
# Bird, Steven, Edward Loper and Ewan Klein: "Natural Language Processing
|
7
7
|
# with Python"; 2009, O’Reilly Media Inc., ISBN 978-0596516499
|
8
8
|
# It defines the syntax of a sentence in a mini English-like language
|
9
|
-
# with a very simplified syntax
|
9
|
+
# with a very simplified syntax and vocabulary
|
10
10
|
|
11
11
|
# Instantiate a builder object that will build the grammar for us
|
12
12
|
builder = Rley::Syntax::GrammarBuilder.new do
|
@@ -0,0 +1,176 @@
|
|
1
|
+
require_relative 'ast_building'
|
2
|
+
require_relative 'regex_repr'
|
3
|
+
|
4
|
+
# The purpose of an ASTBuilder is to build piece by piece an AST
|
5
|
+
# (Abstract Syntax Tree) from a sequence of input tokens and
|
6
|
+
# visit events produced by walking over a GFGParsing object.
|
7
|
+
# Uses the Builder GoF pattern.
|
8
|
+
# The Builder pattern creates a complex object
|
9
|
+
# (say, a parse tree) from simpler objects (terminal and non-terminal
|
10
|
+
# nodes) and using a step by step approach.
|
11
|
+
class ASTBuilder < Rley::Parser::ParseTreeBuilder
|
12
|
+
include ASTBuilding
|
13
|
+
|
14
|
+
Terminal2NodeClass = { }.freeze
|
15
|
+
|
16
|
+
protected
|
17
|
+
|
18
|
+
# Overriding method.
|
19
|
+
# Factory method for creating a node object for the given
|
20
|
+
# input token.
|
21
|
+
# @param aProduction [Production] Production rule (not used by this override)
# @param aTerminal [Terminal] Terminal symbol associated with the token
|
22
|
+
# @param aTokenPosition [Integer] Position of token in the input stream
|
23
|
+
# @param aToken [Token] The input token
|
24
|
+
def new_leaf_node(aProduction, aTerminal, aTokenPosition, aToken)
|
25
|
+
node = Rley::PTree::TerminalNode.new(aToken, aTokenPosition)
|
26
|
+
|
27
|
+
return node
|
28
|
+
end
|
29
|
+
|
30
|
+
# Overriding method.
|
31
|
+
# Factory method for creating a parent node object.
|
32
|
+
# @param aProduction [Production] Production rule
|
33
|
+
# @param aRange [Range] Range of tokens matched by the rule
|
34
|
+
# @param theTokens [Array] The input tokens
|
35
|
+
# @param theChildren [Array] Children nodes (one per rhs symbol)
|
36
|
+
def new_parent_node(aProduction, aRange, theTokens, theChildren)
|
37
|
+
node = case aProduction.name
|
38
|
+
when 'srl_0' # rule 'srl' => 'quantifier'
|
39
|
+
return_first_child(aRange, theTokens, theChildren)
|
40
|
+
|
41
|
+
when 'quantifier_0' # rule 'quantifier' => 'ONCE'
|
42
|
+
multiplicity(1, 1)
|
43
|
+
|
44
|
+
when 'quantifier_1' # rule 'quantifier' => 'TWICE'
|
45
|
+
multiplicity(2, 2)
|
46
|
+
|
47
|
+
when 'quantifier_2' # rule 'quantifier' => %w[EXACTLY count TIMES]
|
48
|
+
reduce_quantifier_2(aProduction, aRange, theTokens, theChildren)
|
49
|
+
|
50
|
+
when 'quantifier_3' # rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
|
51
|
+
reduce_quantifier_3(aProduction, aRange, theTokens, theChildren)
|
52
|
+
|
53
|
+
when 'quantifier_4' # rule 'quantifier' => 'OPTIONAL'
|
54
|
+
multiplicity(0, 1)
|
55
|
+
|
56
|
+
when 'quantifier_5' # rule 'quantifier' => %w[ONCE OR MORE]
|
57
|
+
multiplicity(1, :more)
|
58
|
+
|
59
|
+
when 'quantifier_6' # rule 'quantifier' => %w[NEVER OR MORE]
|
60
|
+
multiplicity(0, :more)
|
61
|
+
|
62
|
+
when 'quantifier_7' # rule 'quantifier' => %w[AT LEAST count TIMES]
|
63
|
+
reduce_quantifier_7(aProduction, aRange, theTokens, theChildren)
|
64
|
+
|
65
|
+
when 'count_0', 'count_1'
|
66
|
+
return_first_child(aRange, theTokens, theChildren)
|
67
|
+
|
68
|
+
when 'times_suffix_0', 'times_suffix_1'
|
69
|
+
nil
|
70
|
+
else
|
71
|
+
raise StandardError, "Don't know production #{aProduction.name}"
|
72
|
+
end
|
73
|
+
|
74
|
+
return node
|
75
|
+
end
|
76
|
+
|
77
|
+
def multiplicity(lowerBound, upperBound)
|
78
|
+
return SRL::Regex::Multiplicity.new(lowerBound, upperBound, :greedy)
|
79
|
+
end
|
80
|
+
|
81
|
+
# rule 'quantifier' => %w[EXACTLY count TIMES]
|
82
|
+
def reduce_quantifier_2(aProduction, aRange, theTokens, theChildren)
|
83
|
+
count = theChildren[1].token.lexeme.to_i
|
84
|
+
multiplicity(count, count)
|
85
|
+
end
|
86
|
+
|
87
|
+
# rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
|
88
|
+
# Build the multiplicity for: BETWEEN count AND count times_suffix
# Both bounds are read from the input: the child at index 1 is the lower
# count and the child at index 3 is the upper count.
# @param aProduction [Production] Production rule (unused)
# @param aRange [Range] Range of tokens matched by the rule (unused)
# @param theTokens [Array] The input tokens (unused)
# @param theChildren [Array] Children nodes (one per rhs symbol)
# @return The object returned by `multiplicity(lower, upper)`
def reduce_quantifier_3(aProduction, aRange, theTokens, theChildren)
  lower = theChildren[1].token.lexeme.to_i
  upper = theChildren[3].token.lexeme.to_i
  multiplicity(lower, upper)
end
|
93
|
+
|
94
|
+
# rule 'quantifier' => %w[AT LEAST count TIMES]
|
95
|
+
def reduce_quantifier_7(aProduction, aRange, theTokens, theChildren)
|
96
|
+
count = theChildren[2].token.lexeme.to_i
|
97
|
+
multiplicity(count, :more)
|
98
|
+
end
|
99
|
+
|
100
|
+
|
101
|
+
=begin
|
102
|
+
def reduce_binary_operator(theChildren)
|
103
|
+
operator_node = theChildren[1]
|
104
|
+
operator_node.children << theChildren[0]
|
105
|
+
operator_node.children << theChildren[2]
|
106
|
+
return operator_node
|
107
|
+
end
|
108
|
+
|
109
|
+
# rule 'simple_expression' => %w[simple_expression add_operator term]
|
110
|
+
def reduce_simple_expression_1(_production, _range, _tokens, theChildren)
|
111
|
+
reduce_binary_operator(theChildren)
|
112
|
+
end
|
113
|
+
|
114
|
+
# rule 'term' => %w[term mul_operator factor]
|
115
|
+
def reduce_term_1(_production, _range, _tokens, theChildren)
|
116
|
+
reduce_binary_operator(theChildren)
|
117
|
+
end
|
118
|
+
|
119
|
+
# rule 'factor' => %w[simple_factor POWER simple_factor]]
|
120
|
+
def reduce_factor_1(aProduction, aRange, theTokens, theChildren)
|
121
|
+
result = PowerNode.new(theChildren[1].symbol, aRange)
|
122
|
+
result.children << theChildren[0]
|
123
|
+
result.children << theChildren[2]
|
124
|
+
|
125
|
+
return result
|
126
|
+
end
|
127
|
+
|
128
|
+
# rule 'simple_factor' => %[sign scalar]
|
129
|
+
def reduce_simple_factor_0(aProduction, aRange, theTokens, theChildren)
|
130
|
+
first_child = theChildren[0]
|
131
|
+
result = if first_child.kind_of?(CalcNegateNode)
|
132
|
+
-theChildren[1]
|
133
|
+
else
|
134
|
+
theChildren[1]
|
135
|
+
end
|
136
|
+
|
137
|
+
return result
|
138
|
+
end
|
139
|
+
|
140
|
+
# rule 'simple_factor' => %w[unary_function in_parenthesis]
|
141
|
+
def reduce_simple_factor_1(aProduction, aRange, theTokens, theChildren)
|
142
|
+
func = CalcUnaryFunction.new(theChildren[0].symbol, aRange.low)
|
143
|
+
func.func_name = theChildren[0].value
|
144
|
+
func.children << theChildren[1]
|
145
|
+
return func
|
146
|
+
end
|
147
|
+
|
148
|
+
# rule 'simple_factor' => %w[MINUS in_parenthesis]
|
149
|
+
def reduce_simple_factor_2(aProduction, aRange, theTokens, theChildren)
|
150
|
+
negation = CalcNegateNode.new(theChildren[0].symbol, aRange.low)
|
151
|
+
negation.children << theChildren[1]
|
152
|
+
return negation
|
153
|
+
end
|
154
|
+
|
155
|
+
# rule 'add_operator' => 'PLUS'
|
156
|
+
def reduce_add_operator_0(_production, aRange, _tokens, theChildren)
|
157
|
+
return CalcAddNode.new(theChildren[0].symbol, aRange)
|
158
|
+
end
|
159
|
+
|
160
|
+
# rule 'add_operator' => 'MINUS'
|
161
|
+
def reduce_add_operator_1(_production, aRange, _tokens, theChildren)
|
162
|
+
return CalcSubtractNode.new(theChildren[0].symbol, aRange)
|
163
|
+
end
|
164
|
+
|
165
|
+
# rule 'mul_operator' => 'STAR'
|
166
|
+
def reduce_mul_operator_0(_production, aRange, _tokens, theChildren)
|
167
|
+
return CalcMultiplyNode.new(theChildren[0].symbol, aRange)
|
168
|
+
end
|
169
|
+
|
170
|
+
# rule 'mul_operator' => 'DIVIDE'
|
171
|
+
def reduce_mul_operator_1(_production, aRange, _tokens, theChildren)
|
172
|
+
return CalcDivideNode.new(theChildren[0].symbol, aRange)
|
173
|
+
end
|
174
|
+
=end
|
175
|
+
end # class
|
176
|
+
# End of file
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# Mix-in module that provides convenience methods for
|
2
|
+
# constructing an AST (Abstract Syntax Tree).
|
3
|
+
module ASTBuilding
|
4
|
+
def return_first_child(_range, _tokens, theChildren)
|
5
|
+
return theChildren[0]
|
6
|
+
end
|
7
|
+
|
8
|
+
def return_second_child(_range, _tokens, theChildren)
|
9
|
+
return theChildren[1]
|
10
|
+
end
|
11
|
+
|
12
|
+
def return_last_child(_range, _tokens, theChildren)
|
13
|
+
return theChildren[-1]
|
14
|
+
end
|
15
|
+
|
16
|
+
def return_epsilon(_range, _tokens, _children)
|
17
|
+
return nil
|
18
|
+
end
|
19
|
+
end # module
|
20
|
+
# End of file
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Grammar for SRL (Simple Regex Language)
|
2
|
+
require 'rley' # Load the gem
|
3
|
+
module SRL
|
4
|
+
########################################
|
5
|
+
# Work in progress.
|
6
|
+
# This is a very partial grammar of SRL.
|
7
|
+
# It will be expanded with the coming versions of Rley
|
8
|
+
builder = Rley::Syntax::GrammarBuilder.new do
|
9
|
+
add_terminals('DIGIT', 'INTEGER')
|
10
|
+
add_terminals('EXACTLY', 'TIMES', 'ONCE', 'TWICE')
|
11
|
+
add_terminals('BETWEEN', 'AND', 'OPTIONAL', 'OR')
|
12
|
+
add_terminals('MORE', 'NEVER', 'AT', 'LEAST')
|
13
|
+
|
14
|
+
# For the moment one focuses on quantifier syntax only...
|
15
|
+
rule 'srl' => 'quantifier'
|
16
|
+
rule 'quantifier' => 'ONCE'
|
17
|
+
rule 'quantifier' => 'TWICE'
|
18
|
+
rule 'quantifier' => %w[EXACTLY count TIMES]
|
19
|
+
rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
|
20
|
+
rule 'quantifier' => 'OPTIONAL'
|
21
|
+
rule 'quantifier' => %w[ONCE OR MORE]
|
22
|
+
rule 'quantifier' => %w[NEVER OR MORE]
|
23
|
+
rule 'quantifier' => %w[AT LEAST count TIMES]
|
24
|
+
rule 'count' => 'DIGIT'
|
25
|
+
rule 'count' => 'INTEGER'
|
26
|
+
rule 'times_suffix' => 'TIMES'
|
27
|
+
rule 'times_suffix' => []
|
28
|
+
end
|
29
|
+
|
30
|
+
# And now build the grammar and make it accessible via a constant of the SRL module
|
31
|
+
Grammar = builder.grammar
|
32
|
+
end # module
|