rley 0.5.14 → 0.6.00
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -2
- data/README.md +29 -31
- data/examples/NLP/benchmark_pico_en.rb +34 -34
- data/examples/NLP/engtagger.rb +1 -1
- data/examples/NLP/nano_eng/nano_en_demo.rb +23 -28
- data/examples/NLP/nano_eng/nano_grammar.rb +1 -1
- data/examples/NLP/pico_en_demo.rb +28 -31
- data/examples/data_formats/JSON/json_ast_builder.rb +11 -70
- data/examples/data_formats/JSON/json_demo.rb +32 -14
- data/examples/data_formats/JSON/json_grammar.rb +1 -1
- data/examples/data_formats/JSON/json_lexer.rb +5 -11
- data/examples/general/SRL/lib/ast_builder.rb +5 -28
- data/examples/general/SRL/lib/tokenizer.rb +2 -5
- data/examples/general/SRL/spec/integration_spec.rb +12 -5
- data/examples/general/SRL/spec/tokenizer_spec.rb +13 -14
- data/examples/general/SRL/srl_demo.rb +16 -9
- data/examples/general/calc_iter1/calc_ast_builder.rb +29 -85
- data/examples/general/calc_iter1/calc_demo.rb +15 -6
- data/examples/general/calc_iter1/calc_lexer.rb +2 -5
- data/examples/general/calc_iter1/spec/calculator_spec.rb +18 -19
- data/examples/general/calc_iter2/calc_ast_builder.rb +9 -107
- data/examples/general/calc_iter2/calc_demo.rb +15 -8
- data/examples/general/calc_iter2/calc_lexer.rb +3 -5
- data/examples/general/calc_iter2/spec/calculator_spec.rb +18 -31
- data/lib/rley.rb +2 -1
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/engine.rb +122 -0
- data/lib/rley/parse_rep/ast_base_builder.rb +128 -0
- data/lib/rley/{parser → parse_rep}/cst_builder.rb +1 -1
- data/lib/rley/{parser → parse_rep}/parse_forest_builder.rb +1 -1
- data/lib/rley/{parser → parse_rep}/parse_forest_factory.rb +2 -2
- data/lib/rley/{parser → parse_rep}/parse_rep_creator.rb +3 -3
- data/lib/rley/{parser → parse_rep}/parse_tree_builder.rb +4 -4
- data/lib/rley/{parser → parse_rep}/parse_tree_factory.rb +1 -1
- data/lib/rley/parser/gfg_parsing.rb +16 -4
- data/spec/rley/engine_spec.rb +127 -0
- data/spec/rley/formatter/asciitree_spec.rb +11 -13
- data/spec/rley/formatter/bracket_notation_spec.rb +11 -13
- data/spec/rley/formatter/debug_spec.rb +13 -15
- data/spec/rley/formatter/json_spec.rb +10 -14
- data/spec/rley/{parser → parse_rep}/ambiguous_parse_spec.rb +3 -3
- data/spec/rley/{parser → parse_rep}/ast_builder_spec.rb +34 -83
- data/spec/rley/{parser → parse_rep}/cst_builder_spec.rb +3 -3
- data/spec/rley/{parser → parse_rep}/groucho_spec.rb +3 -3
- data/spec/rley/{parser → parse_rep}/parse_forest_builder_spec.rb +4 -4
- data/spec/rley/{parser → parse_rep}/parse_forest_factory_spec.rb +2 -2
- data/spec/rley/{parser → parse_rep}/parse_tree_factory_spec.rb +2 -2
- data/spec/rley/parse_tree_visitor_spec.rb +12 -15
- data/spec/rley/support/ast_builder.rb +403 -0
- data/spec/rley/support/grammar_arr_int_helper.rb +2 -2
- metadata +27 -28
- data/examples/data_formats/JSON/json_parser.rb +0 -46
- data/examples/general/SRL/lib/ast_building.rb +0 -20
- data/examples/general/SRL/lib/parser.rb +0 -26
- data/examples/general/calc_iter1/calc_parser.rb +0 -24
- data/examples/general/calc_iter2/ast_building.rb +0 -20
- data/examples/general/calc_iter2/calc_parser.rb +0 -24
@@ -8,10 +8,10 @@ require_relative '../support/grammar_b_expr_helper'
|
|
8
8
|
require_relative '../support/grammar_arr_int_helper'
|
9
9
|
|
10
10
|
# Load the class under test
|
11
|
-
require_relative '../../../lib/rley/parser/cst_builder'
|
11
|
+
require_relative '../../../lib/rley/parse_rep/cst_builder'
|
12
12
|
|
13
13
|
module Rley # Open this namespace to avoid module qualifier prefixes
|
14
|
-
module Parser
|
14
|
+
module ParseRep
|
15
15
|
describe CSTBuilder do
|
16
16
|
include ExpectationHelper # Mix-in with expectation on parse entry sets
|
17
17
|
include GrammarBExprHelper # Mix-in for basic arithmetic language
|
@@ -30,7 +30,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
30
30
|
|
31
31
|
def init_walker(theParser, theTokens)
|
32
32
|
result = theParser.parse(theTokens)
|
33
|
-
factory = ParseWalkerFactory.new
|
33
|
+
factory = Parser::ParseWalkerFactory.new
|
34
34
|
accept_entry = result.accepting_entry
|
35
35
|
accept_index = result.chart.last_index
|
36
36
|
@walker = factory.build_walker(accept_entry, accept_index)
|
@@ -11,10 +11,10 @@ require_relative '../support/expectation_helper'
|
|
11
11
|
require_relative '../support/grammar_ambig01_helper'
|
12
12
|
|
13
13
|
# Load the class under test
|
14
|
-
require_relative '../../../lib/rley/parser/parse_forest_builder'
|
14
|
+
require_relative '../../../lib/rley/parse_rep/parse_forest_builder'
|
15
15
|
|
16
16
|
module Rley # Open this namespace to avoid module qualifier prefixes
|
17
|
-
module Parser
|
17
|
+
module ParseRep
|
18
18
|
describe 'Coping with a NLP ambiguous toy grammar' do
|
19
19
|
include GrammarHelper # Mix-in with token factory method
|
20
20
|
include ExpectationHelper # Mix-in with expectation on parse entry sets
|
@@ -104,7 +104,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
104
104
|
|
105
105
|
|
106
106
|
before(:each) do
|
107
|
-
factory = ParseWalkerFactory.new
|
107
|
+
factory = Parser::ParseWalkerFactory.new
|
108
108
|
accept_entry = sentence_result.accepting_entry
|
109
109
|
accept_index = sentence_result.chart.last_index
|
110
110
|
@walker = factory.build_walker(accept_entry, accept_index, true)
|
@@ -8,10 +8,10 @@ require_relative '../support/expectation_helper'
|
|
8
8
|
require_relative '../support/grammar_l0_helper'
|
9
9
|
|
10
10
|
# Load the class under test
|
11
|
-
require_relative '../../../lib/rley/parser/parse_forest_builder'
|
11
|
+
require_relative '../../../lib/rley/parse_rep/parse_forest_builder'
|
12
12
|
|
13
13
|
module Rley # Open this namespace to avoid module qualifier prefixes
|
14
|
-
module Parser
|
14
|
+
module ParseRep
|
15
15
|
describe ParseForestBuilder do
|
16
16
|
include GrammarHelper # Mix-in with token factory method
|
17
17
|
include ExpectationHelper # Mix-in with expectation on parse entry sets
|
@@ -89,7 +89,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
89
89
|
|
90
90
|
context 'Parse forest construction' do
|
91
91
|
before(:each) do
|
92
|
-
factory = ParseWalkerFactory.new
|
92
|
+
factory = Parser::ParseWalkerFactory.new
|
93
93
|
accept_entry = sample_result.accepting_entry
|
94
94
|
accept_index = sample_result.chart.last_index
|
95
95
|
@walker = factory.build_walker(accept_entry, accept_index, true)
|
@@ -294,7 +294,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
294
294
|
end
|
295
295
|
|
296
296
|
before(:each) do
|
297
|
-
factory = ParseWalkerFactory.new
|
297
|
+
factory = Parser::ParseWalkerFactory.new
|
298
298
|
accept_entry = sentence_result.accepting_entry
|
299
299
|
accept_index = sentence_result.chart.last_index
|
300
300
|
@walker = factory.build_walker(accept_entry, accept_index)
|
@@ -7,10 +7,10 @@ require_relative '../support/grammar_helper'
|
|
7
7
|
require_relative '../support/expectation_helper'
|
8
8
|
|
9
9
|
# Load the class under test
|
10
|
-
require_relative '../../../lib/rley/parser/parse_forest_factory'
|
10
|
+
require_relative '../../../lib/rley/parse_rep/parse_forest_factory'
|
11
11
|
|
12
12
|
module Rley # Open this namespace to avoid module qualifier prefixes
|
13
|
-
module Parser
|
13
|
+
module ParseRep
|
14
14
|
describe ParseForestFactory do
|
15
15
|
include GrammarHelper # Mix-in with token factory method
|
16
16
|
include ExpectationHelper # Mix-in with expectation on parse entry sets
|
@@ -7,10 +7,10 @@ require_relative '../support/grammar_abc_helper'
|
|
7
7
|
require_relative '../support/expectation_helper'
|
8
8
|
|
9
9
|
# Load the class under test
|
10
|
-
require_relative '../../../lib/rley/parser/parse_tree_factory'
|
10
|
+
require_relative '../../../lib/rley/parse_rep/parse_tree_factory'
|
11
11
|
|
12
12
|
module Rley # Open this namespace to avoid module qualifier prefixes
|
13
|
-
module Parser
|
13
|
+
module ParseRep
|
14
14
|
describe ParseTreeFactory do
|
15
15
|
include GrammarHelper # Mix-in with token factory method
|
16
16
|
include ExpectationHelper # Mix-in with expectation on parse entry sets
|
@@ -3,15 +3,16 @@ require_relative '../spec_helper'
|
|
3
3
|
require_relative './support/grammar_abc_helper'
|
4
4
|
require_relative '../../lib/rley/lexical/token'
|
5
5
|
require_relative '../../lib/rley/parser/gfg_earley_parser'
|
6
|
+
require_relative '../../lib/rley/engine'
|
6
7
|
# Load the class under test
|
7
8
|
require_relative '../../lib/rley/parse_tree_visitor'
|
8
9
|
|
9
10
|
module Rley # Open this namespace to avoid module qualifier prefixes
|
10
|
-
describe ParseTreeVisitor do
|
11
|
-
include GrammarABCHelper # Mix-in module with builder for grammar abc
|
12
|
-
|
11
|
+
describe ParseTreeVisitor do
|
13
12
|
let(:grammar_abc) do
|
14
|
-
|
13
|
+
sandbox = Object.new
|
14
|
+
sandbox.extend(GrammarABCHelper)
|
15
|
+
builder = sandbox.grammar_abc_builder
|
15
16
|
builder.grammar
|
16
17
|
end
|
17
18
|
|
@@ -21,15 +22,9 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
21
22
|
|
22
23
|
|
23
24
|
# Helper method that mimicks the output of a tokenizer
|
24
|
-
# for the language specified by
|
25
|
+
# for the language specified by grammar_abc
|
25
26
|
let(:grm_abc_tokens1) do
|
26
|
-
[
|
27
|
-
Lexical::Token.new('a', a_),
|
28
|
-
Lexical::Token.new('a', a_),
|
29
|
-
Lexical::Token.new('b', b_),
|
30
|
-
Lexical::Token.new('c', c_),
|
31
|
-
Lexical::Token.new('c', c_)
|
32
|
-
]
|
27
|
+
%w[a a b c c].map { |ch| Lexical::Token.new(ch, ch) }
|
33
28
|
end
|
34
29
|
|
35
30
|
# Factory method that builds a sample parse tree.
|
@@ -45,9 +40,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
45
40
|
# +- c[4,5]
|
46
41
|
# Capital letters represent non-terminal nodes
|
47
42
|
let(:grm_abc_ptree1) do
|
48
|
-
|
49
|
-
|
50
|
-
parse_result.
|
43
|
+
engine = Rley::Engine.new
|
44
|
+
engine.use_grammar(grammar_abc)
|
45
|
+
parse_result = engine.parse(grm_abc_tokens1)
|
46
|
+
ptree = engine.convert(parse_result)
|
47
|
+
ptree
|
51
48
|
end
|
52
49
|
|
53
50
|
|
@@ -0,0 +1,403 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require_relative 'ast_building'
|
3
|
+
require_relative 'regex_repr'
|
4
|
+
|
5
|
+
# The purpose of a ASTBuilder is to build piece by piece an AST
|
6
|
+
# (Abstract Syntax Tree) from a sequence of input tokens and
|
7
|
+
# visit events produced by walking over a GFGParsing object.
|
8
|
+
# Uses the Builder GoF pattern.
|
9
|
+
# The Builder pattern creates a complex object
|
10
|
+
# (say, a parse tree) from simpler objects (terminal and non-terminal
|
11
|
+
# nodes) and using a step by step approach.
|
12
|
+
class ASTBuilder < Rley::Parser::ParseTreeBuilder
|
13
|
+
include ASTBuilding
|
14
|
+
|
15
|
+
Terminal2NodeClass = { }.freeze
|
16
|
+
|
17
|
+
attr_reader :options
|
18
|
+
|
19
|
+
protected
|
20
|
+
|
21
|
+
# Overriding method.
|
22
|
+
# Factory method for creating a node object for the given
|
23
|
+
# input token.
|
24
|
+
# @param aTerminal [Terminal] Terminal symbol associated with the token
|
25
|
+
# @param aTokenPosition [Integer] Position of token in the input stream
|
26
|
+
# @param aToken [Token] The input token
|
27
|
+
def new_leaf_node(aProduction, aTerminal, aTokenPosition, aToken)
|
28
|
+
node = Rley::PTree::TerminalNode.new(aToken, aTokenPosition)
|
29
|
+
|
30
|
+
return node
|
31
|
+
end
|
32
|
+
|
33
|
+
# Method to override.
|
34
|
+
# Factory method for creating a parent node object.
|
35
|
+
# @param aProduction [Production] Production rule
|
36
|
+
# @param aRange [Range] Range of tokens matched by the rule
|
37
|
+
# @param theTokens [Array] The input tokens
|
38
|
+
# @param theChildren [Array] Children nodes (one per rhs symbol)
|
39
|
+
def new_parent_node(aProduction, aRange, theTokens, theChildren)
|
40
|
+
short_name = aProduction.name
|
41
|
+
method_name = 'reduce_' + short_name
|
42
|
+
if self.respond_to?(method_name, true)
|
43
|
+
node = send(method_name, aProduction, aRange, theTokens, theChildren)
|
44
|
+
else
|
45
|
+
# Default action...
|
46
|
+
node = case aProduction.rhs.size
|
47
|
+
when 0
|
48
|
+
nil
|
49
|
+
when 1
|
50
|
+
return_first_child(aRange, theTokens, theChildren)
|
51
|
+
else
|
52
|
+
raise StandardError, "Don't know production '#{aProduction.name}'"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
return node
|
56
|
+
end
|
57
|
+
|
58
|
+
def multiplicity(lowerBound, upperBound)
|
59
|
+
return SRL::Regex::Multiplicity.new(lowerBound, upperBound, :greedy)
|
60
|
+
end
|
61
|
+
|
62
|
+
def string_literal(aString, to_escape = true)
|
63
|
+
if aString.size > 1
|
64
|
+
chars = []
|
65
|
+
aString.each_char do |ch|
|
66
|
+
if to_escape && Regex::Character::MetaChars.include?(ch)
|
67
|
+
chars << Regex::Character.new("\\")
|
68
|
+
end
|
69
|
+
chars << Regex::Character.new(ch)
|
70
|
+
end
|
71
|
+
result = Regex::Concatenation.new(*chars)
|
72
|
+
else
|
73
|
+
if to_escape && Regex::Character::MetaChars.include?(aString)
|
74
|
+
result = Regex::Concatenation.new(Regex::Character.new("\\"),
|
75
|
+
Regex::Character.new(aString))
|
76
|
+
else
|
77
|
+
result = Regex::Character.new(aString)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
return result
|
82
|
+
end
|
83
|
+
|
84
|
+
def char_range(lowerBound, upperBound)
|
85
|
+
# TODO fix module nesting
|
86
|
+
lower = Regex::Character.new(lowerBound)
|
87
|
+
upper = Regex::Character.new(upperBound)
|
88
|
+
return Regex::CharRange.new(lower, upper)
|
89
|
+
end
|
90
|
+
|
91
|
+
def char_class(toNegate, *theChildren)
|
92
|
+
Regex::CharClass.new(toNegate, *theChildren)
|
93
|
+
end
|
94
|
+
|
95
|
+
def char_shorthand(shortName)
|
96
|
+
Regex::CharShorthand.new(shortName)
|
97
|
+
end
|
98
|
+
|
99
|
+
def wildcard()
|
100
|
+
Regex::Wildcard.new
|
101
|
+
end
|
102
|
+
|
103
|
+
def repetition(expressionToRepeat, aMultiplicity)
|
104
|
+
return Regex::Repetition.new(expressionToRepeat, aMultiplicity)
|
105
|
+
end
|
106
|
+
|
107
|
+
def begin_anchor
|
108
|
+
return Regex::Anchor.new('^')
|
109
|
+
end
|
110
|
+
|
111
|
+
# rule('expression' => %w[pattern separator flags]).as 'flagged_expr'
|
112
|
+
def reduce_flagged_expr(aProduction, aRange, theTokens, theChildren)
|
113
|
+
@options = theChildren[2] if theChildren[2]
|
114
|
+
return_first_child(aRange, theTokens, theChildren)
|
115
|
+
end
|
116
|
+
|
117
|
+
# rule('pattern' => %w[pattern separator quantifiable]).as 'pattern_sequence'
|
118
|
+
def reduce_pattern_sequence(aProduction, aRange, theTokens, theChildren)
|
119
|
+
return Regex::Concatenation.new(theChildren[0], theChildren[2])
|
120
|
+
end
|
121
|
+
|
122
|
+
# rule('flags' => %[flags separator single_flag]).as 'flag_sequence'
|
123
|
+
def reduce_flag_sequence(aProduction, aRange, theTokens, theChildren)
|
124
|
+
theChildren[0] << theChildren[2]
|
125
|
+
end
|
126
|
+
|
127
|
+
# rule('single_flag' => %w[CASE INSENSITIVE]).as 'case_insensitive'
|
128
|
+
def reduce_case_insensitive(aProduction, aRange, theTokens, theChildren)
|
129
|
+
return [ Regex::MatchOption.new(:IGNORECASE, true) ]
|
130
|
+
end
|
131
|
+
|
132
|
+
# rule('single_flag' => %w[MULTI LINE]).as 'multi_line'
|
133
|
+
def reduce_multi_line(aProduction, aRange, theTokens, theChildren)
|
134
|
+
return [ Regex::MatchOption.new(:MULTILINE, true) ]
|
135
|
+
end
|
136
|
+
|
137
|
+
# rule('single_flag' => %w[ALL LAZY]).as 'all_lazy'
|
138
|
+
def reduce_all_lazy(aProduction, aRange, theTokens, theChildren)
|
139
|
+
return [ Regex::MatchOption.new(:ALL_LAZY, true) ]
|
140
|
+
end
|
141
|
+
|
142
|
+
# rule 'quantifiable' => %w[begin_anchor anchorable end_anchor]
|
143
|
+
def reduce_pinned_quantifiable(aProduction, aRange, theTokens, theChildren)
|
144
|
+
theChildren[1].begin_anchor = theChildren[0]
|
145
|
+
theChildren[1].end_anchor = theChildren[2]
|
146
|
+
return theChildren[1]
|
147
|
+
end
|
148
|
+
|
149
|
+
# rule 'quantifiable' => %w[begin_anchor anchorable]
|
150
|
+
def reduce_begin_anchor_quantifiable(aProduction, aRange, theTokens, theChildren)
|
151
|
+
theChildren[1].begin_anchor = theChildren[0]
|
152
|
+
return theChildren[1]
|
153
|
+
end
|
154
|
+
|
155
|
+
# rule 'quantifiable' => %w[anchorable end_anchor]
|
156
|
+
def reduce_end_anchor_quantifiable(aProduction, aRange, theTokens, theChildren)
|
157
|
+
theChildren[0].end_anchor = theChildren[1]
|
158
|
+
return theChildren[0]
|
159
|
+
end
|
160
|
+
|
161
|
+
# rule 'begin_anchor' => %w[STARTS WITH]
|
162
|
+
def reduce_starts_with(aProduction, aRange, theTokens, theChildren)
|
163
|
+
begin_anchor
|
164
|
+
end
|
165
|
+
|
166
|
+
# rule 'begin_anchor' => %w[BEGIN WITH]
|
167
|
+
def reduce_begin_with(aProduction, aRange, theTokens, theChildren)
|
168
|
+
begin_anchor
|
169
|
+
end
|
170
|
+
|
171
|
+
# rule 'end_anchor' => %w[MUST END].as 'end_anchor'
|
172
|
+
def reduce_end_anchor(aProduction, aRange, theTokens, theChildren)
|
173
|
+
return Regex::Anchor.new('$')
|
174
|
+
end
|
175
|
+
|
176
|
+
# rule('anchorable' => %w[assertable assertion]).as 'asserted_anchorable'
|
177
|
+
def reduce_asserted_anchorable(aProduction, aRange, theTokens, theChildren)
|
178
|
+
assertion = theChildren.last
|
179
|
+
assertion.children.unshift(theChildren[0])
|
180
|
+
return assertion
|
181
|
+
end
|
182
|
+
|
183
|
+
# rule('assertion' => %w[IF FOLLOWED BY assertable]).as 'if_followed'
|
184
|
+
def reduce_if_followed(aProduction, aRange, theTokens, theChildren)
|
185
|
+
return Regex::Lookaround.new(theChildren.last, :ahead, :positive)
|
186
|
+
end
|
187
|
+
|
188
|
+
# rule('assertion' => %w[IF NOT FOLLOWED BY assertable]).as 'if_not_followed'
|
189
|
+
def reduce_if_not_followed(aProduction, aRange, theTokens, theChildren)
|
190
|
+
return Regex::Lookaround.new(theChildren.last, :ahead, :negative)
|
191
|
+
end
|
192
|
+
|
193
|
+
# rule('assertion' => %w[IF ALREADY HAD assertable]).as 'if_had'
|
194
|
+
def reduce_if_had(aProduction, aRange, theTokens, theChildren)
|
195
|
+
return Regex::Lookaround.new(theChildren.last, :behind, :positive)
|
196
|
+
end
|
197
|
+
|
198
|
+
# rule('assertion' => %w[IF NOT ALREADY HAD assertable]).as 'if_not_had'
|
199
|
+
def reduce_if_not_had(aProduction, aRange, theTokens, theChildren)
|
200
|
+
return Regex::Lookaround.new(theChildren.last, :behind, :negative)
|
201
|
+
end
|
202
|
+
|
203
|
+
# rule('assertable' => %w[term quantifier]).as 'quantified_assertable'
|
204
|
+
def reduce_quantified_assertable(aProduction, aRange, theTokens, theChildren)
|
205
|
+
quantifier = theChildren[1]
|
206
|
+
term = theChildren[0]
|
207
|
+
repetition(term, quantifier)
|
208
|
+
end
|
209
|
+
|
210
|
+
# rule('letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT]).as 'lowercase_from_to'
|
211
|
+
def reduce_lowercase_from_to(aProduction, aRange, theTokens, theChildren)
|
212
|
+
lower = theChildren[2].token.lexeme
|
213
|
+
upper = theChildren[4].token.lexeme
|
214
|
+
ch_range = char_range(lower, upper)
|
215
|
+
char_class(false, ch_range)
|
216
|
+
end
|
217
|
+
|
218
|
+
# rule('letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT]).as 'uppercase_from_to'
|
219
|
+
def reduce_uppercase_from_to(aProduction, aRange, theTokens, theChildren)
|
220
|
+
lower = theChildren[3].token.lexeme
|
221
|
+
upper = theChildren[5].token.lexeme
|
222
|
+
ch_range = char_range(lower.upcase, upper.upcase)
|
223
|
+
char_class(false, ch_range)
|
224
|
+
end
|
225
|
+
|
226
|
+
# rule('letter_range' => 'LETTER').as 'any_lowercase'
|
227
|
+
def reduce_any_lowercase(aProduction, aRange, theTokens, theChildren)
|
228
|
+
ch_range = char_range('a', 'z')
|
229
|
+
char_class(false, ch_range)
|
230
|
+
end
|
231
|
+
|
232
|
+
# rule('letter_range' => %w[UPPERCASE LETTER]).as 'any_uppercase'
|
233
|
+
def reduce_any_uppercase(aProduction, aRange, theTokens, theChildren)
|
234
|
+
ch_range = char_range('A', 'Z')
|
235
|
+
char_class(false, ch_range)
|
236
|
+
end
|
237
|
+
|
238
|
+
# rule('digit_range' => %w[digit_or_number FROM DIGIT_LIT TO DIGIT_LIT]).as 'digits_from_to'
|
239
|
+
def reduce_digits_from_to(aProduction, aRange, theTokens, theChildren)
|
240
|
+
reduce_lowercase_from_to(aProduction, aRange, theTokens, theChildren)
|
241
|
+
end
|
242
|
+
|
243
|
+
# rule('digit_range' => 'digit_or_number').as 'simple_digit_range'
|
244
|
+
def reduce_simple_digit_range(aProduction, aRange, theTokens, theChildren)
|
245
|
+
char_shorthand('d')
|
246
|
+
end
|
247
|
+
|
248
|
+
# rule('character_class' => %w[ANY CHARACTER]).as 'any_character'
|
249
|
+
def reduce_any_character(aProduction, aRange, theTokens, theChildren)
|
250
|
+
char_shorthand('w')
|
251
|
+
end
|
252
|
+
|
253
|
+
# rule('character_class' => %w[NO CHARACTER]).as 'no_character'
|
254
|
+
def reduce_no_character(aProduction, aRange, theTokens, theChildren)
|
255
|
+
char_shorthand('W')
|
256
|
+
end
|
257
|
+
|
258
|
+
# rule('character_class' => 'WHITESPACE').as 'whitespace'
|
259
|
+
def reduce_whitespace(aProduction, aRange, theTokens, theChildren)
|
260
|
+
char_shorthand('s')
|
261
|
+
end
|
262
|
+
|
263
|
+
# rule('character_class' => %w[NO WHITESPACE]).as 'no_whitespace'
|
264
|
+
def reduce_no_whitespace(aProduction, aRange, theTokens, theChildren)
|
265
|
+
char_shorthand('S')
|
266
|
+
end
|
267
|
+
|
268
|
+
# rule('character_class' => 'ANYTHING').as 'anything'
|
269
|
+
def reduce_anything(aProduction, aRange, theTokens, theChildren)
|
270
|
+
wildcard
|
271
|
+
end
|
272
|
+
|
273
|
+
# rule('alternation' => %w[ANY OF LPAREN alternatives RPAREN]).as 'any_of'
|
274
|
+
def reduce_one_of(aProduction, aRange, theTokens, theChildren)
|
275
|
+
raw_literal = theChildren[-1].token.lexeme.dup
|
276
|
+
alternatives = raw_literal.chars.map { |ch| Regex::Character.new(ch) }
|
277
|
+
return Regex::CharClass.new(false, *alternatives) # TODO check other implementations
|
278
|
+
end
|
279
|
+
|
280
|
+
# rule('special_char' => 'TAB').as 'tab'
|
281
|
+
def reduce_tab(aProduction, aRange, theTokens, theChildren)
|
282
|
+
Regex::Character.new('\t')
|
283
|
+
end
|
284
|
+
|
285
|
+
# rule('special_char' => 'BACKSLASH').as 'backslash'
|
286
|
+
def reduce_backslash(aProduction, aRange, theTokens, theChildren)
|
287
|
+
Regex::Character.new('\\')
|
288
|
+
end
|
289
|
+
|
290
|
+
# rule('special_char' => %w[NEW LINE]).as 'new_line'
|
291
|
+
def reduce_new_line(aProduction, aRange, theTokens, theChildren)
|
292
|
+
# TODO: control portability
|
293
|
+
Regex::Character.new('\n')
|
294
|
+
end
|
295
|
+
|
296
|
+
# rule('literal' => %w[LITERALLY STRING_LIT]).as 'literally'
|
297
|
+
def reduce_literally(aProduction, aRange, theTokens, theChildren)
|
298
|
+
# What if literal is empty?...
|
299
|
+
|
300
|
+
raw_literal = theChildren[-1].token.lexeme.dup
|
301
|
+
return string_literal(raw_literal)
|
302
|
+
end
|
303
|
+
|
304
|
+
#rule('alternation' => %w[ANY OF LPAREN alternatives RPAREN]).as 'any_of'
|
305
|
+
def reduce_any_of(aProduction, aRange, theTokens, theChildren)
|
306
|
+
return Regex::Alternation.new(*theChildren[3])
|
307
|
+
end
|
308
|
+
|
309
|
+
# rule('alternatives' => %w[alternatives separator quantifiable]).as 'alternative_list'
|
310
|
+
def reduce_alternative_list(aProduction, aRange, theTokens, theChildren)
|
311
|
+
return theChildren[0] << theChildren[-1]
|
312
|
+
end
|
313
|
+
|
314
|
+
# rule('alternatives' => 'quantifiable').as 'simple_alternative'
|
315
|
+
def reduce_simple_alternative(aProduction, aRange, theTokens, theChildren)
|
316
|
+
return [theChildren.last]
|
317
|
+
end
|
318
|
+
|
319
|
+
# rule('grouping' => %w[LPAREN pattern RPAREN]).as 'grouping_parenthenses'
|
320
|
+
def reduce_grouping_parenthenses(aProduction, aRange, theTokens, theChildren)
|
321
|
+
return Regex::NonCapturingGroup.new(theChildren[1])
|
322
|
+
end
|
323
|
+
|
324
|
+
# rule('capturing_group' => %w[CAPTURE assertable]).as 'capture'
|
325
|
+
def reduce_capture(aProduction, aRange, theTokens, theChildren)
|
326
|
+
return Regex::CapturingGroup.new(theChildren[1])
|
327
|
+
end
|
328
|
+
|
329
|
+
# rule('capturing_group' => %w[CAPTURE assertable UNTIL assertable]).as 'capture_until'
|
330
|
+
def reduce_capture_until(aProduction, aRange, theTokens, theChildren)
|
331
|
+
group = Regex::CapturingGroup.new(theChildren[1])
|
332
|
+
return Regex::Concatenation.new(group, theChildren[3])
|
333
|
+
end
|
334
|
+
|
335
|
+
# rule('capturing_group' => %w[CAPTURE assertable AS var_name]).as 'named_capture'
|
336
|
+
def reduce_named_capture(aProduction, aRange, theTokens, theChildren)
|
337
|
+
name = theChildren[3].token.lexeme.dup
|
338
|
+
return Regex::CapturingGroup.new(theChildren[1], name)
|
339
|
+
end
|
340
|
+
|
341
|
+
# rule('capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]).as 'named_capture_until'
|
342
|
+
def reduce_named_capture_until(aProduction, aRange, theTokens, theChildren)
|
343
|
+
name = theChildren[3].token.lexeme.dup
|
344
|
+
group = Regex::CapturingGroup.new(theChildren[1], name)
|
345
|
+
return Regex::Concatenation.new(group, theChildren[5])
|
346
|
+
end
|
347
|
+
|
348
|
+
# rule('quantifier' => 'ONCE').as 'once'
|
349
|
+
def reduce_once(aProduction, aRange, theTokens, theChildren)
|
350
|
+
multiplicity(1, 1)
|
351
|
+
end
|
352
|
+
|
353
|
+
# rule('quantifier' => 'TWICE').as 'twice'
|
354
|
+
def reduce_twice(aProduction, aRange, theTokens, theChildren)
|
355
|
+
multiplicity(2, 2)
|
356
|
+
end
|
357
|
+
|
358
|
+
# rule('quantifier' => %w[EXACTLY count TIMES]).as 'exactly'
|
359
|
+
def reduce_exactly(aProduction, aRange, theTokens, theChildren)
|
360
|
+
count = theChildren[1].token.lexeme.to_i
|
361
|
+
multiplicity(count, count)
|
362
|
+
end
|
363
|
+
|
364
|
+
# rule('quantifier' => %w[BETWEEN count AND count times_suffix]).as 'between_and'
|
365
|
+
def reduce_between_and(aProduction, aRange, theTokens, theChildren)
|
366
|
+
lower = theChildren[1].token.lexeme.to_i
|
367
|
+
upper = theChildren[3].token.lexeme.to_i
|
368
|
+
multiplicity(lower, upper)
|
369
|
+
end
|
370
|
+
|
371
|
+
# rule('quantifier' => 'OPTIONAL').as 'optional'
|
372
|
+
def reduce_optional(aProduction, aRange, theTokens, theChildren)
|
373
|
+
multiplicity(0, 1)
|
374
|
+
end
|
375
|
+
|
376
|
+
# rule('quantifier' => %w[ONCE OR MORE]).as 'once_or_more'
|
377
|
+
def reduce_once_or_more(aProduction, aRange, theTokens, theChildren)
|
378
|
+
multiplicity(1, :more)
|
379
|
+
end
|
380
|
+
|
381
|
+
# rule('quantifier' => %w[NEVER OR MORE]).as 'never_or_more'
|
382
|
+
def reduce_never_or_more(aProduction, aRange, theTokens, theChildren)
|
383
|
+
multiplicity(0, :more)
|
384
|
+
end
|
385
|
+
|
386
|
+
# rule('quantifier' => %w[AT LEAST count TIMES]).as 'at_least'
|
387
|
+
def reduce_at_least(aProduction, aRange, theTokens, theChildren)
|
388
|
+
count = theChildren[2].token.lexeme.to_i
|
389
|
+
multiplicity(count, :more)
|
390
|
+
end
|
391
|
+
|
392
|
+
# rule('times_suffix' => 'TIMES').as 'times_keyword'
|
393
|
+
def reduce_times_keyword(aProduction, aRange, theTokens, theChildren)
|
394
|
+
return nil
|
395
|
+
end
|
396
|
+
|
397
|
+
# rule('times_suffix' => []).as 'times_dropped'
|
398
|
+
def reduce_times_dropped(aProduction, aRange, theTokens, theChildren)
|
399
|
+
return nil
|
400
|
+
end
|
401
|
+
|
402
|
+
end # class
|
403
|
+
# End of file
|