rley 0.7.06 → 0.8.01
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +362 -62
- data/.travis.yml +6 -6
- data/CHANGELOG.md +20 -4
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/examples/NLP/engtagger.rb +193 -190
- data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
- data/examples/NLP/nano_eng/nano_grammar.rb +21 -21
- data/examples/NLP/pico_en_demo.rb +2 -2
- data/examples/data_formats/JSON/cli_options.rb +1 -1
- data/examples/data_formats/JSON/json_ast_builder.rb +21 -27
- data/examples/data_formats/JSON/json_ast_nodes.rb +12 -21
- data/examples/data_formats/JSON/json_demo.rb +1 -2
- data/examples/data_formats/JSON/json_grammar.rb +13 -13
- data/examples/data_formats/JSON/json_lexer.rb +8 -8
- data/examples/data_formats/JSON/json_minifier.rb +1 -1
- data/examples/general/calc_iter1/calc_ast_builder.rb +13 -10
- data/examples/general/calc_iter1/calc_ast_nodes.rb +23 -37
- data/examples/general/calc_iter1/calc_grammar.rb +7 -6
- data/examples/general/calc_iter1/calc_lexer.rb +6 -4
- data/examples/general/calc_iter1/spec/calculator_spec.rb +5 -5
- data/examples/general/calc_iter2/calc_ast_builder.rb +5 -3
- data/examples/general/calc_iter2/calc_ast_nodes.rb +27 -43
- data/examples/general/calc_iter2/calc_grammar.rb +12 -12
- data/examples/general/calc_iter2/calc_lexer.rb +11 -10
- data/examples/general/calc_iter2/spec/calculator_spec.rb +26 -26
- data/examples/general/left.rb +2 -2
- data/examples/general/right.rb +2 -2
- data/lib/rley.rb +1 -1
- data/lib/rley/base/dotted_item.rb +28 -31
- data/lib/rley/base/grm_items_builder.rb +6 -0
- data/lib/rley/constants.rb +2 -2
- data/lib/rley/engine.rb +22 -25
- data/lib/rley/formatter/asciitree.rb +3 -3
- data/lib/rley/formatter/bracket_notation.rb +1 -8
- data/lib/rley/formatter/debug.rb +6 -6
- data/lib/rley/formatter/json.rb +2 -2
- data/lib/rley/gfg/call_edge.rb +1 -1
- data/lib/rley/gfg/edge.rb +5 -5
- data/lib/rley/gfg/end_vertex.rb +2 -6
- data/lib/rley/gfg/epsilon_edge.rb +1 -5
- data/lib/rley/gfg/grm_flow_graph.rb +27 -23
- data/lib/rley/gfg/item_vertex.rb +10 -10
- data/lib/rley/gfg/non_terminal_vertex.rb +4 -4
- data/lib/rley/gfg/scan_edge.rb +1 -1
- data/lib/rley/gfg/shortcut_edge.rb +2 -2
- data/lib/rley/gfg/start_vertex.rb +4 -8
- data/lib/rley/gfg/vertex.rb +43 -39
- data/lib/rley/interface.rb +16 -0
- data/lib/rley/lexical/token_range.rb +6 -6
- data/lib/rley/notation/all_notation_nodes.rb +2 -0
- data/lib/rley/notation/ast_builder.rb +191 -0
- data/lib/rley/notation/ast_node.rb +44 -0
- data/lib/rley/notation/ast_visitor.rb +113 -0
- data/lib/rley/notation/grammar.rb +49 -0
- data/lib/rley/notation/grammar_builder.rb +504 -0
- data/lib/rley/notation/grouping_node.rb +23 -0
- data/lib/rley/notation/parser.rb +56 -0
- data/lib/rley/notation/sequence_node.rb +35 -0
- data/lib/rley/notation/symbol_node.rb +29 -0
- data/lib/rley/notation/tokenizer.rb +192 -0
- data/lib/rley/parse_forest_visitor.rb +5 -5
- data/lib/rley/parse_rep/ast_base_builder.rb +48 -11
- data/lib/rley/parse_rep/cst_builder.rb +5 -6
- data/lib/rley/parse_rep/parse_forest_builder.rb +22 -18
- data/lib/rley/parse_rep/parse_forest_factory.rb +3 -3
- data/lib/rley/parse_rep/parse_rep_creator.rb +14 -16
- data/lib/rley/parse_rep/parse_tree_builder.rb +4 -4
- data/lib/rley/parse_rep/parse_tree_factory.rb +27 -27
- data/lib/rley/parse_tree_visitor.rb +1 -1
- data/lib/rley/parser/error_reason.rb +4 -5
- data/lib/rley/parser/gfg_chart.rb +118 -26
- data/lib/rley/parser/gfg_parsing.rb +22 -33
- data/lib/rley/parser/parse_entry.rb +25 -31
- data/lib/rley/parser/parse_entry_set.rb +19 -16
- data/lib/rley/parser/parse_entry_tracker.rb +4 -4
- data/lib/rley/parser/parse_tracer.rb +13 -13
- data/lib/rley/parser/parse_walker_factory.rb +23 -28
- data/lib/rley/ptree/non_terminal_node.rb +7 -5
- data/lib/rley/ptree/parse_tree.rb +3 -3
- data/lib/rley/ptree/parse_tree_node.rb +5 -5
- data/lib/rley/ptree/terminal_node.rb +7 -7
- data/lib/rley/rley_error.rb +12 -12
- data/lib/rley/sppf/alternative_node.rb +6 -6
- data/lib/rley/sppf/composite_node.rb +7 -7
- data/lib/rley/sppf/epsilon_node.rb +3 -3
- data/lib/rley/sppf/leaf_node.rb +3 -3
- data/lib/rley/sppf/parse_forest.rb +16 -16
- data/lib/rley/sppf/sppf_node.rb +7 -8
- data/lib/rley/sppf/token_node.rb +3 -3
- data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +61 -23
- data/lib/rley/syntax/grammar.rb +5 -5
- data/lib/rley/syntax/grm_symbol.rb +7 -7
- data/lib/rley/syntax/match_closest.rb +43 -0
- data/lib/rley/syntax/non_terminal.rb +9 -15
- data/lib/rley/syntax/production.rb +16 -10
- data/lib/rley/syntax/symbol_seq.rb +7 -9
- data/lib/rley/syntax/terminal.rb +4 -5
- data/lib/rley/syntax/verbatim_symbol.rb +3 -3
- data/lib/support/base_tokenizer.rb +19 -18
- data/spec/rley/base/dotted_item_spec.rb +2 -2
- data/spec/rley/engine_spec.rb +23 -21
- data/spec/rley/formatter/asciitree_spec.rb +7 -7
- data/spec/rley/formatter/bracket_notation_spec.rb +13 -13
- data/spec/rley/formatter/json_spec.rb +1 -1
- data/spec/rley/gfg/end_vertex_spec.rb +5 -5
- data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
- data/spec/rley/gfg/item_vertex_spec.rb +10 -10
- data/spec/rley/gfg/non_terminal_vertex_spec.rb +3 -3
- data/spec/rley/gfg/shortcut_edge_spec.rb +1 -1
- data/spec/rley/gfg/start_vertex_spec.rb +5 -5
- data/spec/rley/gfg/vertex_spec.rb +3 -3
- data/spec/rley/lexical/token_range_spec.rb +16 -16
- data/spec/rley/lexical/token_spec.rb +2 -2
- data/spec/rley/notation/grammar_builder_spec.rb +302 -0
- data/spec/rley/notation/parser_spec.rb +184 -0
- data/spec/rley/notation/tokenizer_spec.rb +370 -0
- data/spec/rley/parse_forest_visitor_spec.rb +165 -163
- data/spec/rley/parse_rep/ambiguous_parse_spec.rb +44 -44
- data/spec/rley/parse_rep/ast_builder_spec.rb +6 -7
- data/spec/rley/parse_rep/cst_builder_spec.rb +5 -5
- data/spec/rley/parse_rep/groucho_spec.rb +24 -26
- data/spec/rley/parse_rep/parse_forest_builder_spec.rb +27 -27
- data/spec/rley/parse_rep/parse_forest_factory_spec.rb +8 -8
- data/spec/rley/parse_rep/parse_tree_factory_spec.rb +3 -3
- data/spec/rley/parse_tree_visitor_spec.rb +10 -8
- data/spec/rley/parser/dangling_else_spec.rb +445 -0
- data/spec/rley/parser/error_reason_spec.rb +6 -6
- data/spec/rley/parser/gfg_earley_parser_spec.rb +120 -12
- data/spec/rley/parser/gfg_parsing_spec.rb +6 -13
- data/spec/rley/parser/parse_entry_spec.rb +19 -19
- data/spec/rley/parser/parse_walker_factory_spec.rb +10 -10
- data/spec/rley/ptree/non_terminal_node_spec.rb +5 -3
- data/spec/rley/ptree/parse_tree_node_spec.rb +4 -4
- data/spec/rley/ptree/terminal_node_spec.rb +6 -6
- data/spec/rley/sppf/alternative_node_spec.rb +6 -6
- data/spec/rley/sppf/non_terminal_node_spec.rb +3 -3
- data/spec/rley/sppf/token_node_spec.rb +4 -4
- data/spec/rley/support/ambiguous_grammar_helper.rb +4 -5
- data/spec/rley/support/grammar_abc_helper.rb +3 -5
- data/spec/rley/support/grammar_ambig01_helper.rb +5 -6
- data/spec/rley/support/grammar_arr_int_helper.rb +5 -6
- data/spec/rley/support/grammar_b_expr_helper.rb +5 -6
- data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
- data/spec/rley/support/grammar_l0_helper.rb +14 -17
- data/spec/rley/support/grammar_pb_helper.rb +8 -7
- data/spec/rley/support/grammar_sppf_helper.rb +3 -3
- data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +35 -16
- data/spec/rley/syntax/grammar_spec.rb +6 -6
- data/spec/rley/syntax/grm_symbol_spec.rb +1 -1
- data/spec/rley/syntax/match_closest_spec.rb +46 -0
- data/spec/rley/syntax/non_terminal_spec.rb +8 -8
- data/spec/rley/syntax/production_spec.rb +17 -13
- data/spec/rley/syntax/symbol_seq_spec.rb +2 -2
- data/spec/rley/syntax/terminal_spec.rb +5 -5
- data/spec/rley/syntax/verbatim_symbol_spec.rb +1 -1
- data/spec/spec_helper.rb +0 -12
- data/spec/support/base_tokenizer_spec.rb +7 -2
- metadata +48 -74
- data/.simplecov +0 -7
- data/lib/rley/parser/parse_state.rb +0 -83
- data/lib/rley/parser/parse_state_tracker.rb +0 -59
- data/lib/rley/parser/state_set.rb +0 -101
- data/spec/rley/parser/parse_state_spec.rb +0 -125
- data/spec/rley/parser/parse_tracer_spec.rb +0 -200
- data/spec/rley/parser/state_set_spec.rb +0 -130
@@ -0,0 +1,184 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../../spec_helper' # Use the RSpec framework
|
4
|
+
|
5
|
+
require_relative '../../../lib/rley/notation/ast_builder'
|
6
|
+
# Load the class under test
|
7
|
+
require_relative '../../../lib/rley/notation/parser'
|
8
|
+
|
9
|
+
module Rley
|
10
|
+
module Notation
|
11
|
+
describe Parser do
|
12
|
+
subject { Parser.new }
|
13
|
+
|
14
|
+
# Utility method to walk down to a deeply nested node
|
15
|
+
# @param aNTNode [Rley::PTree::NonTerminalNode]
|
16
|
+
# @param subnodePath [Array<Integer>] An Array of subnode indices
|
17
|
+
def walk_subnodes(aNTNode, subnodePath)
|
18
|
+
curr_node = aNTNode
|
19
|
+
subnodePath.each do |index|
|
20
|
+
curr_node = curr_node.subnodes[index]
|
21
|
+
end
|
22
|
+
|
23
|
+
curr_node
|
24
|
+
end
|
25
|
+
|
26
|
+
context 'Initialization:' do
|
27
|
+
it 'should be initialized without argument' do
|
28
|
+
expect { Parser.new }.not_to raise_error
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'should have its parse engine initialized' do
|
32
|
+
expect(subject.engine).to be_kind_of(Rley::Engine)
|
33
|
+
end
|
34
|
+
end # context
|
35
|
+
|
36
|
+
context 'Parsing into CST:' do
|
37
|
+
subject do
|
38
|
+
instance = Parser.new
|
39
|
+
instance.engine.configuration.repr_builder = Rley::ParseRep::CSTBuilder
|
40
|
+
|
41
|
+
instance
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'should parse single symbol names' do
|
45
|
+
samples = %w[IF ifCondition statement]
|
46
|
+
|
47
|
+
# One drawback of CSTs: they have a deeply nested structure
|
48
|
+
samples.each do |source|
|
49
|
+
ptree = subject.parse(source)
|
50
|
+
expect(ptree.root).to be_kind_of(Rley::PTree::NonTerminalNode)
|
51
|
+
expect(ptree.root.symbol.name).to eq('notation')
|
52
|
+
expect(ptree.root.subnodes[0]).to be_kind_of(Rley::PTree::NonTerminalNode)
|
53
|
+
expect(ptree.root.subnodes[0].symbol.name).to eq('rhs')
|
54
|
+
expect(ptree.root.subnodes[0].subnodes[0]).to be_kind_of(Rley::PTree::NonTerminalNode)
|
55
|
+
member_seq = ptree.root.subnodes[0].subnodes[0]
|
56
|
+
expect(member_seq.symbol.name).to eq('member_seq')
|
57
|
+
expect(member_seq.subnodes[0]).to be_kind_of(Rley::PTree::NonTerminalNode)
|
58
|
+
expect(member_seq.subnodes[0].symbol.name).to eq('member')
|
59
|
+
expect(member_seq.subnodes[0].subnodes[0]).to be_kind_of(Rley::PTree::NonTerminalNode)
|
60
|
+
expect(member_seq.subnodes[0].subnodes[0].symbol.name).to eq('strait_member')
|
61
|
+
strait_member = member_seq.subnodes[0].subnodes[0]
|
62
|
+
expect(strait_member.subnodes[0]).to be_kind_of(Rley::PTree::NonTerminalNode)
|
63
|
+
expect(strait_member.subnodes[0].symbol.name).to eq('base_member')
|
64
|
+
expect(strait_member.subnodes[0].subnodes[0]).to be_kind_of(Rley::PTree::TerminalNode)
|
65
|
+
expect(strait_member.subnodes[0].subnodes[0].token.lexeme).to eq(source)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
end # context
|
70
|
+
|
71
|
+
context 'Parsing into AST:' do
|
72
|
+
subject do
|
73
|
+
instance = Parser.new
|
74
|
+
instance.engine.configuration.repr_builder = ASTBuilder
|
75
|
+
|
76
|
+
instance
|
77
|
+
end
|
78
|
+
|
79
|
+
it 'should parse single symbol names' do
|
80
|
+
samples = %w[IF ifCondition statement]
|
81
|
+
|
82
|
+
samples.each do |source|
|
83
|
+
ptree = subject.parse(source)
|
84
|
+
expect(ptree.root).to be_kind_of(SymbolNode)
|
85
|
+
expect(ptree.root.name).to eq(source)
|
86
|
+
expect(ptree.root.repetition).to eq(:exactly_one)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'should parse a sequence of symbols' do
|
91
|
+
sequence = 'INT_LIT ELLIPSIS INT_LIT'
|
92
|
+
|
93
|
+
ptree = subject.parse(sequence)
|
94
|
+
expect(ptree.root).to be_kind_of(SequenceNode)
|
95
|
+
expect(ptree.root.subnodes[0]).to be_kind_of(SymbolNode)
|
96
|
+
expect(ptree.root.subnodes[0].name).to eq('INT_LIT')
|
97
|
+
expect(ptree.root.subnodes[1]).to be_kind_of(SymbolNode)
|
98
|
+
expect(ptree.root.subnodes[1].name).to eq('ELLIPSIS')
|
99
|
+
expect(ptree.root.subnodes[2]).to be_kind_of(SymbolNode)
|
100
|
+
expect(ptree.root.subnodes[2].name).to eq('INT_LIT')
|
101
|
+
end
|
102
|
+
|
103
|
+
it 'should parse an optional symbol' do
|
104
|
+
optional = 'member_seq?'
|
105
|
+
|
106
|
+
ptree = subject.parse(optional)
|
107
|
+
expect(ptree.root).to be_kind_of(SymbolNode)
|
108
|
+
expect(ptree.root.name).to eq('member_seq')
|
109
|
+
expect(ptree.root.repetition).to eq(:zero_or_one)
|
110
|
+
end
|
111
|
+
|
112
|
+
it 'should parse a symbol with a + modifier' do
|
113
|
+
one_or_more = 'member+'
|
114
|
+
|
115
|
+
ptree = subject.parse(one_or_more)
|
116
|
+
expect(ptree.root).to be_kind_of(SymbolNode)
|
117
|
+
expect(ptree.root.name).to eq('member')
|
118
|
+
expect(ptree.root.repetition).to eq(:one_or_more)
|
119
|
+
end
|
120
|
+
|
121
|
+
it 'should parse a symbol with a * modifier' do
|
122
|
+
zero_or_more = 'declaration* EOF'
|
123
|
+
|
124
|
+
ptree = subject.parse(zero_or_more)
|
125
|
+
expect(ptree.root).to be_kind_of(SequenceNode)
|
126
|
+
expect(ptree.root.subnodes[0]).to be_kind_of(SymbolNode)
|
127
|
+
expect(ptree.root.subnodes[0].name).to eq('declaration')
|
128
|
+
expect(ptree.root.subnodes[0].repetition).to eq(:zero_or_more)
|
129
|
+
expect(ptree.root.subnodes[1]).to be_kind_of(SymbolNode)
|
130
|
+
expect(ptree.root.subnodes[1].name).to eq('EOF')
|
131
|
+
expect(ptree.root.subnodes[1].repetition).to eq(:exactly_one)
|
132
|
+
end
|
133
|
+
|
134
|
+
it 'should parse a grouping with a modifier' do
|
135
|
+
input = "IF ifCondition statement (ELSE statement)?"
|
136
|
+
|
137
|
+
ptree = subject.parse(input)
|
138
|
+
expect(ptree.root).to be_kind_of(SequenceNode)
|
139
|
+
expect(ptree.root.subnodes[0]).to be_kind_of(SymbolNode)
|
140
|
+
expect(ptree.root.subnodes[0].name).to eq('IF')
|
141
|
+
expect(ptree.root.subnodes[1]).to be_kind_of(SymbolNode)
|
142
|
+
expect(ptree.root.subnodes[1].name).to eq('ifCondition')
|
143
|
+
expect(ptree.root.subnodes[2]).to be_kind_of(SymbolNode)
|
144
|
+
expect(ptree.root.subnodes[2].name).to eq('statement')
|
145
|
+
expect(ptree.root.subnodes[3]).to be_kind_of(SequenceNode)
|
146
|
+
expect(ptree.root.subnodes[3].repetition).to eq(:zero_or_one)
|
147
|
+
expect(ptree.root.subnodes[3].subnodes[0]).to be_kind_of(SymbolNode)
|
148
|
+
expect(ptree.root.subnodes[3].subnodes[0].name).to eq('ELSE')
|
149
|
+
expect(ptree.root.subnodes[3].subnodes[1]).to be_kind_of(SymbolNode)
|
150
|
+
expect(ptree.root.subnodes[3].subnodes[1].name).to eq('statement')
|
151
|
+
end
|
152
|
+
|
153
|
+
it 'should parse an annotated symbol' do
|
154
|
+
optional = 'member_seq{repeat: 0..1}'
|
155
|
+
|
156
|
+
ptree = subject.parse(optional)
|
157
|
+
expect(ptree.root).to be_kind_of(SymbolNode)
|
158
|
+
expect(ptree.root.name).to eq('member_seq')
|
159
|
+
expect(ptree.root.repetition).to eq(:zero_or_one)
|
160
|
+
end
|
161
|
+
|
162
|
+
it 'should parse a grouping with embedded annotation' do
|
163
|
+
if_stmt = "IF ifCondition statement ( ELSE { match_closest: 'IF' } statement )?"
|
164
|
+
|
165
|
+
ptree = subject.parse(if_stmt)
|
166
|
+
expect(ptree.root).to be_kind_of(SequenceNode)
|
167
|
+
expect(ptree.root.subnodes[0]).to be_kind_of(SymbolNode)
|
168
|
+
expect(ptree.root.subnodes[0].name).to eq('IF')
|
169
|
+
expect(ptree.root.subnodes[1]).to be_kind_of(SymbolNode)
|
170
|
+
expect(ptree.root.subnodes[1].name).to eq('ifCondition')
|
171
|
+
expect(ptree.root.subnodes[2]).to be_kind_of(SymbolNode)
|
172
|
+
expect(ptree.root.subnodes[2].name).to eq('statement')
|
173
|
+
optional = ptree.root.subnodes[3]
|
174
|
+
expect(optional).to be_kind_of(SequenceNode)
|
175
|
+
expect(optional.repetition).to eq(:zero_or_one)
|
176
|
+
expect(optional.subnodes[0]).to be_kind_of(SymbolNode)
|
177
|
+
expect(optional.subnodes[0].name).to eq('ELSE')
|
178
|
+
expect(optional.subnodes[0].annotation).to eq({'match_closest' => 'IF'})
|
179
|
+
expect(optional.subnodes[1].name).to eq('statement')
|
180
|
+
end
|
181
|
+
end # context
|
182
|
+
end # describe
|
183
|
+
end # module
|
184
|
+
end # module
|
@@ -0,0 +1,370 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../../spec_helper'
|
4
|
+
|
5
|
+
# Load the class under test
|
6
|
+
require_relative '../../../lib/rley/notation/tokenizer'
|
7
|
+
|
8
|
+
module Rley # Open this namespace to avoid module qualifier prefixes
|
9
|
+
module Notation # Open this namespace to avoid module qualifier prefixes
|
10
|
+
describe Tokenizer do
|
11
|
+
# Utility method for comparing actual and expected token
|
12
|
+
# sequence. NOTE: no trailing EOF token is stripped here; every token returned by the scanner is compared.
|
13
|
+
def match_expectations(aScanner, theExpectations)
|
14
|
+
tokens = aScanner.tokens
|
15
|
+
|
16
|
+
tokens.each_with_index do |token, i|
|
17
|
+
terminal, lexeme = theExpectations[i]
|
18
|
+
expect(token.terminal).to eq(terminal)
|
19
|
+
expect(token.lexeme).to eq(lexeme)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
context 'Initialization:' do
|
24
|
+
let(:sample_text) { 'begin-object member-list end-object' }
|
25
|
+
subject { Tokenizer.new }
|
26
|
+
|
27
|
+
it 'could be initialized with a text to tokenize or...' do
|
28
|
+
expect { Tokenizer.new(sample_text) }.not_to raise_error
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'could be initialized without argument...' do
|
32
|
+
expect { Tokenizer.new }.not_to raise_error
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'should have its scanner initialized' do
|
36
|
+
expect(subject.scanner).to be_kind_of(StringScanner)
|
37
|
+
end
|
38
|
+
end # context
|
39
|
+
|
40
|
+
context 'Input tokenization:' do
|
41
|
+
it 'should recognize single special character token' do
|
42
|
+
input = '(){}?*+,'
|
43
|
+
subject.start_with(input)
|
44
|
+
expectations = [
|
45
|
+
# [token lexeme]
|
46
|
+
%w[LEFT_PAREN (],
|
47
|
+
%w[RIGHT_PAREN )],
|
48
|
+
%w[LEFT_BRACE {],
|
49
|
+
%w[RIGHT_BRACE }],
|
50
|
+
%w[QUESTION_MARK ?],
|
51
|
+
%w[STAR *],
|
52
|
+
%w[PLUS +],
|
53
|
+
%w[COMMA ,]
|
54
|
+
]
|
55
|
+
match_expectations(subject, expectations)
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'should recognize one or two special character tokens' do
|
59
|
+
input = '..'
|
60
|
+
subject.start_with(input)
|
61
|
+
expectations = [
|
62
|
+
# [token lexeme]
|
63
|
+
%w[ELLIPSIS ..]
|
64
|
+
]
|
65
|
+
match_expectations(subject, expectations)
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'should treat ? * + as symbols if they occur as suffix' do
|
69
|
+
input = 'a+ + b* * 3 ?'
|
70
|
+
subject.start_with(input)
|
71
|
+
expectations = [
|
72
|
+
# [token lexeme]
|
73
|
+
%w[SYMBOL a],
|
74
|
+
%w[PLUS +],
|
75
|
+
%w[SYMBOL +],
|
76
|
+
%w[SYMBOL b],
|
77
|
+
%w[STAR *],
|
78
|
+
%w[SYMBOL *],
|
79
|
+
%w[INT_LIT 3],
|
80
|
+
%w[SYMBOL ?]
|
81
|
+
]
|
82
|
+
match_expectations(subject, expectations)
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'should recognize annotation keywords' do
|
86
|
+
keywords = 'match_closest: repeat:'
|
87
|
+
subject.start_with(keywords)
|
88
|
+
expectations = [
|
89
|
+
# [token lexeme]
|
90
|
+
%w[KEY match_closest],
|
91
|
+
%w[KEY repeat]
|
92
|
+
]
|
93
|
+
match_expectations(subject, expectations)
|
94
|
+
end
|
95
|
+
|
96
|
+
it 'should recognize ordinal integer values' do
|
97
|
+
input = <<-RLEY_END
|
98
|
+
3 123
|
99
|
+
987654 0
|
100
|
+
RLEY_END
|
101
|
+
|
102
|
+
expectations = [
|
103
|
+
['3', 3],
|
104
|
+
['123', 123],
|
105
|
+
['987654', 987654],
|
106
|
+
['0', 0]
|
107
|
+
]
|
108
|
+
|
109
|
+
subject.start_with(input)
|
110
|
+
subject.tokens[0..-2].each_with_index do |tok, i|
|
111
|
+
expect(tok).to be_kind_of(Rley::Lexical::Token)
|
112
|
+
expect(tok.terminal).to eq('INT_LIT')
|
113
|
+
(lexeme, val) = expectations[i]
|
114
|
+
expect(tok.lexeme).to eq(lexeme)
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
it 'should recognize string literals' do
|
119
|
+
input = <<-RLEY_END
|
120
|
+
""
|
121
|
+
"string"
|
122
|
+
"123"
|
123
|
+
''
|
124
|
+
'string'
|
125
|
+
'123'
|
126
|
+
RLEY_END
|
127
|
+
|
128
|
+
expectations = [
|
129
|
+
'',
|
130
|
+
'string',
|
131
|
+
'123'
|
132
|
+
] * 2
|
133
|
+
|
134
|
+
subject.start_with(input)
|
135
|
+
subject.tokens.each_with_index do |str, i|
|
136
|
+
expect(str).to be_kind_of(Rley::Lexical::Token)
|
137
|
+
expect(str.terminal).to eq('STR_LIT')
|
138
|
+
(lexeme, val) = expectations[i]
|
139
|
+
expect(str.lexeme).to eq(lexeme)
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
it 'should recognize a sequence of symbols' do
|
144
|
+
input = "IF ifCondition statement ELSE statement"
|
145
|
+
expectations = [
|
146
|
+
'IF',
|
147
|
+
'ifCondition',
|
148
|
+
'statement',
|
149
|
+
'ELSE',
|
150
|
+
'statement'
|
151
|
+
]
|
152
|
+
|
153
|
+
subject.start_with(input)
|
154
|
+
subject.tokens.each_with_index do |str, i|
|
155
|
+
expect(str).to be_kind_of(Rley::Lexical::Token)
|
156
|
+
expect(str.terminal).to eq('SYMBOL')
|
157
|
+
(lexeme, val) = expectations[i]
|
158
|
+
expect(str.lexeme).to eq(lexeme)
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
162
|
+
it 'should recognize an optional symbol' do
|
163
|
+
input = "RETURN expression? SEMICOLON"
|
164
|
+
expectations = [
|
165
|
+
['RETURN', 'SYMBOL'],
|
166
|
+
['expression', 'SYMBOL'],
|
167
|
+
['?', 'QUESTION_MARK'],
|
168
|
+
['SEMICOLON', 'SYMBOL'],
|
169
|
+
]
|
170
|
+
|
171
|
+
subject.start_with(input)
|
172
|
+
subject.tokens.each_with_index do |str, i|
|
173
|
+
expect(str).to be_kind_of(Rley::Lexical::Token)
|
174
|
+
(lexeme, token) = expectations[i]
|
175
|
+
expect(str.lexeme).to eq(lexeme)
|
176
|
+
expect(str.terminal).to eq(token)
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
it 'should recognize a symbol with a star quantifier' do
|
181
|
+
input = "declaration* EOF"
|
182
|
+
expectations = [
|
183
|
+
['declaration', 'SYMBOL'],
|
184
|
+
['*', 'STAR'],
|
185
|
+
['EOF', 'SYMBOL'],
|
186
|
+
]
|
187
|
+
|
188
|
+
subject.start_with(input)
|
189
|
+
subject.tokens.each_with_index do |str, i|
|
190
|
+
expect(str).to be_kind_of(Rley::Lexical::Token)
|
191
|
+
(lexeme, token) = expectations[i]
|
192
|
+
expect(str.lexeme).to eq(lexeme)
|
193
|
+
expect(str.terminal).to eq(token)
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
it 'should recognize a symbol with a plus quantifier' do
|
198
|
+
input = "declaration+ EOF"
|
199
|
+
expectations = [
|
200
|
+
['declaration', 'SYMBOL'],
|
201
|
+
['+', 'PLUS'],
|
202
|
+
['EOF', 'SYMBOL'],
|
203
|
+
]
|
204
|
+
|
205
|
+
subject.start_with(input)
|
206
|
+
subject.tokens.each_with_index do |str, i|
|
207
|
+
expect(str).to be_kind_of(Rley::Lexical::Token)
|
208
|
+
(lexeme, token) = expectations[i]
|
209
|
+
expect(str.lexeme).to eq(lexeme)
|
210
|
+
expect(str.terminal).to eq(token)
|
211
|
+
end
|
212
|
+
end
|
213
|
+
|
214
|
+
it 'should recognize a grouping with a quantifier' do
|
215
|
+
input = "IF ifCondition statement (ELSE statement)?"
|
216
|
+
expectations = [
|
217
|
+
['IF', 'SYMBOL'],
|
218
|
+
['ifCondition', 'SYMBOL'],
|
219
|
+
['statement', 'SYMBOL'],
|
220
|
+
['(', 'LEFT_PAREN'],
|
221
|
+
['ELSE', 'SYMBOL'],
|
222
|
+
['statement', 'SYMBOL'],
|
223
|
+
[')', 'RIGHT_PAREN'],
|
224
|
+
['?', 'QUESTION_MARK']
|
225
|
+
]
|
226
|
+
|
227
|
+
subject.start_with(input)
|
228
|
+
subject.tokens.each_with_index do |str, i|
|
229
|
+
expect(str).to be_kind_of(Rley::Lexical::Token)
|
230
|
+
(lexeme, token) = expectations[i]
|
231
|
+
expect(str.lexeme).to eq(lexeme)
|
232
|
+
expect(str.terminal).to eq(token)
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
236
|
+
it 'should recognize a match closest constraint' do
|
237
|
+
input = "IF ifCondition statement ELSE { match_closest: 'IF' } statement"
|
238
|
+
expectations = [
|
239
|
+
['IF', 'SYMBOL'],
|
240
|
+
['ifCondition', 'SYMBOL'],
|
241
|
+
['statement', 'SYMBOL'],
|
242
|
+
['ELSE', 'SYMBOL'],
|
243
|
+
['{', 'LEFT_BRACE'],
|
244
|
+
['match_closest', 'KEY'],
|
245
|
+
['IF', 'STR_LIT'],
|
246
|
+
['}', 'RIGHT_BRACE'],
|
247
|
+
['statement', 'SYMBOL']
|
248
|
+
]
|
249
|
+
|
250
|
+
subject.start_with(input)
|
251
|
+
subject.tokens.each_with_index do |str, i|
|
252
|
+
expect(str).to be_kind_of(Rley::Lexical::Token)
|
253
|
+
(lexeme, token) = expectations[i]
|
254
|
+
expect(str.lexeme).to eq(lexeme)
|
255
|
+
expect(str.terminal).to eq(token)
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
it 'should recognize a repeat constraint' do
|
260
|
+
input = "IF ifCondition statement { repeat: 1 } ELSE statement"
|
261
|
+
expectations = [
|
262
|
+
['IF', 'SYMBOL'],
|
263
|
+
['ifCondition', 'SYMBOL'],
|
264
|
+
['statement', 'SYMBOL'],
|
265
|
+
['{', 'LEFT_BRACE'],
|
266
|
+
['repeat', 'KEY'],
|
267
|
+
['1', 'INT_LIT'],
|
268
|
+
['}', 'RIGHT_BRACE'],
|
269
|
+
['ELSE', 'SYMBOL'],
|
270
|
+
['statement', 'SYMBOL']
|
271
|
+
]
|
272
|
+
|
273
|
+
subject.start_with(input)
|
274
|
+
subject.tokens.each_with_index do |str, i|
|
275
|
+
expect(str).to be_kind_of(Rley::Lexical::Token)
|
276
|
+
(lexeme, token) = expectations[i]
|
277
|
+
expect(str.lexeme).to eq(lexeme)
|
278
|
+
expect(str.terminal).to eq(token)
|
279
|
+
end
|
280
|
+
end
|
281
|
+
|
282
|
+
it 'should recognize a grouping with a repeat constraint' do
|
283
|
+
input = "IF ifCondition statement ( ELSE statement ){ repeat: 0..1 }"
|
284
|
+
expectations = [
|
285
|
+
['IF', 'SYMBOL'],
|
286
|
+
['ifCondition', 'SYMBOL'],
|
287
|
+
['statement', 'SYMBOL'],
|
288
|
+
['(', 'LEFT_PAREN'],
|
289
|
+
['ELSE', 'SYMBOL'],
|
290
|
+
['statement', 'SYMBOL'],
|
291
|
+
[')', 'RIGHT_PAREN'],
|
292
|
+
['{', 'LEFT_BRACE'],
|
293
|
+
['repeat', 'KEY'],
|
294
|
+
['0', 'INT_LIT'],
|
295
|
+
['..', 'ELLIPSIS'],
|
296
|
+
['1', 'INT_LIT'],
|
297
|
+
['}', 'RIGHT_BRACE']
|
298
|
+
]
|
299
|
+
|
300
|
+
subject.start_with(input)
|
301
|
+
subject.tokens.each_with_index do |str, i|
|
302
|
+
expect(str).to be_kind_of(Rley::Lexical::Token)
|
303
|
+
(lexeme, token) = expectations[i]
|
304
|
+
expect(str.lexeme).to eq(lexeme)
|
305
|
+
expect(str.terminal).to eq(token)
|
306
|
+
end
|
307
|
+
end
|
308
|
+
|
309
|
+
it 'should recognize a combination of constraints' do
|
310
|
+
input = "IF ifCondition statement ELSE { repeat: 1, match_closest: 'IF' } statement"
|
311
|
+
expectations = [
|
312
|
+
['IF', 'SYMBOL'],
|
313
|
+
['ifCondition', 'SYMBOL'],
|
314
|
+
['statement', 'SYMBOL'],
|
315
|
+
['ELSE', 'SYMBOL'],
|
316
|
+
['{', 'LEFT_BRACE'],
|
317
|
+
['repeat', 'KEY'],
|
318
|
+
['1', 'INT_LIT'],
|
319
|
+
[',', 'COMMA'],
|
320
|
+
['match_closest', 'KEY'],
|
321
|
+
['IF', 'STR_LIT'],
|
322
|
+
['}', 'RIGHT_BRACE'],
|
323
|
+
['statement', 'SYMBOL']
|
324
|
+
]
|
325
|
+
|
326
|
+
subject.start_with(input)
|
327
|
+
subject.tokens.each_with_index do |str, i|
|
328
|
+
expect(str).to be_kind_of(Rley::Lexical::Token)
|
329
|
+
(lexeme, token) = expectations[i]
|
330
|
+
expect(str.lexeme).to eq(lexeme)
|
331
|
+
expect(str.terminal).to eq(token)
|
332
|
+
end
|
333
|
+
end
|
334
|
+
|
335
|
+
it 'should recognize a grouping with a nested constraint' do
|
336
|
+
input = "IF ifCondition statement ( ELSE { match_closest: 'IF' } statement ){ repeat: 0..1 }"
|
337
|
+
expectations = [
|
338
|
+
['IF', 'SYMBOL'],
|
339
|
+
['ifCondition', 'SYMBOL'],
|
340
|
+
['statement', 'SYMBOL'],
|
341
|
+
['(', 'LEFT_PAREN'],
|
342
|
+
['ELSE', 'SYMBOL'],
|
343
|
+
['{', 'LEFT_BRACE'],
|
344
|
+
['match_closest', 'KEY'],
|
345
|
+
['IF', 'STR_LIT'],
|
346
|
+
['}', 'RIGHT_BRACE'],
|
347
|
+
['statement', 'SYMBOL'],
|
348
|
+
[')', 'RIGHT_PAREN'],
|
349
|
+
['{', 'LEFT_BRACE'],
|
350
|
+
['repeat', 'KEY'],
|
351
|
+
['0', 'INT_LIT'],
|
352
|
+
['..', 'ELLIPSIS'],
|
353
|
+
['1', 'INT_LIT'],
|
354
|
+
['}', 'RIGHT_BRACE']
|
355
|
+
]
|
356
|
+
|
357
|
+
subject.start_with(input)
|
358
|
+
subject.tokens.each_with_index do |str, i|
|
359
|
+
expect(str).to be_kind_of(Rley::Lexical::Token)
|
360
|
+
(lexeme, token) = expectations[i]
|
361
|
+
expect(str.lexeme).to eq(lexeme)
|
362
|
+
expect(str.terminal).to eq(token)
|
363
|
+
end
|
364
|
+
end
|
365
|
+
end # context
|
366
|
+
end # describe
|
367
|
+
end # module
|
368
|
+
end # module
|
369
|
+
|
370
|
+
# End of file
|