rley 0.7.06 → 0.8.01
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +362 -62
- data/.travis.yml +6 -6
- data/CHANGELOG.md +20 -4
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/examples/NLP/engtagger.rb +193 -190
- data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
- data/examples/NLP/nano_eng/nano_grammar.rb +21 -21
- data/examples/NLP/pico_en_demo.rb +2 -2
- data/examples/data_formats/JSON/cli_options.rb +1 -1
- data/examples/data_formats/JSON/json_ast_builder.rb +21 -27
- data/examples/data_formats/JSON/json_ast_nodes.rb +12 -21
- data/examples/data_formats/JSON/json_demo.rb +1 -2
- data/examples/data_formats/JSON/json_grammar.rb +13 -13
- data/examples/data_formats/JSON/json_lexer.rb +8 -8
- data/examples/data_formats/JSON/json_minifier.rb +1 -1
- data/examples/general/calc_iter1/calc_ast_builder.rb +13 -10
- data/examples/general/calc_iter1/calc_ast_nodes.rb +23 -37
- data/examples/general/calc_iter1/calc_grammar.rb +7 -6
- data/examples/general/calc_iter1/calc_lexer.rb +6 -4
- data/examples/general/calc_iter1/spec/calculator_spec.rb +5 -5
- data/examples/general/calc_iter2/calc_ast_builder.rb +5 -3
- data/examples/general/calc_iter2/calc_ast_nodes.rb +27 -43
- data/examples/general/calc_iter2/calc_grammar.rb +12 -12
- data/examples/general/calc_iter2/calc_lexer.rb +11 -10
- data/examples/general/calc_iter2/spec/calculator_spec.rb +26 -26
- data/examples/general/left.rb +2 -2
- data/examples/general/right.rb +2 -2
- data/lib/rley.rb +1 -1
- data/lib/rley/base/dotted_item.rb +28 -31
- data/lib/rley/base/grm_items_builder.rb +6 -0
- data/lib/rley/constants.rb +2 -2
- data/lib/rley/engine.rb +22 -25
- data/lib/rley/formatter/asciitree.rb +3 -3
- data/lib/rley/formatter/bracket_notation.rb +1 -8
- data/lib/rley/formatter/debug.rb +6 -6
- data/lib/rley/formatter/json.rb +2 -2
- data/lib/rley/gfg/call_edge.rb +1 -1
- data/lib/rley/gfg/edge.rb +5 -5
- data/lib/rley/gfg/end_vertex.rb +2 -6
- data/lib/rley/gfg/epsilon_edge.rb +1 -5
- data/lib/rley/gfg/grm_flow_graph.rb +27 -23
- data/lib/rley/gfg/item_vertex.rb +10 -10
- data/lib/rley/gfg/non_terminal_vertex.rb +4 -4
- data/lib/rley/gfg/scan_edge.rb +1 -1
- data/lib/rley/gfg/shortcut_edge.rb +2 -2
- data/lib/rley/gfg/start_vertex.rb +4 -8
- data/lib/rley/gfg/vertex.rb +43 -39
- data/lib/rley/interface.rb +16 -0
- data/lib/rley/lexical/token_range.rb +6 -6
- data/lib/rley/notation/all_notation_nodes.rb +2 -0
- data/lib/rley/notation/ast_builder.rb +191 -0
- data/lib/rley/notation/ast_node.rb +44 -0
- data/lib/rley/notation/ast_visitor.rb +113 -0
- data/lib/rley/notation/grammar.rb +49 -0
- data/lib/rley/notation/grammar_builder.rb +504 -0
- data/lib/rley/notation/grouping_node.rb +23 -0
- data/lib/rley/notation/parser.rb +56 -0
- data/lib/rley/notation/sequence_node.rb +35 -0
- data/lib/rley/notation/symbol_node.rb +29 -0
- data/lib/rley/notation/tokenizer.rb +192 -0
- data/lib/rley/parse_forest_visitor.rb +5 -5
- data/lib/rley/parse_rep/ast_base_builder.rb +48 -11
- data/lib/rley/parse_rep/cst_builder.rb +5 -6
- data/lib/rley/parse_rep/parse_forest_builder.rb +22 -18
- data/lib/rley/parse_rep/parse_forest_factory.rb +3 -3
- data/lib/rley/parse_rep/parse_rep_creator.rb +14 -16
- data/lib/rley/parse_rep/parse_tree_builder.rb +4 -4
- data/lib/rley/parse_rep/parse_tree_factory.rb +27 -27
- data/lib/rley/parse_tree_visitor.rb +1 -1
- data/lib/rley/parser/error_reason.rb +4 -5
- data/lib/rley/parser/gfg_chart.rb +118 -26
- data/lib/rley/parser/gfg_parsing.rb +22 -33
- data/lib/rley/parser/parse_entry.rb +25 -31
- data/lib/rley/parser/parse_entry_set.rb +19 -16
- data/lib/rley/parser/parse_entry_tracker.rb +4 -4
- data/lib/rley/parser/parse_tracer.rb +13 -13
- data/lib/rley/parser/parse_walker_factory.rb +23 -28
- data/lib/rley/ptree/non_terminal_node.rb +7 -5
- data/lib/rley/ptree/parse_tree.rb +3 -3
- data/lib/rley/ptree/parse_tree_node.rb +5 -5
- data/lib/rley/ptree/terminal_node.rb +7 -7
- data/lib/rley/rley_error.rb +12 -12
- data/lib/rley/sppf/alternative_node.rb +6 -6
- data/lib/rley/sppf/composite_node.rb +7 -7
- data/lib/rley/sppf/epsilon_node.rb +3 -3
- data/lib/rley/sppf/leaf_node.rb +3 -3
- data/lib/rley/sppf/parse_forest.rb +16 -16
- data/lib/rley/sppf/sppf_node.rb +7 -8
- data/lib/rley/sppf/token_node.rb +3 -3
- data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +61 -23
- data/lib/rley/syntax/grammar.rb +5 -5
- data/lib/rley/syntax/grm_symbol.rb +7 -7
- data/lib/rley/syntax/match_closest.rb +43 -0
- data/lib/rley/syntax/non_terminal.rb +9 -15
- data/lib/rley/syntax/production.rb +16 -10
- data/lib/rley/syntax/symbol_seq.rb +7 -9
- data/lib/rley/syntax/terminal.rb +4 -5
- data/lib/rley/syntax/verbatim_symbol.rb +3 -3
- data/lib/support/base_tokenizer.rb +19 -18
- data/spec/rley/base/dotted_item_spec.rb +2 -2
- data/spec/rley/engine_spec.rb +23 -21
- data/spec/rley/formatter/asciitree_spec.rb +7 -7
- data/spec/rley/formatter/bracket_notation_spec.rb +13 -13
- data/spec/rley/formatter/json_spec.rb +1 -1
- data/spec/rley/gfg/end_vertex_spec.rb +5 -5
- data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
- data/spec/rley/gfg/item_vertex_spec.rb +10 -10
- data/spec/rley/gfg/non_terminal_vertex_spec.rb +3 -3
- data/spec/rley/gfg/shortcut_edge_spec.rb +1 -1
- data/spec/rley/gfg/start_vertex_spec.rb +5 -5
- data/spec/rley/gfg/vertex_spec.rb +3 -3
- data/spec/rley/lexical/token_range_spec.rb +16 -16
- data/spec/rley/lexical/token_spec.rb +2 -2
- data/spec/rley/notation/grammar_builder_spec.rb +302 -0
- data/spec/rley/notation/parser_spec.rb +184 -0
- data/spec/rley/notation/tokenizer_spec.rb +370 -0
- data/spec/rley/parse_forest_visitor_spec.rb +165 -163
- data/spec/rley/parse_rep/ambiguous_parse_spec.rb +44 -44
- data/spec/rley/parse_rep/ast_builder_spec.rb +6 -7
- data/spec/rley/parse_rep/cst_builder_spec.rb +5 -5
- data/spec/rley/parse_rep/groucho_spec.rb +24 -26
- data/spec/rley/parse_rep/parse_forest_builder_spec.rb +27 -27
- data/spec/rley/parse_rep/parse_forest_factory_spec.rb +8 -8
- data/spec/rley/parse_rep/parse_tree_factory_spec.rb +3 -3
- data/spec/rley/parse_tree_visitor_spec.rb +10 -8
- data/spec/rley/parser/dangling_else_spec.rb +445 -0
- data/spec/rley/parser/error_reason_spec.rb +6 -6
- data/spec/rley/parser/gfg_earley_parser_spec.rb +120 -12
- data/spec/rley/parser/gfg_parsing_spec.rb +6 -13
- data/spec/rley/parser/parse_entry_spec.rb +19 -19
- data/spec/rley/parser/parse_walker_factory_spec.rb +10 -10
- data/spec/rley/ptree/non_terminal_node_spec.rb +5 -3
- data/spec/rley/ptree/parse_tree_node_spec.rb +4 -4
- data/spec/rley/ptree/terminal_node_spec.rb +6 -6
- data/spec/rley/sppf/alternative_node_spec.rb +6 -6
- data/spec/rley/sppf/non_terminal_node_spec.rb +3 -3
- data/spec/rley/sppf/token_node_spec.rb +4 -4
- data/spec/rley/support/ambiguous_grammar_helper.rb +4 -5
- data/spec/rley/support/grammar_abc_helper.rb +3 -5
- data/spec/rley/support/grammar_ambig01_helper.rb +5 -6
- data/spec/rley/support/grammar_arr_int_helper.rb +5 -6
- data/spec/rley/support/grammar_b_expr_helper.rb +5 -6
- data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
- data/spec/rley/support/grammar_l0_helper.rb +14 -17
- data/spec/rley/support/grammar_pb_helper.rb +8 -7
- data/spec/rley/support/grammar_sppf_helper.rb +3 -3
- data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +35 -16
- data/spec/rley/syntax/grammar_spec.rb +6 -6
- data/spec/rley/syntax/grm_symbol_spec.rb +1 -1
- data/spec/rley/syntax/match_closest_spec.rb +46 -0
- data/spec/rley/syntax/non_terminal_spec.rb +8 -8
- data/spec/rley/syntax/production_spec.rb +17 -13
- data/spec/rley/syntax/symbol_seq_spec.rb +2 -2
- data/spec/rley/syntax/terminal_spec.rb +5 -5
- data/spec/rley/syntax/verbatim_symbol_spec.rb +1 -1
- data/spec/spec_helper.rb +0 -12
- data/spec/support/base_tokenizer_spec.rb +7 -2
- metadata +48 -74
- data/.simplecov +0 -7
- data/lib/rley/parser/parse_state.rb +0 -83
- data/lib/rley/parser/parse_state_tracker.rb +0 -59
- data/lib/rley/parser/state_set.rb +0 -101
- data/spec/rley/parser/parse_state_spec.rb +0 -125
- data/spec/rley/parser/parse_tracer_spec.rb +0 -200
- data/spec/rley/parser/state_set_spec.rb +0 -130
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'sequence_node'
|
4
|
+
|
5
|
+
module Rley
|
6
|
+
module Notation
|
7
|
+
# A syntax node representing an expression bracketed by parentheses.
|
8
|
+
class GroupingNode < SequenceNode
  # Build a grouping node. All three arguments are handed over, unchanged,
  # to the SequenceNode constructor.
  # @param aPosition [Rley::Lexical::Position] Start position.
  # @param sequence [Array<ASTNode>] sequence of AST nodes
  # @param theRepetition [Symbol] indicates how many times the symbol can be repeated
  def initialize(aPosition, sequence, theRepetition = nil)
    super
  end

  # Visitee side of the Visitor design pattern: hand this node over to
  # the visitor's grouping-specific hook.
  # @param visitor [Notation::ASTVisitor] the visitor
  # @return [Object] whatever the visitor's hook returns
  def accept(visitor)
    visitor.visit_grouping_node(self)
  end
end # class
|
22
|
+
end # module
|
23
|
+
end # module
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'tokenizer'
|
4
|
+
require_relative 'grammar'
|
5
|
+
require_relative 'ast_builder'
|
6
|
+
|
7
|
+
module Rley
|
8
|
+
module Notation
|
9
|
+
# A parser for the Rley grammar notation (RGN) that produces concrete parse trees.
|
10
|
+
# Concrete parse trees are the default kind of parse tree
|
11
|
+
# generated by the Rley library.
|
12
|
+
# They consist of two node types only:
|
13
|
+
# - NonTerminalNode
|
14
|
+
# - TerminalNode
|
15
|
+
# A NonTerminalNode has zero or more child nodes (called subnodes)
|
16
|
+
# A TerminalNode is a leaf node, that is, it has no child node.
|
17
|
+
# While concrete parse tree nodes can be generated out of the box,
|
18
|
+
# they have the following drawbacks:
|
19
|
+
# - Generic node classes that aren't always suited for the needs of
|
20
|
+
# the language being processed.
|
21
|
+
# - Concrete parse trees tend to be deeply nested, which may complicate
|
22
|
+
# further processing.
|
23
|
+
class Parser
  # @return [Rley::Engine] A facade object for the Rley parsing library
  attr_reader :engine

  # Create the Rley facade object with diagnostics switched on and
  # Notation::ASTBuilder as representation builder, then load the
  # RGN grammar into it.
  def initialize
    @engine = Rley::Engine.new do |config|
      config.diagnose = true
      config.repr_builder = Notation::ASTBuilder
    end

    @engine.use_grammar(Rley::Notation::RGNGrammar)
  end

  # Tokenize the given text and parse the resulting tokens.
  # @param source [String] text to parse
  # @return [Object] the value returned by the engine's `convert` for the
  #   successful parse result
  # @raise [SyntaxError] when the engine reports that the parse failed
  def parse(source)
    outcome = engine.parse(Tokenizer.new(source).tokens)
    return engine.convert(outcome) if outcome.success?

    # Stop here: the parse failed, report the reason given by the engine.
    raise SyntaxError, "Parsing failed\nReason: #{outcome.failure_reason.message}"
  end
end # class
|
55
|
+
end # module
|
56
|
+
end # module
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'ast_node'
|
4
|
+
|
5
|
+
module Rley
|
6
|
+
module Notation
|
7
|
+
# A syntax node for a sequence of AST nodes
|
8
|
+
class SequenceNode < ASTNode
  # @return [Array<ASTNode>] the nodes that make up the sequence
  attr_reader :subnodes

  # Initialized to an empty Array by the constructor; the writer lets
  # other code replace it.
  attr_accessor :constraints

  # @param aPosition [Rley::Lexical::Position] Start position.
  # @param sequence [Array<ASTNode>] sequence of AST nodes
  # @param theRepetition [Symbol, nil] indicates how many times the symbol
  #   can be repeated; when nil the repetition is left untouched.
  def initialize(aPosition, sequence, theRepetition = nil)
    super(aPosition)
    @subnodes = sequence
    # The explicit receiver is required: without it Ruby parses
    # `repetition=(x)` as an assignment to a local variable and the
    # setter is never invoked.
    # NOTE(review): relies on ASTNode providing `repetition=` -- confirm.
    self.repetition = theRepetition if theRepetition
    @constraints = []
  end

  # @return [Integer] number of subnodes in the sequence
  def size
    subnodes.size
  end

  # Part of the 'visitee' role in Visitor design pattern.
  # @param visitor [Notation::ASTVisitor] the visitor
  def accept(visitor)
    visitor.visit_sequence_node(self)
  end
end # class
|
34
|
+
end # module
|
35
|
+
end # module
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'ast_node'
|
4
|
+
|
5
|
+
module Rley
|
6
|
+
module Notation
|
7
|
+
# A syntax node for a grammar symbol occurring in rhs of a rule
|
8
|
+
class SymbolNode < ASTNode
  # @return [String] name of grammar symbol
  attr_reader :name

  # @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
  # @param aName [String] name of grammar symbol
  # @param theRepetition [Symbol, nil] indicates how many times the symbol
  #   can be repeated; when nil the repetition is left untouched.
  def initialize(aPosition, aName, theRepetition = nil)
    super(aPosition)
    @name = aName
    # The explicit receiver is required: without it Ruby parses
    # `repetition=(x)` as an assignment to a local variable and the
    # setter is never invoked.
    # NOTE(review): relies on ASTNode providing `repetition=` -- confirm.
    self.repetition = theRepetition if theRepetition
  end

  # Part of the 'visitee' role in Visitor design pattern.
  # @param visitor [Notation::ASTVisitor] the visitor
  def accept(visitor)
    visitor.visit_symbol_node(self)
  end
end # class
|
28
|
+
end # module
|
29
|
+
end # module
|
@@ -0,0 +1,192 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'strscan'
|
4
|
+
require_relative '../lexical/token'
|
5
|
+
|
6
|
+
module Rley
|
7
|
+
module Notation
|
8
|
+
# A tokenizer for the Rley notation language.
|
9
|
+
# Responsibility: break input into a sequence of token objects.
|
10
|
+
# The tokenizer should recognize:
|
11
|
+
# Identifiers,
|
12
|
+
# Number literals including single digit
|
13
|
+
# String literals (quote delimited)
|
14
|
+
# Delimiters: e.g. parentheses '(', ')'
|
15
|
+
# Separators: e.g. comma
|
16
|
+
class Tokenizer
  # @return [StringScanner] Low-level input scanner
  attr_reader(:scanner)

  # @return [Integer] The current line number
  attr_reader(:lineno)

  # @return [Integer] Position of last start of line in the input
  attr_reader(:line_start)

  # One or two special character tokens: lexeme => terminal name.
  @@lexeme2name = {
    '(' => 'LEFT_PAREN',
    ')' => 'RIGHT_PAREN',
    '{' => 'LEFT_BRACE',
    '}' => 'RIGHT_BRACE',
    ',' => 'COMMA',
    '+' => 'PLUS',
    '?' => 'QUESTION_MARK',
    '*' => 'STAR',
    '..' => 'ELLIPSIS'
  }.freeze

  # Here are all the implemented Rley notation keywords
  @@keywords = %w[
    match_closest repeat
  ].map { |x| [x, x] }.to_h

  # Constructor. Initialize a tokenizer for Rley notation input.
  # @param source [String, nil] notation text to tokenize; when nil the
  #   tokenizer starts with an empty input.
  def initialize(source = nil)
    @scanner = StringScanner.new('')
    start_with(source) if source
  end

  # Reset the tokenizer and make the given text the current input.
  # The line counter restarts at 1 and the line start at offset 0.
  # @param source [String] notation text to tokenize.
  def start_with(source)
    @scanner.string = source
    @lineno = 1
    @line_start = 0
  end

  # Scan the source and return an array of tokens.
  # @return [Array<Rley::Lexical::Token>] the sequence of tokens found
  def tokens
    tok_sequence = []
    until @scanner.eos?
      token = _next_token
      tok_sequence << token unless token.nil?
    end

    tok_sequence
  end

  private

  # Skip leading whitespace, then recognize one token at the scanner
  # position.
  # @return [Rley::Lexical::Token, nil] the token, or nil when only
  #   whitespace was left or when an `identifier:` is not a keyword.
  # @raise [ScanError] when the current character cannot start a token.
  #   NOTE(review): ScanError is not defined in the code visible here;
  #   confirm that it is in scope for this class.
  def _next_token
    pos_before = scanner.pos
    skip_intertoken_spaces
    ws_found = scanner.pos > pos_before
    curr_ch = scanner.peek(1)
    return nil if curr_ch.nil? || curr_ch.empty?

    start_pos = scanner.pos # offset of the first character of the token

    if '(){},'.include? curr_ch
      # Single delimiter, separator or character
      build_token(@@lexeme2name[curr_ch], scanner.getch)
    elsif '?*+'.include? curr_ch # modifier character
      # modifiers without prefix text are symbols
      symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
      build_token(symb, scanner.getch)
    elsif (lexeme = scanner.scan(/\.\./))
      # One or two special character tokens
      build_token(@@lexeme2name[lexeme], lexeme)
    elsif scanner.check(/"|'/) # Start of string detected...
      build_string_token
    elsif (lexeme = scanner.scan(/\d+/))
      build_token('INT_LIT', lexeme)
    elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*:/))
      key_name = lexeme.chop # drop the trailing colon
      # The start offset is passed explicitly: the scanner also consumed
      # the colon, so deriving the column from the shortened lexeme
      # would be off by one.
      # NOTE(review): a non-keyword `identifier:` is consumed and
      # silently ignored (no token, no error).
      build_token('KEY', key_name, start_pos) if @@keywords[key_name]
    elsif (lexeme = scanner.scan(/[^?*+,:(){}\s]+/))
      build_token('SYMBOL', lexeme)
    else # Unknown token
      col = scanner.pos - @line_start + 1
      scanner.scan(/./) # consume the offending character before reporting
      raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
    end
  end

  # Create a token object located at the current line.
  # @param aSymbolName [String] name of the terminal symbol
  # @param aLexeme [String] text of the token
  # @param aStartPos [Integer, nil] scanner offset where the token starts;
  #   when nil it is derived from the scanner position and the lexeme size.
  # @return [Rley::Lexical::Token]
  def build_token(aSymbolName, aLexeme, aStartPos = nil)
    lex_length = aLexeme ? aLexeme.size : 0
    token_start = aStartPos || (scanner.pos - lex_length)
    col = token_start - @line_start + 1
    pos = Rley::Lexical::Position.new(@lineno, col)
    Rley::Lexical::Token.new(aLexeme.dup, aSymbolName, pos)
  rescue StandardError
    puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
    raise
  end

  # Scan a quote-delimited string literal.
  # precondition: current position at leading quote
  # @return [Rley::Lexical::Token] a STR_LIT token whose lexeme is the
  #   text between the delimiters.
  # @raise [ScanError] when the input ends before the closing delimiter.
  def build_string_token
    delimiter = scanner.scan(/./)
    line = @lineno
    column_start = scanner.pos - @line_start
    literal = +''
    loop do
      substr = scanner.scan(/[^"'\\\r\n]*/)
      if scanner.eos?
        pos_start = "line #{line}:#{column_start}"
        raise ScanError, "Error: [#{pos_start}]: Unterminated string."
      end

      literal << substr
      special = scanner.scan(/["'\\\r\n]/)
      case special
      when delimiter # Terminating quote found
        break
      when "\r"
        # `.` never matches "\n", so the line feed of a CR+LF pair must
        # be consumed with an explicit pattern.
        line_feed = scanner.scan(/\n/)
        special << line_feed if line_feed
        next_line
        literal << special
      when "\n"
        next_line
        literal << special
      when '"', "'"
        # The other kind of quote is ordinary text inside the literal.
        literal << special
      end
      # NOTE(review): a backslash is consumed without being copied into
      # the literal; escape sequences are not interpreted.
    end
    pos = Rley::Lexical::Position.new(line, column_start)
    Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
  end

  # Skip non-significant whitespace (blanks, tabs, form feeds and line
  # terminators), updating the line bookkeeping on each line terminator.
  # Advance the scanner until something significant is found.
  # @return [Integer] the scanner position after skipping
  def skip_intertoken_spaces
    loop do
      ws_found = scanner.skip(/[ \t\f]+/) ? true : false
      nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
      if nl_found
        ws_found = true
        next_line
      end

      break unless ws_found
    end

    scanner.pos
  end

  # Record that a new line starts at the current scanner position.
  def next_line
    @lineno += 1
    @line_start = scanner.pos
  end
end # class
|
191
|
+
end # module
|
192
|
+
end # module
|
@@ -15,8 +15,8 @@ module Rley # This module is used as a namespace
|
|
15
15
|
@signatures = subnodes.map { |_| prime_enumerator.next }
|
16
16
|
end
|
17
17
|
|
18
|
-
def signature_exist?
|
19
|
-
|
18
|
+
def signature_exist?
|
19
|
+
!@signatures.nil?
|
20
20
|
end
|
21
21
|
end # class
|
22
22
|
end # module
|
@@ -69,7 +69,7 @@ module Rley # This module is used as a namespace
|
|
69
69
|
end
|
70
70
|
|
71
71
|
# The signal to begin the visit of the parse forest.
|
72
|
-
def start
|
72
|
+
def start
|
73
73
|
pforest.accept(self)
|
74
74
|
end
|
75
75
|
|
@@ -169,7 +169,7 @@ module Rley # This module is used as a namespace
|
|
169
169
|
def broadcast(msg, *args)
|
170
170
|
subscribers.each do |subscr|
|
171
171
|
next unless subscr.respond_to?(msg) || subscr.respond_to?(:accept_all)
|
172
|
-
|
172
|
+
|
173
173
|
subscr.send(msg, *args)
|
174
174
|
end
|
175
175
|
end
|
@@ -190,7 +190,7 @@ module Rley # This module is used as a namespace
|
|
190
190
|
|
191
191
|
def pop_node
|
192
192
|
return if legs.empty?
|
193
|
-
|
193
|
+
|
194
194
|
legs.pop
|
195
195
|
end
|
196
196
|
end # class
|
@@ -20,14 +20,14 @@ module Rley # This module is used as a namespace
|
|
20
20
|
# Returned hash contains pairs of the form:
|
21
21
|
# terminal name => Class implementing the terminal tokens
|
22
22
|
# terminal name => Hash with pairs: production name => Class
|
23
|
-
def terminal2node
|
23
|
+
def terminal2node
|
24
24
|
raise NotImplementedError
|
25
25
|
end
|
26
26
|
|
27
27
|
# Method to override in subclass.
|
28
28
|
# Default class for representing terminal nodes.
|
29
29
|
# @return [Class]
|
30
|
-
def terminalnode_class
|
30
|
+
def terminalnode_class
|
31
31
|
PTree::TerminalNode
|
32
32
|
end
|
33
33
|
|
@@ -37,7 +37,7 @@ module Rley # This module is used as a namespace
|
|
37
37
|
# @param aProductionName [String]
|
38
38
|
# @return [String]
|
39
39
|
def method_name(aProductionName)
|
40
|
-
|
40
|
+
"reduce_#{aProductionName}"
|
41
41
|
end
|
42
42
|
|
43
43
|
# Utility method.
|
@@ -46,7 +46,7 @@ module Rley # This module is used as a namespace
|
|
46
46
|
# @param _tokens [Array<Lexical::Token>]
|
47
47
|
# @param theChildren [Array<Object>]
|
48
48
|
def return_first_child(_range, _tokens, theChildren)
|
49
|
-
|
49
|
+
theChildren[0]
|
50
50
|
end
|
51
51
|
|
52
52
|
# Utility method.
|
@@ -55,7 +55,7 @@ module Rley # This module is used as a namespace
|
|
55
55
|
# @param _tokens [Array<Lexical::Token>]
|
56
56
|
# @param theChildren [Array<Object>]
|
57
57
|
def return_second_child(_range, _tokens, theChildren)
|
58
|
-
|
58
|
+
theChildren[1]
|
59
59
|
end
|
60
60
|
|
61
61
|
# Utility method.
|
@@ -64,7 +64,7 @@ module Rley # This module is used as a namespace
|
|
64
64
|
# @param _tokens [Array<Lexical::Token>]
|
65
65
|
# @param theChildren [Array<Object>]
|
66
66
|
def return_last_child(_range, _tokens, theChildren)
|
67
|
-
|
67
|
+
theChildren[-1]
|
68
68
|
end
|
69
69
|
|
70
70
|
# Simply return an epsilon symbol
|
@@ -72,7 +72,7 @@ module Rley # This module is used as a namespace
|
|
72
72
|
# @param _tokens [Array<Lexical::Token>]
|
73
73
|
# @param _children [Array<Object>]
|
74
74
|
def return_epsilon(_range, _tokens, _children)
|
75
|
-
|
75
|
+
nil
|
76
76
|
end
|
77
77
|
|
78
78
|
protected
|
@@ -81,7 +81,7 @@ module Rley # This module is used as a namespace
|
|
81
81
|
# Create a parse tree object with given
|
82
82
|
# node as root node.
|
83
83
|
def create_tree(aRootNode)
|
84
|
-
|
84
|
+
Rley::PTree::ParseTree.new(aRootNode)
|
85
85
|
end
|
86
86
|
|
87
87
|
# Factory method for creating a node object for the given
|
@@ -96,9 +96,7 @@ module Rley # This module is used as a namespace
|
|
96
96
|
# Lexical ambiguity...
|
97
97
|
klass = klass.fetch(aProduction.name)
|
98
98
|
end
|
99
|
-
|
100
|
-
|
101
|
-
return node
|
99
|
+
klass.new(aToken, aTokenPosition)
|
102
100
|
end
|
103
101
|
|
104
102
|
# Method to override.
|
@@ -125,6 +123,45 @@ module Rley # This module is used as a namespace
|
|
125
123
|
end
|
126
124
|
return node
|
127
125
|
end
|
126
|
+
|
127
|
+
# Standard method for handling one or more modifier: symbol+
|
128
|
+
# rule('symbol_plus' => 'symbol_plus symbol')
|
129
|
+
# def reduce_base_plus_more(_production, _range, _tokens, theChildren)
|
130
|
+
# theChildren[0] << theChildren[1]
|
131
|
+
# end
|
132
|
+
|
133
|
+
# Standard rule method handling one or more modifier: symbol+
|
134
|
+
# rule('symbol_plus' => 'symbol')
|
135
|
+
# def reduce_base_plus_last(_production, _range, _tokens, theChildren)
|
136
|
+
# [theChildren[0]]
|
137
|
+
# end
|
138
|
+
|
139
|
+
# Implicit rule generated for * modifier
|
140
|
+
# rule('X') => 'X item'.as '_star_more'
|
141
|
+
# Reduction for the implicit `X => X item` rule of a * modifier:
# append the second child to the first child and return the first child.
# @param theChildren [Array<Object>] first: collection so far, second: new item
def reduce__star_more(_production, _range, _tokens, theChildren)
  theChildren[0].tap { |collected| collected << theChildren[1] }
end
|
145
|
+
|
146
|
+
# Implicit rule generated for * modifier
|
147
|
+
# rule('X') => ''.as '_star_none'
|
148
|
+
# Reduction for the implicit empty rule of a * modifier:
# always yields a new empty Array, whatever the arguments are.
def reduce__star_none(_production, _range, _tokens, theChildren)
  Array.new
end
|
151
|
+
|
152
|
+
# Implicit rule generated for + modifier
|
153
|
+
# rule('X') => 'X item'.as '_plus_more'
|
154
|
+
# Reduction for the implicit `X => X item` rule of a + modifier:
# append the second child to the first child and return the first child.
# @param theChildren [Array<Object>] first: collection so far, second: new item
def reduce__plus_more(_production, _range, _tokens, theChildren)
  theChildren[0].tap { |collected| collected << theChildren[1] }
end
|
158
|
+
|
159
|
+
# Implicit rule generated for + modifier
|
160
|
+
# rule('X') => 'item'.as '_plus_one'
|
161
|
+
# Reduction for the implicit `X => item` rule of a + modifier:
# wrap the first child in a new one-element Array.
# @param theChildren [Array<Object>] only the first element is used
def reduce__plus_one(_production, _range, _tokens, theChildren)
  sole_item = theChildren[0]
  [sole_item]
end
|
164
|
+
|
128
165
|
end # class
|
129
166
|
end # module
|
130
167
|
end # module
|