rley 0.7.06 → 0.8.01
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +362 -62
- data/.travis.yml +6 -6
- data/CHANGELOG.md +20 -4
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/examples/NLP/engtagger.rb +193 -190
- data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
- data/examples/NLP/nano_eng/nano_grammar.rb +21 -21
- data/examples/NLP/pico_en_demo.rb +2 -2
- data/examples/data_formats/JSON/cli_options.rb +1 -1
- data/examples/data_formats/JSON/json_ast_builder.rb +21 -27
- data/examples/data_formats/JSON/json_ast_nodes.rb +12 -21
- data/examples/data_formats/JSON/json_demo.rb +1 -2
- data/examples/data_formats/JSON/json_grammar.rb +13 -13
- data/examples/data_formats/JSON/json_lexer.rb +8 -8
- data/examples/data_formats/JSON/json_minifier.rb +1 -1
- data/examples/general/calc_iter1/calc_ast_builder.rb +13 -10
- data/examples/general/calc_iter1/calc_ast_nodes.rb +23 -37
- data/examples/general/calc_iter1/calc_grammar.rb +7 -6
- data/examples/general/calc_iter1/calc_lexer.rb +6 -4
- data/examples/general/calc_iter1/spec/calculator_spec.rb +5 -5
- data/examples/general/calc_iter2/calc_ast_builder.rb +5 -3
- data/examples/general/calc_iter2/calc_ast_nodes.rb +27 -43
- data/examples/general/calc_iter2/calc_grammar.rb +12 -12
- data/examples/general/calc_iter2/calc_lexer.rb +11 -10
- data/examples/general/calc_iter2/spec/calculator_spec.rb +26 -26
- data/examples/general/left.rb +2 -2
- data/examples/general/right.rb +2 -2
- data/lib/rley.rb +1 -1
- data/lib/rley/base/dotted_item.rb +28 -31
- data/lib/rley/base/grm_items_builder.rb +6 -0
- data/lib/rley/constants.rb +2 -2
- data/lib/rley/engine.rb +22 -25
- data/lib/rley/formatter/asciitree.rb +3 -3
- data/lib/rley/formatter/bracket_notation.rb +1 -8
- data/lib/rley/formatter/debug.rb +6 -6
- data/lib/rley/formatter/json.rb +2 -2
- data/lib/rley/gfg/call_edge.rb +1 -1
- data/lib/rley/gfg/edge.rb +5 -5
- data/lib/rley/gfg/end_vertex.rb +2 -6
- data/lib/rley/gfg/epsilon_edge.rb +1 -5
- data/lib/rley/gfg/grm_flow_graph.rb +27 -23
- data/lib/rley/gfg/item_vertex.rb +10 -10
- data/lib/rley/gfg/non_terminal_vertex.rb +4 -4
- data/lib/rley/gfg/scan_edge.rb +1 -1
- data/lib/rley/gfg/shortcut_edge.rb +2 -2
- data/lib/rley/gfg/start_vertex.rb +4 -8
- data/lib/rley/gfg/vertex.rb +43 -39
- data/lib/rley/interface.rb +16 -0
- data/lib/rley/lexical/token_range.rb +6 -6
- data/lib/rley/notation/all_notation_nodes.rb +2 -0
- data/lib/rley/notation/ast_builder.rb +191 -0
- data/lib/rley/notation/ast_node.rb +44 -0
- data/lib/rley/notation/ast_visitor.rb +113 -0
- data/lib/rley/notation/grammar.rb +49 -0
- data/lib/rley/notation/grammar_builder.rb +504 -0
- data/lib/rley/notation/grouping_node.rb +23 -0
- data/lib/rley/notation/parser.rb +56 -0
- data/lib/rley/notation/sequence_node.rb +35 -0
- data/lib/rley/notation/symbol_node.rb +29 -0
- data/lib/rley/notation/tokenizer.rb +192 -0
- data/lib/rley/parse_forest_visitor.rb +5 -5
- data/lib/rley/parse_rep/ast_base_builder.rb +48 -11
- data/lib/rley/parse_rep/cst_builder.rb +5 -6
- data/lib/rley/parse_rep/parse_forest_builder.rb +22 -18
- data/lib/rley/parse_rep/parse_forest_factory.rb +3 -3
- data/lib/rley/parse_rep/parse_rep_creator.rb +14 -16
- data/lib/rley/parse_rep/parse_tree_builder.rb +4 -4
- data/lib/rley/parse_rep/parse_tree_factory.rb +27 -27
- data/lib/rley/parse_tree_visitor.rb +1 -1
- data/lib/rley/parser/error_reason.rb +4 -5
- data/lib/rley/parser/gfg_chart.rb +118 -26
- data/lib/rley/parser/gfg_parsing.rb +22 -33
- data/lib/rley/parser/parse_entry.rb +25 -31
- data/lib/rley/parser/parse_entry_set.rb +19 -16
- data/lib/rley/parser/parse_entry_tracker.rb +4 -4
- data/lib/rley/parser/parse_tracer.rb +13 -13
- data/lib/rley/parser/parse_walker_factory.rb +23 -28
- data/lib/rley/ptree/non_terminal_node.rb +7 -5
- data/lib/rley/ptree/parse_tree.rb +3 -3
- data/lib/rley/ptree/parse_tree_node.rb +5 -5
- data/lib/rley/ptree/terminal_node.rb +7 -7
- data/lib/rley/rley_error.rb +12 -12
- data/lib/rley/sppf/alternative_node.rb +6 -6
- data/lib/rley/sppf/composite_node.rb +7 -7
- data/lib/rley/sppf/epsilon_node.rb +3 -3
- data/lib/rley/sppf/leaf_node.rb +3 -3
- data/lib/rley/sppf/parse_forest.rb +16 -16
- data/lib/rley/sppf/sppf_node.rb +7 -8
- data/lib/rley/sppf/token_node.rb +3 -3
- data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +61 -23
- data/lib/rley/syntax/grammar.rb +5 -5
- data/lib/rley/syntax/grm_symbol.rb +7 -7
- data/lib/rley/syntax/match_closest.rb +43 -0
- data/lib/rley/syntax/non_terminal.rb +9 -15
- data/lib/rley/syntax/production.rb +16 -10
- data/lib/rley/syntax/symbol_seq.rb +7 -9
- data/lib/rley/syntax/terminal.rb +4 -5
- data/lib/rley/syntax/verbatim_symbol.rb +3 -3
- data/lib/support/base_tokenizer.rb +19 -18
- data/spec/rley/base/dotted_item_spec.rb +2 -2
- data/spec/rley/engine_spec.rb +23 -21
- data/spec/rley/formatter/asciitree_spec.rb +7 -7
- data/spec/rley/formatter/bracket_notation_spec.rb +13 -13
- data/spec/rley/formatter/json_spec.rb +1 -1
- data/spec/rley/gfg/end_vertex_spec.rb +5 -5
- data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
- data/spec/rley/gfg/item_vertex_spec.rb +10 -10
- data/spec/rley/gfg/non_terminal_vertex_spec.rb +3 -3
- data/spec/rley/gfg/shortcut_edge_spec.rb +1 -1
- data/spec/rley/gfg/start_vertex_spec.rb +5 -5
- data/spec/rley/gfg/vertex_spec.rb +3 -3
- data/spec/rley/lexical/token_range_spec.rb +16 -16
- data/spec/rley/lexical/token_spec.rb +2 -2
- data/spec/rley/notation/grammar_builder_spec.rb +302 -0
- data/spec/rley/notation/parser_spec.rb +184 -0
- data/spec/rley/notation/tokenizer_spec.rb +370 -0
- data/spec/rley/parse_forest_visitor_spec.rb +165 -163
- data/spec/rley/parse_rep/ambiguous_parse_spec.rb +44 -44
- data/spec/rley/parse_rep/ast_builder_spec.rb +6 -7
- data/spec/rley/parse_rep/cst_builder_spec.rb +5 -5
- data/spec/rley/parse_rep/groucho_spec.rb +24 -26
- data/spec/rley/parse_rep/parse_forest_builder_spec.rb +27 -27
- data/spec/rley/parse_rep/parse_forest_factory_spec.rb +8 -8
- data/spec/rley/parse_rep/parse_tree_factory_spec.rb +3 -3
- data/spec/rley/parse_tree_visitor_spec.rb +10 -8
- data/spec/rley/parser/dangling_else_spec.rb +445 -0
- data/spec/rley/parser/error_reason_spec.rb +6 -6
- data/spec/rley/parser/gfg_earley_parser_spec.rb +120 -12
- data/spec/rley/parser/gfg_parsing_spec.rb +6 -13
- data/spec/rley/parser/parse_entry_spec.rb +19 -19
- data/spec/rley/parser/parse_walker_factory_spec.rb +10 -10
- data/spec/rley/ptree/non_terminal_node_spec.rb +5 -3
- data/spec/rley/ptree/parse_tree_node_spec.rb +4 -4
- data/spec/rley/ptree/terminal_node_spec.rb +6 -6
- data/spec/rley/sppf/alternative_node_spec.rb +6 -6
- data/spec/rley/sppf/non_terminal_node_spec.rb +3 -3
- data/spec/rley/sppf/token_node_spec.rb +4 -4
- data/spec/rley/support/ambiguous_grammar_helper.rb +4 -5
- data/spec/rley/support/grammar_abc_helper.rb +3 -5
- data/spec/rley/support/grammar_ambig01_helper.rb +5 -6
- data/spec/rley/support/grammar_arr_int_helper.rb +5 -6
- data/spec/rley/support/grammar_b_expr_helper.rb +5 -6
- data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
- data/spec/rley/support/grammar_l0_helper.rb +14 -17
- data/spec/rley/support/grammar_pb_helper.rb +8 -7
- data/spec/rley/support/grammar_sppf_helper.rb +3 -3
- data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +35 -16
- data/spec/rley/syntax/grammar_spec.rb +6 -6
- data/spec/rley/syntax/grm_symbol_spec.rb +1 -1
- data/spec/rley/syntax/match_closest_spec.rb +46 -0
- data/spec/rley/syntax/non_terminal_spec.rb +8 -8
- data/spec/rley/syntax/production_spec.rb +17 -13
- data/spec/rley/syntax/symbol_seq_spec.rb +2 -2
- data/spec/rley/syntax/terminal_spec.rb +5 -5
- data/spec/rley/syntax/verbatim_symbol_spec.rb +1 -1
- data/spec/spec_helper.rb +0 -12
- data/spec/support/base_tokenizer_spec.rb +7 -2
- metadata +48 -74
- data/.simplecov +0 -7
- data/lib/rley/parser/parse_state.rb +0 -83
- data/lib/rley/parser/parse_state_tracker.rb +0 -59
- data/lib/rley/parser/state_set.rb +0 -101
- data/spec/rley/parser/parse_state_spec.rb +0 -125
- data/spec/rley/parser/parse_tracer_spec.rb +0 -200
- data/spec/rley/parser/state_set_spec.rb +0 -130
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'sequence_node'
|
4
|
+
|
5
|
+
module Rley
  module Notation
    # AST node for an expression enclosed in parentheses.
    # It specializes SequenceNode only in the visitor callback it triggers.
    class GroupingNode < SequenceNode
      # @param aPosition [Rley::Lexical::Position] Start position.
      # @param sequence [Array<ASTNode>] sequence of AST nodes
      # @param theRepetition [Symbol] indicates how many times the symbol can be repeated
      def initialize(aPosition, sequence, theRepetition = nil)
        # A bare `super` forwards the three arguments, unchanged, to the
        # superclass constructor.
        super
      end

      # 'Visitee' side of the Visitor design pattern: dispatch to the
      # visitor's callback dedicated to grouping nodes.
      # @param visitor [Notation::ASTVisitor] the visitor
      def accept(visitor)
        visitor.visit_grouping_node(self)
      end
    end # class
  end # module
end # module
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'tokenizer'
|
4
|
+
require_relative 'grammar'
|
5
|
+
require_relative 'ast_builder'
|
6
|
+
|
7
|
+
module Rley
  module Notation
    # Parser for text written in the Rley grammar notation (RGN).
    # The input is split into tokens by Notation::Tokenizer, parsed against
    # RGNGrammar and the successful parse is then converted through the
    # representation builder configured below (Notation::ASTBuilder).
    class Parser
      # @return [Rley::Engine] A facade object for the Rley parsing library
      attr_reader(:engine)

      # Set up the Rley facade: diagnostics enabled, ASTBuilder as
      # representation builder and the RGN grammar loaded.
      def initialize
        @engine = Rley::Engine.new do |config|
          config.diagnose = true
          config.repr_builder = Notation::ASTBuilder
        end

        # The grammar must be known to the engine before any parse request
        @engine.use_grammar(Rley::Notation::RGNGrammar)
      end

      # Parse the given RGN text.
      # @param source [String] text to parse
      # @return [Object] whatever `engine.convert` builds from the parse result
      #   (presumably a tree made by Notation::ASTBuilder -- confirm in Engine)
      # @raise [SyntaxError] when the parse is unsuccessful; the message
      #   embeds the failure reason reported by the engine
      def parse(source)
        token_stream = Tokenizer.new(source).tokens
        outcome = engine.parse(token_stream)

        # Happy path first: a successful parse is converted right away
        return engine.convert(outcome) if outcome.success?

        raise SyntaxError, "Parsing failed\n" + "Reason: #{outcome.failure_reason.message}"
      end
    end # class
  end # module
end # module
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'ast_node'
|
4
|
+
|
5
|
+
module Rley
  module Notation
    # A syntax node for a sequence of AST nodes
    class SequenceNode < ASTNode
      # @return [Array<ASTNode>] the member nodes of the sequence
      attr_reader :subnodes

      # @return [Array] constraints attached to this sequence (starts empty)
      attr_accessor :constraints

      # @param aPosition [Rley::Lexical::Position] Start position.
      # @param sequence [Array<ASTNode>] sequence of AST nodes
      # @param theRepetition [Symbol] indicates how many times the symbol can be repeated
      def initialize(aPosition, sequence, theRepetition = nil)
        super(aPosition)
        @subnodes = sequence
        # The explicit receiver is required: without it Ruby parses
        # `repetition = ...` as a local variable assignment and the value
        # is silently discarded.
        # NOTE(review): relies on ASTNode providing a `repetition=` writer
        # (defined outside this file) -- confirm.
        self.repetition = theRepetition if theRepetition
        @constraints = []
      end

      # @return [Integer] number of subnodes
      def size
        subnodes.size
      end

      # Part of the 'visitee' role in Visitor design pattern.
      # @param visitor [Notation::ASTVisitor] the visitor
      def accept(visitor)
        visitor.visit_sequence_node(self)
      end
    end # class
  end # module
end # module
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'ast_node'
|
4
|
+
|
5
|
+
module Rley
  module Notation
    # A syntax node for a grammar symbol occurring in rhs of a rule
    class SymbolNode < ASTNode
      # @return [String] name of grammar symbol
      attr_reader :name

      # @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
      # @param aName [String] name of grammar symbol
      # @param theRepetition [Symbol] indicates how many times the symbol can be repeated
      def initialize(aPosition, aName, theRepetition = nil)
        super(aPosition)
        @name = aName
        # The explicit receiver is required: without it Ruby parses
        # `repetition = ...` as a local variable assignment and the value
        # is silently discarded.
        # NOTE(review): relies on ASTNode providing a `repetition=` writer
        # (defined outside this file) -- confirm.
        self.repetition = theRepetition if theRepetition
      end

      # Part of the 'visitee' role in Visitor design pattern.
      # Dispatches to the visitor's callback dedicated to symbol nodes.
      # @param visitor [Notation::ASTVisitor] the visitor
      def accept(visitor)
        visitor.visit_symbol_node(self)
      end
    end # class
  end # module
end # module
|
@@ -0,0 +1,192 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'strscan'
|
4
|
+
require_relative '../lexical/token'
|
5
|
+
|
6
|
+
module Rley
  module Notation
    # A tokenizer for the Rley notation language.
    # Responsibility: break input into a sequence of token objects.
    # The tokenizer recognizes:
    # - Symbols (names of grammar symbols),
    # - Keys: a keyword immediately followed by a colon (e.g. repeat:),
    # - Integer literals including single digit,
    # - String literals (single or double quote delimited),
    # - Delimiters: parentheses '(', ')' and braces '{', '}',
    # - Separators: comma and '..',
    # - Modifiers: '?', '*', '+'
    class Tokenizer
      # @return [StringScanner] Low-level input scanner
      attr_reader(:scanner)

      # @return [Integer] The current line number
      attr_reader(:lineno)

      # @return [Integer] Position of last start of line in the input
      attr_reader(:line_start)

      # Mapping from the text of a one or two special character token
      # to the name of the corresponding terminal.
      @@lexeme2name = {
        '(' => 'LEFT_PAREN',
        ')' => 'RIGHT_PAREN',
        '{' => 'LEFT_BRACE',
        '}' => 'RIGHT_BRACE',
        ',' => 'COMMA',
        '+' => 'PLUS',
        '?' => 'QUESTION_MARK',
        '*' => 'STAR',
        '..' => 'ELLIPSIS'
      }.freeze

      # Here are all the implemented Rley notation keywords
      @@keywords = %w[
        match_closest repeat
      ].map { |x| [x, x] }.to_h

      # Constructor. Initialize a tokenizer for Rley notation input.
      # @param source [String, nil] text to tokenize (may be given later
      #   with #start_with).
      def initialize(source = nil)
        @scanner = StringScanner.new('')
        start_with(source) if source
      end

      # Reset the tokenizer and make the given text, the current input.
      # @param source [String] text to tokenize.
      def start_with(source)
        @scanner.string = source
        @lineno = 1
        @line_start = 0
      end

      # Scan the source and return an array of tokens.
      # @return [Array<Rley::Lexical::Token>] the sequence of tokens
      # @raise [ScanError] on an unexpected character or unterminated string
      def tokens
        tok_sequence = []
        until @scanner.eos?
          token = _next_token
          tok_sequence << token unless token.nil?
        end

        tok_sequence
      end

      private

      # Recognize the token that starts at the current scanning position
      # (after skipping any whitespace).
      # @return [Rley::Lexical::Token, nil] nil when the end of input is
      #   reached or when an identifier followed by a colon isn't a keyword.
      def _next_token
        pos_before = scanner.pos
        skip_intertoken_spaces
        ws_found = scanner.pos > pos_before
        curr_ch = scanner.peek(1)
        return nil if curr_ch.nil? || curr_ch.empty?

        token = nil

        if '(){},'.include? curr_ch
          # Single delimiter, separator or character
          token = build_token(@@lexeme2name[curr_ch], scanner.getch)
        elsif '?*+'.include? curr_ch # modifier character
          # A modifier preceded by whitespace has no text to modify:
          # it is then handled as an ordinary symbol.
          symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
          token = build_token(symb, scanner.getch)
        elsif (lexeme = scanner.scan(/\.\./))
          # Two special characters token
          token = build_token(@@lexeme2name[lexeme], lexeme)
        elsif scanner.check(/"|'/) # Start of string detected...
          token = build_string_token
        elsif (lexeme = scanner.scan(/\d+/))
          token = build_token('INT_LIT', lexeme)
        elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*:/))
          # The trailing colon is consumed but isn't part of the lexeme;
          # the consumed length is passed along so that the token column
          # points to the first character of the key.
          key_name = lexeme.chop
          token = build_token('KEY', key_name, lexeme.size) if @@keywords.key?(key_name)
          # ... error case: non-keyword text is skipped without token
        elsif (lexeme = scanner.scan(/[^?*+,:(){}\s]+/))
          token = build_token('SYMBOL', lexeme)
        else # Unknown token
          col = scanner.pos - @line_start + 1
          scanner.getch # Consume the offending character before failing
          raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
        end

        token
      end

      # Create a token located at the text that was just consumed.
      # @param aSymbolName [String] name of the terminal symbol
      # @param aLexeme [String] text of the token
      # @param consumed [Integer, nil] count of input characters consumed
      #   for the token; defaults to the lexeme size.
      # @return [Rley::Lexical::Token]
      def build_token(aSymbolName, aLexeme, consumed = nil)
        consumed ||= aLexeme ? aLexeme.size : 0
        # The scanner is already past the token: step back to its start
        col = scanner.pos - consumed - @line_start + 1
        pos = Rley::Lexical::Position.new(@lineno, col)
        Rley::Lexical::Token.new(aLexeme.dup, aSymbolName, pos)
      rescue StandardError
        puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
        raise
      end

      # Build a string literal token. No escape sequence is interpreted:
      # every character between the delimiters is kept as is.
      # precondition: current position at leading quote
      # @return [Rley::Lexical::Token]
      # @raise [ScanError] when the end of input is met before the closing quote
      def build_string_token
        delimiter = scanner.getch
        scan_pos = scanner.pos
        line = @lineno
        column_start = scan_pos - @line_start
        literal = +''
        loop do
          substr = scanner.scan(/[^"'\\\r\n]*/)
          if scanner.eos?
            pos_start = "line #{line}:#{column_start}"
            raise ScanError, "Error: [#{pos_start}]: Unterminated string."
          end

          literal << substr
          special = scanner.scan(/["'\\\r\n]/)
          case special
          when delimiter # Terminating quote found
            break
          when "\r"
            # Take the LF of a CR-LF pair before registering the new line,
            # so that the line start lies after the complete line break.
            special << scanner.getch if scanner.match?(/\n/)
            next_line
            literal << special
          when "\n"
            next_line
            literal << special
          else
            # The other kind of quote or a backslash: plain string content
            literal << special
          end
        end
        pos = Rley::Lexical::Position.new(line, column_start)
        Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
      end

      # Skip non-significant whitespaces and line breaks.
      # Advance the scanner until something significant is found.
      # @return [Integer] scanner position after the skipped text
      def skip_intertoken_spaces
        loop do
          ws_found = scanner.skip(/[ \t\f]+/) ? true : false
          nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
          if nl_found
            ws_found = true
            next_line
          end

          break unless ws_found
        end

        scanner.pos
      end

      # Register the start of a new line at the current scanner position.
      def next_line
        @lineno += 1
        @line_start = scanner.pos
      end
    end # class
  end # module
end # module
|
@@ -15,8 +15,8 @@ module Rley # This module is used as a namespace
|
|
15
15
|
@signatures = subnodes.map { |_| prime_enumerator.next }
|
16
16
|
end
|
17
17
|
|
18
|
-
def signature_exist?
|
19
|
-
|
18
|
+
def signature_exist?
|
19
|
+
!@signatures.nil?
|
20
20
|
end
|
21
21
|
end # class
|
22
22
|
end # module
|
@@ -69,7 +69,7 @@ module Rley # This module is used as a namespace
|
|
69
69
|
end
|
70
70
|
|
71
71
|
# The signal to begin the visit of the parse forest.
|
72
|
-
def start
|
72
|
+
def start
|
73
73
|
pforest.accept(self)
|
74
74
|
end
|
75
75
|
|
@@ -169,7 +169,7 @@ module Rley # This module is used as a namespace
|
|
169
169
|
def broadcast(msg, *args)
|
170
170
|
subscribers.each do |subscr|
|
171
171
|
next unless subscr.respond_to?(msg) || subscr.respond_to?(:accept_all)
|
172
|
-
|
172
|
+
|
173
173
|
subscr.send(msg, *args)
|
174
174
|
end
|
175
175
|
end
|
@@ -190,7 +190,7 @@ module Rley # This module is used as a namespace
|
|
190
190
|
|
191
191
|
def pop_node
|
192
192
|
return if legs.empty?
|
193
|
-
|
193
|
+
|
194
194
|
legs.pop
|
195
195
|
end
|
196
196
|
end # class
|
@@ -20,14 +20,14 @@ module Rley # This module is used as a namespace
|
|
20
20
|
# Returned hash contains pairs of the form:
|
21
21
|
# terminal name => Class implementing the terminal tokens
|
22
22
|
# terminal name => Hash with pairs: production name => Class
|
23
|
-
def terminal2node
|
23
|
+
def terminal2node
|
24
24
|
raise NotImplementedError
|
25
25
|
end
|
26
26
|
|
27
27
|
# Method to override in subclass.
|
28
28
|
# Default class for representing terminal nodes.
|
29
29
|
# @return [Class]
|
30
|
-
def terminalnode_class
|
30
|
+
def terminalnode_class
|
31
31
|
PTree::TerminalNode
|
32
32
|
end
|
33
33
|
|
@@ -37,7 +37,7 @@ module Rley # This module is used as a namespace
|
|
37
37
|
# @param aProductionName [String]
|
38
38
|
# @return [String]
|
39
39
|
def method_name(aProductionName)
|
40
|
-
|
40
|
+
"reduce_#{aProductionName}"
|
41
41
|
end
|
42
42
|
|
43
43
|
# Utility method.
|
@@ -46,7 +46,7 @@ module Rley # This module is used as a namespace
|
|
46
46
|
# @param _tokens [Array<Lexical::Token>]
|
47
47
|
# @param theChildren [Array<Object>]
|
48
48
|
def return_first_child(_range, _tokens, theChildren)
|
49
|
-
|
49
|
+
theChildren[0]
|
50
50
|
end
|
51
51
|
|
52
52
|
# Utility method.
|
@@ -55,7 +55,7 @@ module Rley # This module is used as a namespace
|
|
55
55
|
# @param _tokens [Array<Lexical::Token>]
|
56
56
|
# @param theChildren [Array<Object>]
|
57
57
|
def return_second_child(_range, _tokens, theChildren)
|
58
|
-
|
58
|
+
theChildren[1]
|
59
59
|
end
|
60
60
|
|
61
61
|
# Utility method.
|
@@ -64,7 +64,7 @@ module Rley # This module is used as a namespace
|
|
64
64
|
# @param _tokens [Array<Lexical::Token>]
|
65
65
|
# @param theChildren [Array<Object>]
|
66
66
|
def return_last_child(_range, _tokens, theChildren)
|
67
|
-
|
67
|
+
theChildren[-1]
|
68
68
|
end
|
69
69
|
|
70
70
|
# Simply return an epsilon symbol
|
@@ -72,7 +72,7 @@ module Rley # This module is used as a namespace
|
|
72
72
|
# @param _tokens [Array<Lexical::Token>]
|
73
73
|
# @param _children [Array<Object>]
|
74
74
|
def return_epsilon(_range, _tokens, _children)
|
75
|
-
|
75
|
+
nil
|
76
76
|
end
|
77
77
|
|
78
78
|
protected
|
@@ -81,7 +81,7 @@ module Rley # This module is used as a namespace
|
|
81
81
|
# Create a parse tree object with given
|
82
82
|
# node as root node.
|
83
83
|
def create_tree(aRootNode)
|
84
|
-
|
84
|
+
Rley::PTree::ParseTree.new(aRootNode)
|
85
85
|
end
|
86
86
|
|
87
87
|
# Factory method for creating a node object for the given
|
@@ -96,9 +96,7 @@ module Rley # This module is used as a namespace
|
|
96
96
|
# Lexical ambiguity...
|
97
97
|
klass = klass.fetch(aProduction.name)
|
98
98
|
end
|
99
|
-
|
100
|
-
|
101
|
-
return node
|
99
|
+
klass.new(aToken, aTokenPosition)
|
102
100
|
end
|
103
101
|
|
104
102
|
# Method to override.
|
@@ -125,6 +123,45 @@ module Rley # This module is used as a namespace
|
|
125
123
|
end
|
126
124
|
return node
|
127
125
|
end
|
126
|
+
|
127
|
+
# Standard method for handling one or more modifier: symbol+
|
128
|
+
# rule('symbol_plus' => 'symbol_plus symbol')
|
129
|
+
# def reduce_base_plus_more(_production, _range, _tokens, theChildren)
|
130
|
+
# theChildren[0] << theChildren[1]
|
131
|
+
# end
|
132
|
+
|
133
|
+
# Standard rule method handling one or more modifier: symbol+
|
134
|
+
# rule('symbol_plus' => 'symbol')
|
135
|
+
# def reduce_base_plus_last(_production, _range, _tokens, theChildren)
|
136
|
+
# [theChildren[0]]
|
137
|
+
# end
|
138
|
+
|
139
|
+
# Implicit rule generated for * modifier
|
140
|
+
# rule('X') => 'X item'.as '_star_more'
|
141
|
+
def reduce__star_more(_production, _range, _tokens, theChildren)
  # First child: the collection gathered so far; second child: the new item.
  # The collection is extended in place and returned.
  gathered = theChildren[0]
  gathered << theChildren[1]
  gathered
end
|
145
|
+
|
146
|
+
# Implicit rule generated for * modifier
|
147
|
+
# rule('X') => ''.as '_star_none'
|
148
|
+
def reduce__star_none(_production, _range, _tokens, _children)
  # Zero occurrence matched: start with a fresh, empty collection.
  # The children argument is deliberately ignored (hence the underscore,
  # like the other unused parameters).
  []
end
|
151
|
+
|
152
|
+
# Implicit rule generated for + modifier
|
153
|
+
# rule('X') => 'X item'.as '_plus_more'
|
154
|
+
def reduce__plus_more(_production, _range, _tokens, theChildren)
  # First child: the collection gathered so far; second child: the new item.
  # The collection is extended in place and returned.
  gathered = theChildren[0]
  gathered << theChildren[1]
  gathered
end
|
158
|
+
|
159
|
+
# Implicit rule generated for + modifier
|
160
|
+
# rule('X') => 'item'.as '_plus_one'
|
161
|
+
def reduce__plus_one(_production, _range, _tokens, theChildren)
  # First occurrence matched: wrap it in a new one-element array.
  sole_item = theChildren[0]
  [sole_item]
end
|
164
|
+
|
128
165
|
end # class
|
129
166
|
end # module
|
130
167
|
end # module
|