rley 0.7.06 → 0.8.01
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +362 -62
- data/.travis.yml +6 -6
- data/CHANGELOG.md +20 -4
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/examples/NLP/engtagger.rb +193 -190
- data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
- data/examples/NLP/nano_eng/nano_grammar.rb +21 -21
- data/examples/NLP/pico_en_demo.rb +2 -2
- data/examples/data_formats/JSON/cli_options.rb +1 -1
- data/examples/data_formats/JSON/json_ast_builder.rb +21 -27
- data/examples/data_formats/JSON/json_ast_nodes.rb +12 -21
- data/examples/data_formats/JSON/json_demo.rb +1 -2
- data/examples/data_formats/JSON/json_grammar.rb +13 -13
- data/examples/data_formats/JSON/json_lexer.rb +8 -8
- data/examples/data_formats/JSON/json_minifier.rb +1 -1
- data/examples/general/calc_iter1/calc_ast_builder.rb +13 -10
- data/examples/general/calc_iter1/calc_ast_nodes.rb +23 -37
- data/examples/general/calc_iter1/calc_grammar.rb +7 -6
- data/examples/general/calc_iter1/calc_lexer.rb +6 -4
- data/examples/general/calc_iter1/spec/calculator_spec.rb +5 -5
- data/examples/general/calc_iter2/calc_ast_builder.rb +5 -3
- data/examples/general/calc_iter2/calc_ast_nodes.rb +27 -43
- data/examples/general/calc_iter2/calc_grammar.rb +12 -12
- data/examples/general/calc_iter2/calc_lexer.rb +11 -10
- data/examples/general/calc_iter2/spec/calculator_spec.rb +26 -26
- data/examples/general/left.rb +2 -2
- data/examples/general/right.rb +2 -2
- data/lib/rley.rb +1 -1
- data/lib/rley/base/dotted_item.rb +28 -31
- data/lib/rley/base/grm_items_builder.rb +6 -0
- data/lib/rley/constants.rb +2 -2
- data/lib/rley/engine.rb +22 -25
- data/lib/rley/formatter/asciitree.rb +3 -3
- data/lib/rley/formatter/bracket_notation.rb +1 -8
- data/lib/rley/formatter/debug.rb +6 -6
- data/lib/rley/formatter/json.rb +2 -2
- data/lib/rley/gfg/call_edge.rb +1 -1
- data/lib/rley/gfg/edge.rb +5 -5
- data/lib/rley/gfg/end_vertex.rb +2 -6
- data/lib/rley/gfg/epsilon_edge.rb +1 -5
- data/lib/rley/gfg/grm_flow_graph.rb +27 -23
- data/lib/rley/gfg/item_vertex.rb +10 -10
- data/lib/rley/gfg/non_terminal_vertex.rb +4 -4
- data/lib/rley/gfg/scan_edge.rb +1 -1
- data/lib/rley/gfg/shortcut_edge.rb +2 -2
- data/lib/rley/gfg/start_vertex.rb +4 -8
- data/lib/rley/gfg/vertex.rb +43 -39
- data/lib/rley/interface.rb +16 -0
- data/lib/rley/lexical/token_range.rb +6 -6
- data/lib/rley/notation/all_notation_nodes.rb +2 -0
- data/lib/rley/notation/ast_builder.rb +191 -0
- data/lib/rley/notation/ast_node.rb +44 -0
- data/lib/rley/notation/ast_visitor.rb +113 -0
- data/lib/rley/notation/grammar.rb +49 -0
- data/lib/rley/notation/grammar_builder.rb +504 -0
- data/lib/rley/notation/grouping_node.rb +23 -0
- data/lib/rley/notation/parser.rb +56 -0
- data/lib/rley/notation/sequence_node.rb +35 -0
- data/lib/rley/notation/symbol_node.rb +29 -0
- data/lib/rley/notation/tokenizer.rb +192 -0
- data/lib/rley/parse_forest_visitor.rb +5 -5
- data/lib/rley/parse_rep/ast_base_builder.rb +48 -11
- data/lib/rley/parse_rep/cst_builder.rb +5 -6
- data/lib/rley/parse_rep/parse_forest_builder.rb +22 -18
- data/lib/rley/parse_rep/parse_forest_factory.rb +3 -3
- data/lib/rley/parse_rep/parse_rep_creator.rb +14 -16
- data/lib/rley/parse_rep/parse_tree_builder.rb +4 -4
- data/lib/rley/parse_rep/parse_tree_factory.rb +27 -27
- data/lib/rley/parse_tree_visitor.rb +1 -1
- data/lib/rley/parser/error_reason.rb +4 -5
- data/lib/rley/parser/gfg_chart.rb +118 -26
- data/lib/rley/parser/gfg_parsing.rb +22 -33
- data/lib/rley/parser/parse_entry.rb +25 -31
- data/lib/rley/parser/parse_entry_set.rb +19 -16
- data/lib/rley/parser/parse_entry_tracker.rb +4 -4
- data/lib/rley/parser/parse_tracer.rb +13 -13
- data/lib/rley/parser/parse_walker_factory.rb +23 -28
- data/lib/rley/ptree/non_terminal_node.rb +7 -5
- data/lib/rley/ptree/parse_tree.rb +3 -3
- data/lib/rley/ptree/parse_tree_node.rb +5 -5
- data/lib/rley/ptree/terminal_node.rb +7 -7
- data/lib/rley/rley_error.rb +12 -12
- data/lib/rley/sppf/alternative_node.rb +6 -6
- data/lib/rley/sppf/composite_node.rb +7 -7
- data/lib/rley/sppf/epsilon_node.rb +3 -3
- data/lib/rley/sppf/leaf_node.rb +3 -3
- data/lib/rley/sppf/parse_forest.rb +16 -16
- data/lib/rley/sppf/sppf_node.rb +7 -8
- data/lib/rley/sppf/token_node.rb +3 -3
- data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +61 -23
- data/lib/rley/syntax/grammar.rb +5 -5
- data/lib/rley/syntax/grm_symbol.rb +7 -7
- data/lib/rley/syntax/match_closest.rb +43 -0
- data/lib/rley/syntax/non_terminal.rb +9 -15
- data/lib/rley/syntax/production.rb +16 -10
- data/lib/rley/syntax/symbol_seq.rb +7 -9
- data/lib/rley/syntax/terminal.rb +4 -5
- data/lib/rley/syntax/verbatim_symbol.rb +3 -3
- data/lib/support/base_tokenizer.rb +19 -18
- data/spec/rley/base/dotted_item_spec.rb +2 -2
- data/spec/rley/engine_spec.rb +23 -21
- data/spec/rley/formatter/asciitree_spec.rb +7 -7
- data/spec/rley/formatter/bracket_notation_spec.rb +13 -13
- data/spec/rley/formatter/json_spec.rb +1 -1
- data/spec/rley/gfg/end_vertex_spec.rb +5 -5
- data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
- data/spec/rley/gfg/item_vertex_spec.rb +10 -10
- data/spec/rley/gfg/non_terminal_vertex_spec.rb +3 -3
- data/spec/rley/gfg/shortcut_edge_spec.rb +1 -1
- data/spec/rley/gfg/start_vertex_spec.rb +5 -5
- data/spec/rley/gfg/vertex_spec.rb +3 -3
- data/spec/rley/lexical/token_range_spec.rb +16 -16
- data/spec/rley/lexical/token_spec.rb +2 -2
- data/spec/rley/notation/grammar_builder_spec.rb +302 -0
- data/spec/rley/notation/parser_spec.rb +184 -0
- data/spec/rley/notation/tokenizer_spec.rb +370 -0
- data/spec/rley/parse_forest_visitor_spec.rb +165 -163
- data/spec/rley/parse_rep/ambiguous_parse_spec.rb +44 -44
- data/spec/rley/parse_rep/ast_builder_spec.rb +6 -7
- data/spec/rley/parse_rep/cst_builder_spec.rb +5 -5
- data/spec/rley/parse_rep/groucho_spec.rb +24 -26
- data/spec/rley/parse_rep/parse_forest_builder_spec.rb +27 -27
- data/spec/rley/parse_rep/parse_forest_factory_spec.rb +8 -8
- data/spec/rley/parse_rep/parse_tree_factory_spec.rb +3 -3
- data/spec/rley/parse_tree_visitor_spec.rb +10 -8
- data/spec/rley/parser/dangling_else_spec.rb +445 -0
- data/spec/rley/parser/error_reason_spec.rb +6 -6
- data/spec/rley/parser/gfg_earley_parser_spec.rb +120 -12
- data/spec/rley/parser/gfg_parsing_spec.rb +6 -13
- data/spec/rley/parser/parse_entry_spec.rb +19 -19
- data/spec/rley/parser/parse_walker_factory_spec.rb +10 -10
- data/spec/rley/ptree/non_terminal_node_spec.rb +5 -3
- data/spec/rley/ptree/parse_tree_node_spec.rb +4 -4
- data/spec/rley/ptree/terminal_node_spec.rb +6 -6
- data/spec/rley/sppf/alternative_node_spec.rb +6 -6
- data/spec/rley/sppf/non_terminal_node_spec.rb +3 -3
- data/spec/rley/sppf/token_node_spec.rb +4 -4
- data/spec/rley/support/ambiguous_grammar_helper.rb +4 -5
- data/spec/rley/support/grammar_abc_helper.rb +3 -5
- data/spec/rley/support/grammar_ambig01_helper.rb +5 -6
- data/spec/rley/support/grammar_arr_int_helper.rb +5 -6
- data/spec/rley/support/grammar_b_expr_helper.rb +5 -6
- data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
- data/spec/rley/support/grammar_l0_helper.rb +14 -17
- data/spec/rley/support/grammar_pb_helper.rb +8 -7
- data/spec/rley/support/grammar_sppf_helper.rb +3 -3
- data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +35 -16
- data/spec/rley/syntax/grammar_spec.rb +6 -6
- data/spec/rley/syntax/grm_symbol_spec.rb +1 -1
- data/spec/rley/syntax/match_closest_spec.rb +46 -0
- data/spec/rley/syntax/non_terminal_spec.rb +8 -8
- data/spec/rley/syntax/production_spec.rb +17 -13
- data/spec/rley/syntax/symbol_seq_spec.rb +2 -2
- data/spec/rley/syntax/terminal_spec.rb +5 -5
- data/spec/rley/syntax/verbatim_symbol_spec.rb +1 -1
- data/spec/spec_helper.rb +0 -12
- data/spec/support/base_tokenizer_spec.rb +7 -2
- metadata +48 -74
- data/.simplecov +0 -7
- data/lib/rley/parser/parse_state.rb +0 -83
- data/lib/rley/parser/parse_state_tracker.rb +0 -59
- data/lib/rley/parser/state_set.rb +0 -101
- data/spec/rley/parser/parse_state_spec.rb +0 -125
- data/spec/rley/parser/parse_tracer_spec.rb +0 -200
- data/spec/rley/parser/state_set_spec.rb +0 -130
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rley
|
4
|
+
module Notation
|
5
|
+
# Abstract class.
|
6
|
+
# Instances of its subclasses represent nodes of an abstract syntax tree
|
7
|
+
# that is the product of the parse of an input text.
|
8
|
+
class ASTNode
  # @return [Rley::Lexical::Position] Position of the entry in the input stream.
  attr_reader :position

  # @return [Symbol] Multiplicity of the node; :exactly_one at creation.
  attr_accessor :repetition

  # @return [Hash] Annotation pairs of the node ('repeat' entry excluded).
  attr_reader :annotation

  # @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
  def initialize(aPosition)
    @position = aPosition
    @repetition = :exactly_one
    @annotation = {}
  end

  # Attach annotation pairs to the node.
  # When the mapping has a 'repeat' key, its value becomes the repetition
  # of the node and the pair is left out of the stored annotation.
  # The argument itself is never modified: the node works on a copy,
  # so a frozen or shared Hash can be passed safely.
  # @param aMapping [Hash] the annotation pairs
  def annotation=(aMapping)
    repeat_key = 'repeat'
    remaining = aMapping.dup
    @repetition = remaining.delete(repeat_key) if remaining.include?(repeat_key)
    @annotation = remaining
  end

  # Notification that the parsing has successfully completed
  def done!
    # Default: do nothing ...
  end

  # Abstract method (must be overridden in subclasses).
  # Part of the 'visitee' role in Visitor design pattern.
  # @param _visitor [Notation::ASTVisitor] the visitor
  # @raise [NotImplementedError] always, in this base class
  def accept(_visitor)
    raise NotImplementedError
  end
end # class
|
43
|
+
end # module
|
44
|
+
end # module
|
@@ -0,0 +1,113 @@
|
|
1
|
+
module Rley
|
2
|
+
module Notation
|
3
|
+
# Visitor (in the sense of the Visitor design pattern) that walks a tree of
# notation nodes and notifies its subscribers of each visit event.
class ASTVisitor
  # Link to the top node to visit
  attr_reader(:top)

  # List of objects that subscribed to the visit event notification.
  attr_reader(:subscribers)

  # Build a visitor for the given top.
  # @param aTop [#accept] the object to visit; it is sent #accept by #start.
  # @raise [StandardError] when aTop is nil
  def initialize(aTop)
    raise StandardError, 'Cannot create a visitor for a nil top' if aTop.nil?

    @top = aTop
    @subscribers = []
  end

  # Add a subscriber for the visit event notifications.
  # @param aSubscriber [Object]
  def subscribe(aSubscriber)
    subscribers << aSubscriber
  end

  # Remove the given object from the subscription list.
  # The object won't be notified of visit events.
  # @param aSubscriber [Object]
  def unsubscribe(aSubscriber)
    subscribers.delete_if { |entry| entry == aSubscriber }
  end

  # The signal to begin the visit of the top.
  def start
    top.accept(self)
  end

  # Visit event. The visitor is about to visit the ptree.
  # Subscribers receive :before_ptree with the tree as sole argument.
  # @param aParseTree [Rley::PTree::ParseTree] the ptree to visit.
  def start_visit_ptree(aParseTree)
    broadcast(:before_ptree, aParseTree)
  end

  # Visit event. The visitor has completed the visit of the ptree.
  # Subscribers receive :after_ptree with the tree as sole argument.
  # @param aParseTree [Rley::PTree::ParseTree] the visited ptree.
  def end_visit_ptree(aParseTree)
    broadcast(:after_ptree, aParseTree)
  end

  # Visit event. A symbol node has no subnodes: the 'before' and 'after'
  # notifications are sent back to back.
  # @param aSymbolNode [Notation::SymbolNode] the symbol node to visit
  def visit_symbol_node(aSymbolNode)
    broadcast(:before_symbol_node, aSymbolNode, self)
    broadcast(:after_symbol_node, aSymbolNode, self)
  end

  # Visit event. The visitor is about to visit a sequence node.
  # Its subnodes are visited between the 'before' and 'after' notifications.
  # @param aSequenceNode [Notation::SequenceNode] the sequence node to visit
  def visit_sequence_node(aSequenceNode)
    broadcast(:before_sequence_node, aSequenceNode, self)
    traverse_subnodes(aSequenceNode)
    broadcast(:after_sequence_node, aSequenceNode, self)
  end

  # Visit event. The visitor is about to visit a grouping node.
  # The subnodes are traversed only when the grouping occurs exactly once;
  # for any other repetition only the grouping itself is notified.
  # @param aGroupingNode [Notation::GroupingNode] the grouping node to visit
  def visit_grouping_node(aGroupingNode)
    broadcast(:before_grouping_node, aGroupingNode, self)
    traverse_subnodes(aGroupingNode) if aGroupingNode.repetition == :exactly_one
    broadcast(:after_grouping_node, aGroupingNode, self)
  end

  private

  # Visit event. The visitor is about to visit the subnodes of a
  # composite node.
  # @param aParentNode [#subnodes] the parent node.
  def traverse_subnodes(aParentNode)
    subnodes = aParentNode.subnodes
    broadcast(:before_subnodes, aParentNode, subnodes)

    # Let's proceed with the visit of subnodes
    subnodes.each { |a_node| a_node.accept(self) }

    broadcast(:after_subnodes, aParentNode, subnodes)
  end

  # Visit event. The visitor is about to visit one given subnode of a
  # composite node.
  # @param aParentNode [#subnodes] the parent node.
  # @param index [Integer] index of child subnode
  def traverse_given_subnode(aParentNode, index)
    subnode = aParentNode.subnodes[index]
    broadcast(:before_given_subnode, aParentNode, subnode)

    # Now, let's proceed with the visit of that subnode
    subnode.accept(self)

    broadcast(:after_given_subnode, aParentNode, subnode)
  end

  # Send a notification to all subscribers.
  # A subscriber is skipped unless it responds to the event name
  # or to :accept_all.
  # @param msg [Symbol] event to notify
  # @param args [Array] arguments of the notification.
  def broadcast(msg, *args)
    subscribers.each do |subscr|
      next unless subscr.respond_to?(msg) || subscr.respond_to?(:accept_all)

      subscr.send(msg, *args)
    end
  end
end # class
|
112
|
+
end # module
|
113
|
+
end # module
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../syntax/base_grammar_builder'
|
4
|
+
|
5
|
+
module Rley
|
6
|
+
module Notation
|
7
|
+
########################################
|
8
|
+
# Syntax for right-hand side of production rules
|
9
|
+
builder = Rley::Syntax::BaseGrammarBuilder.new do
  # Terminal symbols, declared in one go (same order as before).
  add_terminals(
    'LEFT_PAREN', 'RIGHT_PAREN',      # For '(', ')' grouping delimiters
    'LEFT_BRACE', 'RIGHT_BRACE',      # For '{', '}' annotation delimiters
    'QUESTION_MARK', 'STAR', 'PLUS',  # For postfix quantifiers
    'COMMA', 'ELLIPSIS',
    'STR_LIT',                        # For string literal values
    'INT_LIT',                        # For integer literal values
    'SYMBOL',                         # Grammar symbols
    'KEY'                             # Key literal
  )

  # A notation is a possibly empty sequence of members
  rule('notation' => 'rhs')
  rule('rhs' => 'member_seq').tag('sequence')
  rule('rhs' => [])
  rule('member_seq' => 'member_seq member').tag('more_members')
  rule('member_seq' => 'member').tag('one_member')

  # A member is either plain (optionally annotated) or quantified
  rule('member' => 'strait_member')
  rule('member' => 'quantified_member')
  rule('strait_member' => 'base_member')
  rule('strait_member' => 'base_member annotation').tag('annotated_member')
  rule('base_member' => 'SYMBOL').tag('symbol')
  rule('base_member' => 'LEFT_PAREN member_seq RIGHT_PAREN').tag('grouping')
  rule('quantified_member' => 'base_member quantifier').tag('quantified_member')
  rule('quantifier' => 'QUESTION_MARK').tag('question_mark')
  rule('quantifier' => 'STAR').tag('star')
  rule('quantifier' => 'PLUS').tag('plus')

  # An annotation is a brace-delimited, comma-separated list of key/value pairs
  rule('annotation' => 'LEFT_BRACE mapping RIGHT_BRACE').tag('annotation')
  rule('mapping' => 'mapping COMMA key_value').tag('more_pairs')
  rule('mapping' => 'key_value').tag('one_pair')
  rule('key_value' => 'KEY value').tag('raw_pair')
  rule('value' => 'STR_LIT')
  rule('value' => 'INT_LIT')
  rule('value' => 'range')
  rule('range' => 'INT_LIT ELLIPSIS INT_LIT').tag('bound_range')
  rule('range' => 'INT_LIT ELLIPSIS')
end

# And now build the Rley Grammar Notation (RGN) grammar...
RGNGrammar = builder.grammar
|
48
|
+
end # module
|
49
|
+
end # module
|
@@ -0,0 +1,504 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'set'
|
4
|
+
|
5
|
+
require_relative 'parser'
|
6
|
+
require_relative 'ast_visitor'
|
7
|
+
require_relative '../syntax/match_closest'
|
8
|
+
|
9
|
+
module Rley # This module is used as a namespace
|
10
|
+
module Notation # This module is used as a namespace
|
11
|
+
# Structure used for production rules that are implicitly generated by Rley
|
12
|
+
# Members, in order: name of the left-hand side symbol, right-hand side
# representation, tag of the rule, flag telling whether the rule bypasses
# the notation parser (see GrammarBuilder#process_raw_rules), and the
# constraints to attach to the resulting production.
RawRule = Struct.new(:lhs, :rhs, :tag, :simple, :constraints)
|
13
|
+
|
14
|
+
# Builder GoF pattern. Builder builds a complex object
|
15
|
+
# (say, a grammar) from simpler objects (terminals and productions)
|
16
|
+
# and using a step by step approach.
|
17
|
+
class GrammarBuilder
|
18
|
+
# @return [Hash{String, GrmSymbol}] The mapping of grammar symbol names
|
19
|
+
# to the matching grammar symbol object.
|
20
|
+
attr_reader(:symbols)
|
21
|
+
|
22
|
+
# @return [Notation::Parser] Parser for the right-side of productions
|
23
|
+
attr_reader(:parser)
|
24
|
+
|
25
|
+
# @return [Hash{ASTVisitor, Array}]
|
26
|
+
attr_reader(:visitor2rhs)
|
27
|
+
|
28
|
+
# @return [Array<Production>] The list of production rules for
|
29
|
+
# the grammar to build.
|
30
|
+
attr_reader(:productions)
|
31
|
+
|
32
|
+
# @return [Hash{String => Array<RawRule>}] The synthesized raw rules, grouped by the name of their left-hand side symbol
|
33
|
+
attr_reader(:synthetized)
|
34
|
+
|
35
|
+
# Creates a new grammar builder.
# When a block is given, it is evaluated in the context of the new
# builder and the grammar entry is then marked as complete.
# @param aBlock [Proc] code block used to build the grammar.
# @example Building a tiny English grammar
#   builder = Rley::Notation::GrammarBuilder.new do
#     add_terminals('n', 'v', 'adj', 'det')
#     rule 'S' => 'NP VP'
#     rule 'VP' => 'v NP'
#     rule 'NP' => 'det n'
#     rule 'NP' => 'adj NP'
#   end
#   tiny_eng = builder.grammar
def initialize(&aBlock)
  @symbols = {}
  @productions = []
  @parser = Notation::Parser.new
  @visitor2rhs = {}
  @synthetized = {}

  return unless block_given?

  instance_exec(&aBlock)
  grammar_complete!
end
|
58
|
+
|
59
|
+
# Retrieve a grammar symbol from its name.
# @param aSymbolName [String] the name of a grammar symbol.
# @return [GrmSymbol, nil] the retrieved symbol object, or nil when
#   no symbol with that name is known to the builder.
def [](aSymbolName)
  symbols.fetch(aSymbolName, nil)
end
|
66
|
+
|
67
|
+
# Register the given terminal symbols in the builder.
# @param terminalSymbols [String or Terminal] 1..* terminal symbols.
# @return [Hash] the updated name-to-symbol mapping of the builder
def add_terminals(*terminalSymbols)
  symbols.merge!(build_symbols(Syntax::Terminal, terminalSymbols))
end
|
74
|
+
|
75
|
+
# Register the given marker symbol in the builder.
# @param aMarkerSymbol [String] A marker symbol
# @return [Object] the registered marker symbol
def add_marker(aMarkerSymbol)
  build_symbol(Syntax::Marker, aMarkerSymbol).tap do |marker|
    symbols[marker.name] = marker
  end
end
|
82
|
+
|
83
|
+
# Add a production rule in the grammar given one
# key-value pair of the form: String => String.
# Where the key is the name of the non-terminal appearing in the
# left side of the rule.
# The value is a sequence of grammar symbol names (optionally quantified).
# An Array of symbol names is accepted too: its elements are joined with
# spaces, and an empty Array stands for an empty right-hand side.
# The rule is created and inserted in the grammar.
# @example Equivalent call syntax
#   builder.add_production('A' => 'a A c')
#   builder.rule('A' => 'a A c') # 'rule' is a synonym
# @param aProductionRepr [Hash{String, String}]
#   A Hash-based representation of a production.
# @return [Production] The created Production instance
def add_production(aProductionRepr)
  aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
    lhs = get_grm_symbol(lhs_name)
    rhs_text = rhs_repr.kind_of?(Array) ? rhs_repr.join(' ') : rhs_repr
    rhs = rhs_text.strip
    constraints = []
    if rhs.empty?
      rhs_members = []
    else
      ast = parser.parse(rhs)
      visitor = ASTVisitor.new(ast)
      # The builder subscribes to the visit: its after_* callbacks
      # collect the rhs symbols under the visitor's entry.
      visitor2rhs[visitor] = []
      visitor.subscribe(self)
      visitor.start
      root_node = ast.root
      constraints = root_node.constraints unless root_node.kind_of?(SymbolNode)

      rhs_members = visitor2rhs.delete(visitor)
    end
    new_prod = Syntax::Production.new(lhs, rhs_members)
    new_prod.constraints = constraints
    productions << new_prod
  end

  productions.last
end
|
120
|
+
|
121
|
+
# Return the grammar built from the symbols and productions added so far.
# The grammar is created on first call and the same object is returned
# afterwards.
# @return [Grammar] the created grammar object.
# @raise [StandardError] when there is no symbol, no production, or when
#   a terminal symbol occurs in no right-hand side of a production.
def grammar
  return @grammar if @grammar

  raise StandardError, 'No symbol found for grammar' if symbols.empty?
  raise StandardError, 'No production found for grammar' if productions.empty?

  # Check that each terminal appears at least in a rhs of a production
  rhs_symbols = productions.flat_map { |prod| prod.rhs.members }
  in_use = Set.new(rhs_symbols.select { |symb| symb.kind_of?(Syntax::Terminal) })
  declared = symbols.values.select { |a_symb| a_symb.kind_of?(Syntax::Terminal) }
  unused = declared.reject { |a_term| in_use.include?(a_term) }
  unless unused.empty?
    raise StandardError, "Useless terminal symbol(s): #{unused.map(&:name).join(', ')}."
  end

  @grammar = Syntax::Grammar.new(productions.dup)
end
|
153
|
+
|
154
|
+
alias rule add_production
|
155
|
+
|
156
|
+
# When a symbol, say symb, in a rhs is followed by a '?' modifier,
# then a non-terminal named symb + suffix_qmark is generated together
# with two rules:
# implicitly called: rule('symb_qmark' => 'symb').tag suffix_qmark_one
# implicitly called: rule('symb_qmark' => '').tag suffix_qmark_none
# @return [String] suffix of the name of the generated non-terminal
def suffix_qmark
  '_qmark'
end

# @return [String] tag of the generated rule matching one occurrence
def suffix_qmark_one
  '_qmark_one'
end

# @return [String] tag of the generated rule matching no occurrence
def suffix_qmark_none
  '_qmark_none'
end
|
171
|
+
|
172
|
+
# When a symbol, say symb, in a rhs is followed by a '*' modifier,
# then a non-terminal named symb + suffix_star is generated together
# with two rules:
# implicitly called: rule('symb_star' => 'symb_star symb').tag suffix_star_more
# implicitly called: rule('symb_star' => '').tag suffix_star_none
# @return [String] suffix of the name of the generated non-terminal
def suffix_star
  '_star'
end

# @return [String] tag of the generated rule matching one more occurrence
def suffix_star_more
  '_star_more'
end

# @return [String] tag of the generated rule matching no occurrence
def suffix_star_none
  '_star_none'
end
|
187
|
+
|
188
|
+
# When a symbol, say symb, in a rhs is followed by a '+' modifier,
# then a non-terminal named symb + suffix_plus is generated together
# with two rules:
# implicitly called: rule('digit_plus' => 'digit_plus digit').tag suffix_plus_more
# implicitly called: rule('digit_plus' => 'digit').tag suffix_plus_one
# @return [String] suffix of the name of the generated non-terminal
def suffix_plus
  '_plus'
end

# @return [String] tag of the generated rule matching one more occurrence
def suffix_plus_more
  '_plus_more'
end

# @return [String] tag of the generated rule matching a single occurrence
def suffix_plus_one
  '_plus_one'
end
|
203
|
+
|
204
|
+
# Map a repetition to the suffix used in names of generated symbols.
# @param aRepetition [Symbol] one of :zero_or_one, :zero_or_more,
#   :exactly_one, :one_or_more
# @return [String, nil] the suffix (empty for :exactly_one);
#   nil for any other value
def repetition2suffix(aRepetition)
  case aRepetition
  when :zero_or_one then suffix_qmark
  when :zero_or_more then suffix_star
  when :exactly_one then ''
  when :one_or_more then suffix_plus
  end
end
|
214
|
+
|
215
|
+
# Map a postfix modifier character to the suffix used in names of
# generated symbols.
# @param aModifier [String] one of '?', '*', '+'
# @return [String, nil] the suffix; nil for any other value
def modifier2suffix(aModifier)
  selector = { '?' => :suffix_qmark, '*' => :suffix_star, '+' => :suffix_plus }[aModifier]
  selector && send(selector)
end
|
224
|
+
|
225
|
+
##################################
|
226
|
+
# RGN's AST visit notification events
|
227
|
+
# ################################
|
228
|
+
# Visit notification: a symbol node was visited.
# For a quantified symbol, a non-terminal named after the symbol and its
# quantifier is registered (once) together with its two raw rules; that
# non-terminal then stands for the symbol in the right-hand side.
# The resulting grammar symbol is appended to the rhs collected for
# the visitor.
# @param aSymbolNode [Notation::SymbolNode] the visited node
# @param aVisitor [Notation::ASTVisitor] the visitor doing the traversal
# @raise [StandardError] when the repetition of the node is unknown
def after_symbol_node(aSymbolNode, aVisitor)
  base_name = aSymbolNode.name
  rhs_name = base_name

  unless aSymbolNode.repetition == :exactly_one
    rhs_name, raw_rules =
      case aSymbolNode.repetition
      when :zero_or_one
        # rule('symb_qmark' => 'symb') and rule('symb_qmark' => '')
        ["#{base_name}#{suffix_qmark}",
         [[base_name.to_s, suffix_qmark_one], ['', suffix_qmark_none]]]
      when :zero_or_more
        # rule('symb_star' => 'symb_star symb') and rule('symb_star' => '')
        lhs = "#{base_name}#{suffix_star}"
        [lhs, [["#{lhs} #{base_name}", suffix_star_more], [[], suffix_star_none]]]
      when :one_or_more
        # rule('symb_plus' => 'symb_plus symb') and rule('symb_plus' => 'symb')
        lhs = "#{base_name}#{suffix_plus}"
        [lhs, [["#{lhs} #{base_name}", suffix_plus_more], [base_name, suffix_plus_one]]]
      else
        raise StandardError, 'Unhandled multiplicity'
      end

    unless symbols.include?(rhs_name)
      add_nonterminal(rhs_name)
      raw_rules.each { |rhs, tag| add_raw_rule(rhs_name, rhs, tag) }
    end
  end

  visitor2rhs[aVisitor] << get_grm_symbol(rhs_name)
end
|
272
|
+
|
273
|
+
# Visit notification: a sequence node was visited.
# For every subnode bearing a non-empty annotation, a MatchClosest
# constraint is appended to the constraints of the sequence; it is built
# from the sequence, the index of the subnode and the value found under
# the 'match_closest' key of the annotation.
# @param aSequenceNode [Notation::SequenceNode] the visited node
# @param _visitor [Notation::ASTVisitor] the visitor (unused)
def after_sequence_node(aSequenceNode, _visitor)
  aSequenceNode.subnodes.each_with_index do |child, position|
    annotation = child.annotation
    unless annotation.empty?
      closest = annotation['match_closest']
      aSequenceNode.constraints << Syntax::MatchClosest.new(aSequenceNode, position, closest)
    end
  end
end
|
280
|
+
|
281
|
+
# Visit notification: a grouping node was visited.
# The constraints of the grouping are first collected as for a sequence.
# For a quantified grouping, a non-terminal standing for the group is
# registered (once) with a raw rule tagged 'return_children', then a
# second non-terminal standing for the quantified group is registered
# (once) with its two raw rules; the latter symbol is appended to the
# rhs collected for the visitor.
# Nothing is generated for a grouping that occurs exactly once.
# @param aGroupingNode [Notation::GroupingNode] the visited node
# @param aVisitor [Notation::ASTVisitor] the visitor doing the traversal
# @raise [StandardError] when the repetition of the node is unknown
def after_grouping_node(aGroupingNode, aVisitor)
  after_sequence_node(aGroupingNode, aVisitor)
  repetition = aGroupingNode.repetition
  once = repetition == :exactly_one
  group_name = sequence_name(aGroupingNode)

  unless once || symbols.include?(group_name)
    add_nonterminal(group_name)
    group_rhs = serialize_sequence(aGroupingNode)
    add_raw_rule(group_name, group_rhs, 'return_children', true, aGroupingNode.constraints)
  end
  quantified_name = "#{group_name}#{repetition2suffix(repetition)}"

  # Each entry lists the arguments of add_raw_rule that follow the lhs name
  raw_rules =
    case repetition
    when :zero_or_one
      # rule('grp_qmark' => 'grp') and rule('grp_qmark' => '')
      [[group_name, suffix_qmark_one, true], [[], suffix_qmark_none, true]]
    when :zero_or_more
      # rule('grp_star' => 'grp_star grp') and rule('grp_star' => '')
      [["#{quantified_name} #{group_name}", suffix_star_more], ['', suffix_star_none]]
    when :exactly_one
      nil
    when :one_or_more
      # rule('grp_plus' => 'grp_plus grp') and rule('grp_plus' => 'grp')
      [["#{quantified_name} #{group_name}", suffix_plus_more], [group_name, suffix_plus_one]]
    else
      raise StandardError, 'Unhandled multiplicity'
    end
  return if once

  unless symbols.include?(quantified_name)
    add_nonterminal(quantified_name)
    raw_rules.each { |rule_args| add_raw_rule(quantified_name, *rule_args) }
  end

  visitor2rhs[aVisitor] << get_grm_symbol(quantified_name)
end
|
329
|
+
|
330
|
+
# A notification to the builder object that the programmer
# has completed the entry of terminals and production rules.
# The raw rules synthesized so far are then turned into productions.
def grammar_complete!
  process_raw_rules
end
|
335
|
+
|
336
|
+
private
|
337
|
+
|
338
|
+
# Create a non-terminal symbol and register it under its name
# (a symbol already registered under that name is replaced).
# @param aName [String] name of the non-terminal
# @return [Syntax::NonTerminal] the created symbol
def add_nonterminal(aName)
  symbols.store(aName, Syntax::NonTerminal.new(aName))
end
|
341
|
+
|
342
|
+
# Add a production without running the notation parser on its rhs.
# The rhs is either a String of whitespace-separated names or an Array
# of names. A name longer than one character that ends with '?', '*'
# or '+' refers to the symbol named after the base name followed by the
# suffix of that modifier.
# @param aProductionRepr [Hash{String, String or Array<String>}]
#   A Hash-based representation of a production.
# @return [Production] The last created Production instance
def simple_rule(aProductionRepr)
  aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
    lhs = get_grm_symbol(lhs_name)
    names = rhs_repr.kind_of?(String) ? rhs_repr.strip.scan(/\S+/) : rhs_repr

    members = names.map do |name|
      # A lone '?', '*' or '+' is a plain name (same guard as in
      # get_grm_symbol), not a modifier without a base name.
      if name.length > 1 && name.end_with?('?', '*', '+')
        suffix = modifier2suffix(name[-1])
        get_grm_symbol("#{name.chop}#{suffix}")
      else
        get_grm_symbol(name)
      end
    end
    productions << Syntax::Production.new(lhs, members)
  end

  productions.last
end
|
367
|
+
|
368
|
+
# Build a name-to-symbol mapping for the given grammar symbols.
# @param aClass [Class] The class of grammar symbols to instantiate.
# @param theSymbols [Array] array of elements are treated as follows:
#   if the element is already a grammar symbol, then it added as is,
#   otherwise it is considered as the name of a grammar symbol
#   of the specified class to build.
# @return [Hash] the grammar symbols, keyed by their name
def build_symbols(aClass, theSymbols)
  theSymbols.each_with_object({}) do |symbol_or_name, mapping|
    grm_symbol = build_symbol(aClass, symbol_or_name)
    mapping[grm_symbol.name] = grm_symbol
  end
end
|
383
|
+
|
384
|
+
# Return the argument itself when it is already a grammar symbol object.
# Otherwise, use it as the name of a new instance of the given class.
# @param aClass [Class] The class of grammar symbols to instantiate
# @param aSymbolArg [GrmSymbol-like or String]
# @return [GrmSymbol] the given or the newly created grammar symbol
def build_symbol(aClass, aSymbolArg)
  return aSymbolArg if aSymbolArg.kind_of?(Syntax::GrmSymbol)

  aClass.new(aSymbolArg)
end
|
397
|
+
|
398
|
+
# Retrieve the grammar symbol with given name.
# If it doesn't exist yet, then a non-terminal with that name is
# created on the fly.
# A name longer than one character that ends with '+' designates the
# repetition of the symbol named by the preceding characters: the
# non-terminal 'symb_plus' is returned. On its first use, that
# non-terminal is registered together with the same two raw rules as for
# a quantified symbol met during a visit; these become productions when
# the raw rules are processed (see #grammar_complete!).
# @param aSymbolName [String] the name of the grammar symbol to retrieve
# @return [GrmSymbol] the retrieved or created symbol
def get_grm_symbol(aSymbolName)
  name = aSymbolName

  if aSymbolName.length > 1 && aSymbolName.end_with?('+')
    base_name = aSymbolName.chop
    name = "#{base_name}#{suffix_plus}"
    unless symbols.include?(name)
      add_nonterminal(name)
      add_raw_rule(name, "#{name} #{base_name}", suffix_plus_more)
      add_raw_rule(name, base_name, suffix_plus_one)
    end
  end

  symbols[name] = Syntax::NonTerminal.new(name) unless symbols.include?(name)

  symbols[name]
end
|
426
|
+
|
427
|
+
# Compute the name of the non-terminal standing for a sequence.
# The name is 'seq' followed, for each subnode, by an underscore and the
# name of the symbol (or the name of the nested sequence), then by the
# suffix of the subnode's repetition.
# @param aSequenceNode [Notation::SequenceNode] the sequence to name
# @return [String] the name derived from the subnodes
def sequence_name(aSequenceNode)
  fragments = aSequenceNode.subnodes.map do |child|
    label =
      case child
      when SymbolNode then "_#{child.name}"
      when SequenceNode then "_#{sequence_name(child)}"
      else ''
      end

    label + repetition2suffix(child.repetition)
  end

  "seq#{fragments.join}"
end
|
442
|
+
|
443
|
+
# Return the name of a node, regardless of its repetition.
# @param aNode [Notation::ASTNode] a symbol node or a sequence-like node
# @return [String] the symbol name for a symbol node, the computed
#   sequence name otherwise
def node_base_name(aNode)
  aNode.kind_of?(SymbolNode) ? aNode.name : sequence_name(aNode)
end
|
450
|
+
|
451
|
+
# Return the name of a node followed by the suffix of its repetition.
# @param aNode [Notation::ASTNode] a symbol node or a sequence-like node
# @return [String] the base name of the node plus its repetition suffix
def node_decorated_name(aNode)
  base_name = node_base_name(aNode)
  suffix = repetition2suffix(aNode.repetition)

  "#{base_name}#{suffix}"
end
|
457
|
+
|
458
|
+
# Build the textual right-hand side that stands for a sequence.
# Each subnode contributes its symbol name (or the name of the nested
# sequence) followed by the suffix of its repetition; the contributions
# are separated by one space.
# @param aSequenceNode [Notation::SequenceNode] the sequence to serialize
# @return [String] the space-separated names, without surrounding blanks
def serialize_sequence(aSequenceNode)
  names = aSequenceNode.subnodes.map do |sn|
    base =
      case sn
      when SymbolNode then sn.name
      when SequenceNode then sequence_name(sn)
      else ''
      end

    base + repetition2suffix(sn.repetition)
  end

  names.join(' ').strip
end
|
475
|
+
|
476
|
+
# Record a rule to synthesize later on (see #process_raw_rules).
# Raw rules are grouped by the name of their left-hand side symbol.
# @param aSymbol [String] name of the left-hand side symbol
# @param aRHS [String or Array] representation of the right-hand side
# @param aTag [String] tag to give to the resulting production
# @param simplified [Boolean] whether the rule goes through #simple_rule
# @param constraints [Array] constraints for the resulting production
# @return [Array<RawRule>] the raw rules recorded for aSymbol
def add_raw_rule(aSymbol, aRHS, aTag, simplified = false, constraints = [])
  (@synthetized[aSymbol] ||= []) << RawRule.new(aSymbol, aRHS, aTag, simplified, constraints)
end
|
484
|
+
|
485
|
+
# Turn every recorded raw rule into a production.
# The groups of raw rules are consumed one at a time, oldest group first,
# until none is left (groups recorded meanwhile are consumed as well).
# A raw rule flagged as simple goes through #simple_rule, any other one
# through #rule; the production is then given the tag and the
# constraints of the raw rule.
def process_raw_rules
  until synthetized.empty?
    _lhs_name, raw_rules = synthetized.shift
    raw_rules.each do |raw|
      repr = { raw.lhs => raw.rhs }
      production = raw.simple ? simple_rule(repr) : rule(repr)
      production.tag(raw.tag)
      production.constraints = raw.constraints
    end
  end
end
|
500
|
+
end # class
|
501
|
+
end # module
|
502
|
+
end # module
|
503
|
+
|
504
|
+
# End of file
|