rley 0.8.06 → 0.8.08
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/appveyor.yml +1 -3
- data/examples/NLP/benchmark_pico_en.rb +6 -6
- data/examples/NLP/engtagger.rb +6 -6
- data/examples/general/calc_iter1/calc_lexer.rb +1 -1
- data/examples/general/calc_iter2/calc_lexer.rb +1 -1
- data/examples/general/left.rb +1 -1
- data/examples/general/right.rb +1 -1
- data/examples/tokenizer/loxxy_raw_scanner.rex.rb +3 -0
- data/examples/tokenizer/loxxy_tokenizer.rb +2 -2
- data/examples/tokenizer/run_tokenizer.rb +1 -1
- data/examples/tokenizer/{tokens.yaml → tokens.yml} +0 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/engine.rb +2 -2
- data/lib/rley/interface.rb +3 -3
- data/lib/rley/lexical/token.rb +1 -1
- data/lib/rley/ptree/non_terminal_node.rb +1 -1
- data/lib/rley/rgn/all_notation_nodes.rb +5 -0
- data/lib/rley/{notation → rgn}/ast_builder.rb +19 -12
- data/lib/rley/{notation → rgn}/ast_node.rb +12 -11
- data/lib/rley/{notation → rgn}/ast_visitor.rb +10 -10
- data/lib/rley/rgn/composite_node.rb +28 -0
- data/lib/rley/{notation → rgn}/grammar.rb +1 -1
- data/lib/rley/{notation → rgn}/grammar_builder.rb +86 -124
- data/lib/rley/{notation → rgn}/parser.rb +4 -4
- data/lib/rley/rgn/repetition_node.rb +62 -0
- data/lib/rley/rgn/sequence_node.rb +30 -0
- data/lib/rley/{notation → rgn}/symbol_node.rb +15 -7
- data/lib/rley/{notation → rgn}/tokenizer.rb +1 -1
- data/spec/rley/parser/dangling_else_spec.rb +3 -3
- data/spec/rley/parser/gfg_earley_parser_spec.rb +48 -48
- data/spec/rley/{notation → rgn}/grammar_builder_spec.rb +58 -54
- data/spec/rley/{notation → rgn}/parser_spec.rb +36 -24
- data/spec/rley/rgn/repetition_node_spec.rb +56 -0
- data/spec/rley/rgn/sequence_node_spec.rb +48 -0
- data/spec/rley/rgn/symbol_node_spec.rb +33 -0
- data/spec/rley/{notation → rgn}/tokenizer_spec.rb +2 -2
- data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
- data/spec/rley/support/grammar_int_seq_helper.rb +2 -2
- metadata +31 -24
- data/lib/rley/notation/all_notation_nodes.rb +0 -4
- data/lib/rley/notation/grouping_node.rb +0 -23
- data/lib/rley/notation/sequence_node.rb +0 -35
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 55b425fac78001960b95935a8dc8de88d1c0826bea5fba5fae72455f53cb084a
|
4
|
+
data.tar.gz: fdf0f9ed39985923134cd3ecc4bdd9325c9aab783437ab1d61a07e333ad3f2eb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e63b7a5df081fa326b48f7f2e2cedbaf877ed377927e408278e3c8078a30fdcc69714ff234ba98dccc1eb3b05eaea53ebb39846a43e2cb794b883d91ce1c6953
|
7
|
+
data.tar.gz: e60bc7b045fe8d7203ddf26ecd603646137d43cbbbe79b533b2088c0f7d15bb46bc1dd58ae4d811ba8d78327fdcb0045cfddec4d708a76968d33c1602bf71cf4
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
### 0.8.08 / 2021-10-31
|
2
|
+
- Fixes in example files; refactored module `Notation`, renamed to `RGN`
|
3
|
+
|
4
|
+
|
5
|
+
* [CHANGE] Module `Notation` changed to `RGN`.
|
6
|
+
|
7
|
+
* [FIX] `examples/general/calc_iter1/CalcLexer#initialize` now strips trailing whitespace from the expression to parse.
|
8
|
+
* [FIX] `examples/general/calc_iter2/CalcLexer#initialize` now strips trailing whitespace from the expression to parse.
|
9
|
+
* [FIX] File `examples/general/left.rb` Call to wrong grammar builder
|
10
|
+
* [FIX] File `examples/general/right.rb` Call to wrong grammar builder
|
11
|
+
* [FIX] File `examples/NLP/benchmark_pico_en.rb` Grammar rules used obsolete rule syntax
|
12
|
+
* [FIX] File `examples/NLP/engtagger.rb` Grammar rules used obsolete rule syntax
|
13
|
+
|
1
14
|
### 0.8.06 / 2021-10-06
|
2
15
|
- Added more comments in `/examples/tokenizer` files
|
3
16
|
|
data/appveyor.yml
CHANGED
@@ -1,15 +1,13 @@
|
|
1
1
|
version: '{build}'
|
2
2
|
max_jobs: 5
|
3
3
|
environment:
|
4
|
-
matrix:
|
4
|
+
matrix:
|
5
5
|
- Ruby_version: 26-x64
|
6
6
|
- Ruby_version: 25-x64
|
7
7
|
- Ruby_version: 24-x64
|
8
|
-
- Ruby_version: 23-x64
|
9
8
|
- Ruby_version: 26
|
10
9
|
- Ruby_version: 25
|
11
10
|
- Ruby_version: 24
|
12
|
-
- Ruby_version: 23
|
13
11
|
|
14
12
|
install:
|
15
13
|
- set PATH=C:\Ruby%Ruby_version%\bin;%PATH%
|
@@ -24,13 +24,13 @@ engine.build_grammar do
|
|
24
24
|
add_terminals('Determiner', 'Preposition')
|
25
25
|
|
26
26
|
# Here we define the productions (= grammar rules)
|
27
|
-
rule 'S' =>
|
27
|
+
rule 'S' => 'NP VP'
|
28
28
|
rule 'NP' => 'Proper-Noun'
|
29
|
-
rule 'NP' =>
|
30
|
-
rule 'NP' =>
|
31
|
-
rule 'VP' =>
|
32
|
-
rule 'VP' =>
|
33
|
-
rule 'PP' =>
|
29
|
+
rule 'NP' => 'Determiner Noun'
|
30
|
+
rule 'NP' => 'Determiner Noun PP'
|
31
|
+
rule 'VP' => 'Verb NP'
|
32
|
+
rule 'VP' => 'Verb NP PP'
|
33
|
+
rule 'PP' => 'Preposition NP'
|
34
34
|
end
|
35
35
|
|
36
36
|
########################################
|
data/examples/NLP/engtagger.rb
CHANGED
@@ -136,13 +136,13 @@ nlp_engine.build_grammar do
|
|
136
136
|
add_terminals('DET', 'IN', 'VBD')
|
137
137
|
|
138
138
|
# Here we define the productions (= grammar rules)
|
139
|
-
rule 'S' =>
|
139
|
+
rule 'S' => 'NP VP'
|
140
140
|
rule 'NP' => 'NNP'
|
141
|
-
rule 'NP' =>
|
142
|
-
rule 'NP' =>
|
143
|
-
rule 'VP' =>
|
144
|
-
rule 'VP' =>
|
145
|
-
rule 'PP' =>
|
141
|
+
rule 'NP' => 'DET NN'
|
142
|
+
rule 'NP' => 'DET NN PP'
|
143
|
+
rule 'VP' => 'VBD NP'
|
144
|
+
rule 'VP' => 'VBD NP PP'
|
145
|
+
rule 'PP' => 'IN NP'
|
146
146
|
end
|
147
147
|
|
148
148
|
# text = "Yo I'm not done with you"
|
data/examples/general/left.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
require 'rley' # Load Rley library
|
5
5
|
|
6
6
|
# Instantiate a builder object that will build the grammar for us.
|
7
|
-
builder = Rley::
|
7
|
+
builder = Rley::grammar_builder do
|
8
8
|
# The grammar defines a language that consists in a sequence
|
9
9
|
# of 0 or more dots...
|
10
10
|
add_terminals('DOT')
|
data/examples/general/right.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
require 'rley' # Load Rley library
|
5
5
|
|
6
6
|
# Instantiate a builder object that will build the grammar for us
|
7
|
-
builder = Rley::
|
7
|
+
builder = Rley::grammar_builder do
|
8
8
|
# The grammar defines a language that consists in a sequence
|
9
9
|
# of 0 or more dots...
|
10
10
|
add_terminals('DOT')
|
@@ -1,12 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
# encoding: UTF-8
|
4
|
+
|
4
5
|
#--
|
5
6
|
# This file is automatically generated. Do not modify it.
|
6
7
|
# Generated by: oedipus_lex version 2.5.3.
|
7
8
|
# Source: loxxy_raw_scanner.rex
|
8
9
|
#++
|
9
10
|
|
11
|
+
# As Rubocop shouts about "offences" in the generated code,
|
12
|
+
# we disable the detection of most of them...
|
10
13
|
# rubocop: disable Style/MutableConstant
|
11
14
|
# rubocop: disable Layout/SpaceBeforeSemicolon
|
12
15
|
# rubocop: disable Style/Alias
|
@@ -4,7 +4,7 @@ require 'rley'
|
|
4
4
|
require_relative 'loxxy_raw_scanner.rex'
|
5
5
|
|
6
6
|
# Tokenizer for the Lox language that is compatible with a Rley parser.
|
7
|
-
# It works as an adapter between the parser and the scanner generated
|
7
|
+
# It works as an adapter between the parser and the scanner generated
|
8
8
|
# with `oedipus_lex` scanner generator.
|
9
9
|
# The purpose is to transform the output of a `LoxxyRawScanner` instance
|
10
10
|
# into a sequence of tokens in the format expected by Rley.
|
@@ -60,7 +60,7 @@ class LoxxyTokenizer
|
|
60
60
|
end
|
61
61
|
|
62
62
|
# Set the text to tokenize.
|
63
|
-
# @param source [String] The text to tokenize
|
63
|
+
# @param source [String] The text to tokenize
|
64
64
|
def start_with(source)
|
65
65
|
@input = source
|
66
66
|
end
|
@@ -29,5 +29,5 @@ LOX_END
|
|
29
29
|
|
30
30
|
loxxy_tokenizer = LoxxyTokenizer.new(lox_source)
|
31
31
|
tokens = loxxy_tokenizer.tokens
|
32
|
-
File::open('tokens.
|
32
|
+
File::open('tokens.yml', 'w') { |f| YAML.dump(tokens, f) }
|
33
33
|
puts 'Done: tokenizer results saved in YAML.'
|
File without changes
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/engine.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative './
|
3
|
+
require_relative './rgn/grammar_builder'
|
4
4
|
require_relative './parser/gfg_earley_parser'
|
5
5
|
require_relative './parse_tree_visitor'
|
6
6
|
require_relative './parse_forest_visitor'
|
@@ -61,7 +61,7 @@ module Rley # This module is used as a namespace
|
|
61
61
|
# add_production('elements' => 'INTEGER')
|
62
62
|
# end
|
63
63
|
def build_grammar(&aBlock)
|
64
|
-
builder = Rley::
|
64
|
+
builder = Rley::RGN::GrammarBuilder.new(&aBlock)
|
65
65
|
@grammar = builder.grammar
|
66
66
|
end
|
67
67
|
|
data/lib/rley/interface.rb
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative './
|
3
|
+
require_relative './rgn/grammar_builder'
|
4
4
|
|
5
5
|
module Rley # Module used as a namespace
|
6
6
|
# Factory method.
|
7
7
|
# A grammar builder constructs a Rley grammar piece by piece
|
8
8
|
# from DSL instructions in a provided code block.
|
9
9
|
# @param aBlock [Proc] a code block
|
10
|
-
# @return [Rley::
|
10
|
+
# @return [Rley::RGN::GrammarBuilder] An object that builds a grammar.
|
11
11
|
def self.grammar_builder(&aBlock)
|
12
|
-
Rley::
|
12
|
+
Rley::RGN::GrammarBuilder.new(&aBlock)
|
13
13
|
end
|
14
14
|
end # module
|
15
15
|
|
data/lib/rley/lexical/token.rb
CHANGED
@@ -37,7 +37,7 @@ module Rley # This module is used as a namespace
|
|
37
37
|
# @param theLexeme [String] the lexeme (= piece of text from input)
|
38
38
|
# @param aTerminal [Syntax::Terminal, String]
|
39
39
|
# The terminal symbol corresponding to the lexeme.
|
40
|
-
# @param
|
40
|
+
# @param aPosition [Rley::Lexical::Position] position of the token in source file
|
41
41
|
def initialize(theLexeme, aTerminal, aPosition = nil)
|
42
42
|
raise 'Internal error: nil terminal symbol detected' if aTerminal.nil?
|
43
43
|
|
@@ -27,7 +27,7 @@ module Rley # This module is used as a namespace
|
|
27
27
|
selfie = super(indentation)
|
28
28
|
prefix = "\n" + (' ' * connector.size * indentation) + connector
|
29
29
|
subnodes_repr = subnodes.reduce(+'') do |sub_result, subnode|
|
30
|
-
sub_result << prefix + subnode.to_string(indentation + 1)
|
30
|
+
sub_result << (prefix + subnode.to_string(indentation + 1))
|
31
31
|
end
|
32
32
|
|
33
33
|
selfie + subnodes_repr
|
@@ -5,7 +5,7 @@ require_relative '../engine'
|
|
5
5
|
require_relative 'all_notation_nodes'
|
6
6
|
|
7
7
|
module Rley
|
8
|
-
module
|
8
|
+
module RGN
|
9
9
|
# The purpose of ASTBuilder is to build piece by piece an AST
|
10
10
|
# (Abstract Syntax Tree) from a sequence of input tokens and
|
11
11
|
# visit events produced by walking over a GFGParsing object.
|
@@ -72,7 +72,7 @@ module Rley
|
|
72
72
|
end
|
73
73
|
|
74
74
|
#####################################
|
75
|
-
# SEMANTIC ACTIONS
|
75
|
+
# RGN SEMANTIC ACTIONS
|
76
76
|
#####################################
|
77
77
|
|
78
78
|
# rule('rhs' => 'member_seq').tag 'sequence'
|
@@ -80,7 +80,7 @@ module Rley
|
|
80
80
|
if theChildren[0].size == 1
|
81
81
|
theChildren[0].first
|
82
82
|
else
|
83
|
-
SequenceNode.new(theChildren[0]
|
83
|
+
SequenceNode.new(theChildren[0])
|
84
84
|
end
|
85
85
|
end
|
86
86
|
|
@@ -96,9 +96,15 @@ module Rley
|
|
96
96
|
|
97
97
|
# rule('strait_member' => 'base_member annotation')
|
98
98
|
def reduce_annotated_member(_production, _range, _tokens, theChildren)
|
99
|
-
|
100
|
-
|
101
|
-
|
99
|
+
if theChildren[1].include?('repeat')
|
100
|
+
node = RepetitionNode.new(theChildren[0], theChildren[1].fetch('repeat'))
|
101
|
+
theChildren[1].delete('repeat')
|
102
|
+
theChildren[0].annotation = theChildren[1]
|
103
|
+
node
|
104
|
+
else
|
105
|
+
theChildren[0].annotation = theChildren[1]
|
106
|
+
theChildren[0]
|
107
|
+
end
|
102
108
|
end
|
103
109
|
|
104
110
|
# rule('base_member' => 'SYMBOL')
|
@@ -107,20 +113,21 @@ module Rley
|
|
107
113
|
end
|
108
114
|
|
109
115
|
# rule('base_member' => 'LEFT_PAREN member_seq RIGHT_PAREN')
|
110
|
-
def reduce_grouping(_production, _range,
|
116
|
+
def reduce_grouping(_production, _range, _tokens, theChildren)
|
111
117
|
if theChildren[1].size == 1
|
112
118
|
theChildren[1].first
|
113
119
|
else
|
114
|
-
|
115
|
-
pos = tokens[rank].position
|
116
|
-
GroupingNode.new(pos, theChildren[1], nil)
|
120
|
+
SequenceNode.new(theChildren[1])
|
117
121
|
end
|
118
122
|
end
|
119
123
|
|
120
124
|
# rule('quantified_member' => 'base_member quantifier')
|
121
125
|
def reduce_quantified_member(_production, _range, _tokens, theChildren)
|
122
|
-
theChildren
|
123
|
-
|
126
|
+
if theChildren == :exactly_one
|
127
|
+
theChildren[0]
|
128
|
+
else
|
129
|
+
RGN::RepetitionNode.new(theChildren[0], theChildren[1])
|
130
|
+
end
|
124
131
|
end
|
125
132
|
|
126
133
|
# rule('quantifier' => 'QUESTION_MARK')
|
@@ -1,24 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Rley
|
4
|
-
module
|
4
|
+
module RGN
|
5
5
|
# Abstract class.
|
6
6
|
# Instances of its subclasses represent nodes of an abstract syntax tree
|
7
7
|
# that is the product of the parse of an input text.
|
8
8
|
class ASTNode
|
9
|
-
# @return [Rley::Lexical::Position] Position of the entry in the input stream.
|
10
|
-
attr_reader :position
|
11
|
-
|
12
|
-
# @return [Symbol]
|
13
|
-
attr_accessor :repetition
|
14
|
-
|
15
9
|
# @return [Hash]
|
16
10
|
attr_reader :annotation
|
17
11
|
|
18
|
-
|
19
|
-
def initialize(aPosition)
|
20
|
-
@position = aPosition
|
21
|
-
@repetition = :exactly_one
|
12
|
+
def initialize
|
22
13
|
@annotation = {}
|
23
14
|
end
|
24
15
|
|
@@ -28,6 +19,16 @@ module Rley
|
|
28
19
|
@annotation = aMapping
|
29
20
|
end
|
30
21
|
|
22
|
+
def annotation_to_text
|
23
|
+
map_arr = []
|
24
|
+
@annotation.each_pair do |key, val|
|
25
|
+
literal = val.kind_of?(String) ? "'#{val}'" : val
|
26
|
+
map_arr << "#{key}: #{literal}"
|
27
|
+
end
|
28
|
+
|
29
|
+
"{ #{map_arr.join(', ')} }"
|
30
|
+
end
|
31
|
+
|
31
32
|
# Notification that the parsing has successfully completed
|
32
33
|
def done!
|
33
34
|
# Default: do nothing ...
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Rley
|
4
|
-
module
|
4
|
+
module RGN
|
5
5
|
class ASTVisitor
|
6
6
|
# Link to the top node to visit
|
7
7
|
attr_reader(:top)
|
@@ -10,7 +10,7 @@ module Rley
|
|
10
10
|
attr_reader(:subscribers)
|
11
11
|
|
12
12
|
# Build a visitor for the given top.
|
13
|
-
# @param aTop [
|
13
|
+
# @param aTop [RGN::ASTNode] the parse tree to visit.
|
14
14
|
def initialize(aTop)
|
15
15
|
raise StandardError if aTop.nil?
|
16
16
|
|
@@ -49,26 +49,26 @@ module Rley
|
|
49
49
|
end
|
50
50
|
|
51
51
|
# Visit event. The visitor is about to visit a symbol node.
|
52
|
-
# @param aSymbolNode [
|
52
|
+
# @param aSymbolNode [RGN::SymbolNode] the symbol node to visit
|
53
53
|
def visit_symbol_node(aSymbolNode)
|
54
54
|
broadcast(:before_symbol_node, aSymbolNode, self)
|
55
55
|
broadcast(:after_symbol_node, aSymbolNode, self)
|
56
56
|
end
|
57
57
|
|
58
58
|
# Visit event. The visitor is about to visit a sequence node.
|
59
|
-
# @param aSequenceNode [
|
59
|
+
# @param aSequenceNode [RGN::SequenceNode] the sequence node to visit
|
60
60
|
def visit_sequence_node(aSequenceNode)
|
61
61
|
broadcast(:before_sequence_node, aSequenceNode, self)
|
62
62
|
traverse_subnodes(aSequenceNode)
|
63
63
|
broadcast(:after_sequence_node, aSequenceNode, self)
|
64
64
|
end
|
65
65
|
|
66
|
-
# Visit event. The visitor is about to visit a
|
67
|
-
# @param
|
68
|
-
def
|
69
|
-
broadcast(:
|
70
|
-
traverse_subnodes(
|
71
|
-
broadcast(:
|
66
|
+
# Visit event. The visitor is about to visit a repetition node.
|
67
|
+
# @param aRepetitionNode [RGN::RepetitionNode] the repetition node to visit
|
68
|
+
def visit_repetition_node(aRepetitionNode)
|
69
|
+
broadcast(:before_repetition_node, aRepetitionNode, self)
|
70
|
+
traverse_subnodes(aRepetitionNode) if aRepetitionNode.repetition == :exactly_one
|
71
|
+
broadcast(:after_repetition_node, aRepetitionNode, self)
|
72
72
|
end
|
73
73
|
|
74
74
|
private
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'ast_node'
|
4
|
+
|
5
|
+
module Rley
|
6
|
+
module RGN
|
7
|
+
# Abstract class for a syntax node that is the parent
|
8
|
+
# of one or more subnodes.
|
9
|
+
class CompositeNode < ASTNode
|
10
|
+
# @return [Array<ASTNode>]
|
11
|
+
attr_reader :subnodes
|
12
|
+
|
13
|
+
# @return [Array]
|
14
|
+
attr_accessor :constraints
|
15
|
+
|
16
|
+
# @param children [Array<ASTNode>] sequence of children nodes
|
17
|
+
def initialize(children)
|
18
|
+
super()
|
19
|
+
@subnodes = children
|
20
|
+
@constraints = []
|
21
|
+
end
|
22
|
+
|
23
|
+
def size
|
24
|
+
subnodes.size
|
25
|
+
end
|
26
|
+
end # class
|
27
|
+
end # module
|
28
|
+
end # module
|