rley 0.8.06 → 0.8.08
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/appveyor.yml +1 -3
- data/examples/NLP/benchmark_pico_en.rb +6 -6
- data/examples/NLP/engtagger.rb +6 -6
- data/examples/general/calc_iter1/calc_lexer.rb +1 -1
- data/examples/general/calc_iter2/calc_lexer.rb +1 -1
- data/examples/general/left.rb +1 -1
- data/examples/general/right.rb +1 -1
- data/examples/tokenizer/loxxy_raw_scanner.rex.rb +3 -0
- data/examples/tokenizer/loxxy_tokenizer.rb +2 -2
- data/examples/tokenizer/run_tokenizer.rb +1 -1
- data/examples/tokenizer/{tokens.yaml → tokens.yml} +0 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/engine.rb +2 -2
- data/lib/rley/interface.rb +3 -3
- data/lib/rley/lexical/token.rb +1 -1
- data/lib/rley/ptree/non_terminal_node.rb +1 -1
- data/lib/rley/rgn/all_notation_nodes.rb +5 -0
- data/lib/rley/{notation → rgn}/ast_builder.rb +19 -12
- data/lib/rley/{notation → rgn}/ast_node.rb +12 -11
- data/lib/rley/{notation → rgn}/ast_visitor.rb +10 -10
- data/lib/rley/rgn/composite_node.rb +28 -0
- data/lib/rley/{notation → rgn}/grammar.rb +1 -1
- data/lib/rley/{notation → rgn}/grammar_builder.rb +86 -124
- data/lib/rley/{notation → rgn}/parser.rb +4 -4
- data/lib/rley/rgn/repetition_node.rb +62 -0
- data/lib/rley/rgn/sequence_node.rb +30 -0
- data/lib/rley/{notation → rgn}/symbol_node.rb +15 -7
- data/lib/rley/{notation → rgn}/tokenizer.rb +1 -1
- data/spec/rley/parser/dangling_else_spec.rb +3 -3
- data/spec/rley/parser/gfg_earley_parser_spec.rb +48 -48
- data/spec/rley/{notation → rgn}/grammar_builder_spec.rb +58 -54
- data/spec/rley/{notation → rgn}/parser_spec.rb +36 -24
- data/spec/rley/rgn/repetition_node_spec.rb +56 -0
- data/spec/rley/rgn/sequence_node_spec.rb +48 -0
- data/spec/rley/rgn/symbol_node_spec.rb +33 -0
- data/spec/rley/{notation → rgn}/tokenizer_spec.rb +2 -2
- data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
- data/spec/rley/support/grammar_int_seq_helper.rb +2 -2
- metadata +31 -24
- data/lib/rley/notation/all_notation_nodes.rb +0 -4
- data/lib/rley/notation/grouping_node.rb +0 -23
- data/lib/rley/notation/sequence_node.rb +0 -35
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 55b425fac78001960b95935a8dc8de88d1c0826bea5fba5fae72455f53cb084a
|
4
|
+
data.tar.gz: fdf0f9ed39985923134cd3ecc4bdd9325c9aab783437ab1d61a07e333ad3f2eb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e63b7a5df081fa326b48f7f2e2cedbaf877ed377927e408278e3c8078a30fdcc69714ff234ba98dccc1eb3b05eaea53ebb39846a43e2cb794b883d91ce1c6953
|
7
|
+
data.tar.gz: e60bc7b045fe8d7203ddf26ecd603646137d43cbbbe79b533b2088c0f7d15bb46bc1dd58ae4d811ba8d78327fdcb0045cfddec4d708a76968d33c1602bf71cf4
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
### 0.8.08 / 2021-10-31
|
2
|
+
- Fixes in example files. Refactored module `Notation`, renamed to `RGN`.
|
3
|
+
|
4
|
+
|
5
|
+
* [CHANGE] Module `Notation` changed to `RGN`.
|
6
|
+
|
7
|
+
* [FIX] `examples/general/calc_iter1/CalcLexer#initialize` now strips whitespaces at end of expression to parse.
|
8
|
+
* [FIX] `examples/general/calc_iter2/CalcLexer#initialize` now strips whitespaces at end of expression to parse.
|
9
|
+
* [FIX] File `examples/general/left.rb` Call to wrong grammar builder
|
10
|
+
* [FIX] File `examples/general/right.rb` Call to wrong grammar builder
|
11
|
+
* [FIX] File `examples/NLP/benchmark_pico_en.rb` Grammar rules used obsolete rule syntax
|
12
|
+
* [FIX] File `examples/NLP/engtagger.rb` Grammar rules used obsolete rule syntax
|
13
|
+
|
1
14
|
### 0.8.06 / 2021-10-06
|
2
15
|
- Added more comments in `/examples/tokenizer` files
|
3
16
|
|
data/appveyor.yml
CHANGED
@@ -1,15 +1,13 @@
|
|
1
1
|
version: '{build}'
|
2
2
|
max_jobs: 5
|
3
3
|
environment:
|
4
|
-
matrix:
|
4
|
+
matrix:
|
5
5
|
- Ruby_version: 26-x64
|
6
6
|
- Ruby_version: 25-x64
|
7
7
|
- Ruby_version: 24-x64
|
8
|
-
- Ruby_version: 23-x64
|
9
8
|
- Ruby_version: 26
|
10
9
|
- Ruby_version: 25
|
11
10
|
- Ruby_version: 24
|
12
|
-
- Ruby_version: 23
|
13
11
|
|
14
12
|
install:
|
15
13
|
- set PATH=C:\Ruby%Ruby_version%\bin;%PATH%
|
@@ -24,13 +24,13 @@ engine.build_grammar do
|
|
24
24
|
add_terminals('Determiner', 'Preposition')
|
25
25
|
|
26
26
|
# Here we define the productions (= grammar rules)
|
27
|
-
rule 'S' =>
|
27
|
+
rule 'S' => 'NP VP'
|
28
28
|
rule 'NP' => 'Proper-Noun'
|
29
|
-
rule 'NP' =>
|
30
|
-
rule 'NP' =>
|
31
|
-
rule 'VP' =>
|
32
|
-
rule 'VP' =>
|
33
|
-
rule 'PP' =>
|
29
|
+
rule 'NP' => 'Determiner Noun'
|
30
|
+
rule 'NP' => 'Determiner Noun PP'
|
31
|
+
rule 'VP' => 'Verb NP'
|
32
|
+
rule 'VP' => 'Verb NP PP'
|
33
|
+
rule 'PP' => 'Preposition NP'
|
34
34
|
end
|
35
35
|
|
36
36
|
########################################
|
data/examples/NLP/engtagger.rb
CHANGED
@@ -136,13 +136,13 @@ nlp_engine.build_grammar do
|
|
136
136
|
add_terminals('DET', 'IN', 'VBD')
|
137
137
|
|
138
138
|
# Here we define the productions (= grammar rules)
|
139
|
-
rule 'S' =>
|
139
|
+
rule 'S' => 'NP VP'
|
140
140
|
rule 'NP' => 'NNP'
|
141
|
-
rule 'NP' =>
|
142
|
-
rule 'NP' =>
|
143
|
-
rule 'VP' =>
|
144
|
-
rule 'VP' =>
|
145
|
-
rule 'PP' =>
|
141
|
+
rule 'NP' => 'DET NN'
|
142
|
+
rule 'NP' => 'DET NN PP'
|
143
|
+
rule 'VP' => 'VBD NP'
|
144
|
+
rule 'VP' => 'VBD NP PP'
|
145
|
+
rule 'PP' => 'IN NP'
|
146
146
|
end
|
147
147
|
|
148
148
|
# text = "Yo I'm not done with you"
|
data/examples/general/left.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
require 'rley' # Load Rley library
|
5
5
|
|
6
6
|
# Instantiate a builder object that will build the grammar for us.
|
7
|
-
builder = Rley::
|
7
|
+
builder = Rley::grammar_builder do
|
8
8
|
# The grammar defines a language that consists in a sequence
|
9
9
|
# of 0 or more dots...
|
10
10
|
add_terminals('DOT')
|
data/examples/general/right.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
require 'rley' # Load Rley library
|
5
5
|
|
6
6
|
# Instantiate a builder object that will build the grammar for us
|
7
|
-
builder = Rley::
|
7
|
+
builder = Rley::grammar_builder do
|
8
8
|
# The grammar defines a language that consists in a sequence
|
9
9
|
# of 0 or more dots...
|
10
10
|
add_terminals('DOT')
|
@@ -1,12 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
# encoding: UTF-8
|
4
|
+
|
4
5
|
#--
|
5
6
|
# This file is automatically generated. Do not modify it.
|
6
7
|
# Generated by: oedipus_lex version 2.5.3.
|
7
8
|
# Source: loxxy_raw_scanner.rex
|
8
9
|
#++
|
9
10
|
|
11
|
+
# As Rubocop shouts about "offences" in the generated code,
|
12
|
+
# we disable the detection of most of them...
|
10
13
|
# rubocop: disable Style/MutableConstant
|
11
14
|
# rubocop: disable Layout/SpaceBeforeSemicolon
|
12
15
|
# rubocop: disable Style/Alias
|
@@ -4,7 +4,7 @@ require 'rley'
|
|
4
4
|
require_relative 'loxxy_raw_scanner.rex'
|
5
5
|
|
6
6
|
# Tokenizer for the Lox language that is compatible with a Rley parser.
|
7
|
-
# It works as an adapter between the parser and the scanner generated
|
7
|
+
# It works as an adapter between the parser and the scanner generated
|
8
8
|
# with `oedipus_lex` scanner generator.
|
9
9
|
# The purpose is to transform the output of a `LoxxyRawScanner` instance
|
10
10
|
# into a sequence of tokens in the format expected by Rley.
|
@@ -60,7 +60,7 @@ class LoxxyTokenizer
|
|
60
60
|
end
|
61
61
|
|
62
62
|
# Set the text to tokenize.
|
63
|
-
# @param source [String] The text to tokenize
|
63
|
+
# @param source [String] The text to tokenize
|
64
64
|
def start_with(source)
|
65
65
|
@input = source
|
66
66
|
end
|
@@ -29,5 +29,5 @@ LOX_END
|
|
29
29
|
|
30
30
|
loxxy_tokenizer = LoxxyTokenizer.new(lox_source)
|
31
31
|
tokens = loxxy_tokenizer.tokens
|
32
|
-
File::open('tokens.
|
32
|
+
File::open('tokens.yml', 'w') { |f| YAML.dump(tokens, f) }
|
33
33
|
puts 'Done: tokenizer results saved in YAML.'
|
File without changes
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/engine.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative './
|
3
|
+
require_relative './rgn/grammar_builder'
|
4
4
|
require_relative './parser/gfg_earley_parser'
|
5
5
|
require_relative './parse_tree_visitor'
|
6
6
|
require_relative './parse_forest_visitor'
|
@@ -61,7 +61,7 @@ module Rley # This module is used as a namespace
|
|
61
61
|
# add_production('elements' => 'INTEGER')
|
62
62
|
# end
|
63
63
|
def build_grammar(&aBlock)
|
64
|
-
builder = Rley::
|
64
|
+
builder = Rley::RGN::GrammarBuilder.new(&aBlock)
|
65
65
|
@grammar = builder.grammar
|
66
66
|
end
|
67
67
|
|
data/lib/rley/interface.rb
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative './
|
3
|
+
require_relative './rgn/grammar_builder'
|
4
4
|
|
5
5
|
module Rley # Module used as a namespace
|
6
6
|
# Factory method.
|
7
7
|
# A grammar builder constructs a Rley grammar piece by piece
|
8
8
|
# from DSL instructions in a provided code block.
|
9
9
|
# @param aBlock [Proc] a code block
|
10
|
-
# @return [Rley::
|
10
|
+
# @return [Rley::RGN::GrammarBuilder] An object that builds a grammar.
|
11
11
|
def self.grammar_builder(&aBlock)
|
12
|
-
Rley::
|
12
|
+
Rley::RGN::GrammarBuilder.new(&aBlock)
|
13
13
|
end
|
14
14
|
end # module
|
15
15
|
|
data/lib/rley/lexical/token.rb
CHANGED
@@ -37,7 +37,7 @@ module Rley # This module is used as a namespace
|
|
37
37
|
# @param theLexeme [String] the lexeme (= piece of text from input)
|
38
38
|
# @param aTerminal [Syntax::Terminal, String]
|
39
39
|
# The terminal symbol corresponding to the lexeme.
|
40
|
-
# @param
|
40
|
+
# @param aPosition [Rley::Lexical::Position] position of the token in source file
|
41
41
|
def initialize(theLexeme, aTerminal, aPosition = nil)
|
42
42
|
raise 'Internal error: nil terminal symbol detected' if aTerminal.nil?
|
43
43
|
|
@@ -27,7 +27,7 @@ module Rley # This module is used as a namespace
|
|
27
27
|
selfie = super(indentation)
|
28
28
|
prefix = "\n" + (' ' * connector.size * indentation) + connector
|
29
29
|
subnodes_repr = subnodes.reduce(+'') do |sub_result, subnode|
|
30
|
-
sub_result << prefix + subnode.to_string(indentation + 1)
|
30
|
+
sub_result << (prefix + subnode.to_string(indentation + 1))
|
31
31
|
end
|
32
32
|
|
33
33
|
selfie + subnodes_repr
|
@@ -5,7 +5,7 @@ require_relative '../engine'
|
|
5
5
|
require_relative 'all_notation_nodes'
|
6
6
|
|
7
7
|
module Rley
|
8
|
-
module
|
8
|
+
module RGN
|
9
9
|
# The purpose of ASTBuilder is to build piece by piece an AST
|
10
10
|
# (Abstract Syntax Tree) from a sequence of input tokens and
|
11
11
|
# visit events produced by walking over a GFGParsing object.
|
@@ -72,7 +72,7 @@ module Rley
|
|
72
72
|
end
|
73
73
|
|
74
74
|
#####################################
|
75
|
-
# SEMANTIC ACTIONS
|
75
|
+
# RGN SEMANTIC ACTIONS
|
76
76
|
#####################################
|
77
77
|
|
78
78
|
# rule('rhs' => 'member_seq').tag 'sequence'
|
@@ -80,7 +80,7 @@ module Rley
|
|
80
80
|
if theChildren[0].size == 1
|
81
81
|
theChildren[0].first
|
82
82
|
else
|
83
|
-
SequenceNode.new(theChildren[0]
|
83
|
+
SequenceNode.new(theChildren[0])
|
84
84
|
end
|
85
85
|
end
|
86
86
|
|
@@ -96,9 +96,15 @@ module Rley
|
|
96
96
|
|
97
97
|
# rule('strait_member' => 'base_member annotation')
|
98
98
|
def reduce_annotated_member(_production, _range, _tokens, theChildren)
|
99
|
-
|
100
|
-
|
101
|
-
|
99
|
+
if theChildren[1].include?('repeat')
|
100
|
+
node = RepetitionNode.new(theChildren[0], theChildren[1].fetch('repeat'))
|
101
|
+
theChildren[1].delete('repeat')
|
102
|
+
theChildren[0].annotation = theChildren[1]
|
103
|
+
node
|
104
|
+
else
|
105
|
+
theChildren[0].annotation = theChildren[1]
|
106
|
+
theChildren[0]
|
107
|
+
end
|
102
108
|
end
|
103
109
|
|
104
110
|
# rule('base_member' => 'SYMBOL')
|
@@ -107,20 +113,21 @@ module Rley
|
|
107
113
|
end
|
108
114
|
|
109
115
|
# rule('base_member' => 'LEFT_PAREN member_seq RIGHT_PAREN')
|
110
|
-
def reduce_grouping(_production, _range,
|
116
|
+
def reduce_grouping(_production, _range, _tokens, theChildren)
|
111
117
|
if theChildren[1].size == 1
|
112
118
|
theChildren[1].first
|
113
119
|
else
|
114
|
-
|
115
|
-
pos = tokens[rank].position
|
116
|
-
GroupingNode.new(pos, theChildren[1], nil)
|
120
|
+
SequenceNode.new(theChildren[1])
|
117
121
|
end
|
118
122
|
end
|
119
123
|
|
120
124
|
# rule('quantified_member' => 'base_member quantifier')
|
121
125
|
def reduce_quantified_member(_production, _range, _tokens, theChildren)
|
122
|
-
theChildren
|
123
|
-
|
126
|
+
if theChildren == :exactly_one
|
127
|
+
theChildren[0]
|
128
|
+
else
|
129
|
+
RGN::RepetitionNode.new(theChildren[0], theChildren[1])
|
130
|
+
end
|
124
131
|
end
|
125
132
|
|
126
133
|
# rule('quantifier' => 'QUESTION_MARK')
|
@@ -1,24 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Rley
|
4
|
-
module
|
4
|
+
module RGN
|
5
5
|
# Abstract class.
|
6
6
|
# Instances of its subclasses represent nodes of an abstract syntax tree
|
7
7
|
# that is the product of the parse of an input text.
|
8
8
|
class ASTNode
|
9
|
-
# @return [Rley::Lexical::Position] Position of the entry in the input stream.
|
10
|
-
attr_reader :position
|
11
|
-
|
12
|
-
# @return [Symbol]
|
13
|
-
attr_accessor :repetition
|
14
|
-
|
15
9
|
# @return [Hash]
|
16
10
|
attr_reader :annotation
|
17
11
|
|
18
|
-
|
19
|
-
def initialize(aPosition)
|
20
|
-
@position = aPosition
|
21
|
-
@repetition = :exactly_one
|
12
|
+
def initialize
|
22
13
|
@annotation = {}
|
23
14
|
end
|
24
15
|
|
@@ -28,6 +19,16 @@ module Rley
|
|
28
19
|
@annotation = aMapping
|
29
20
|
end
|
30
21
|
|
22
|
+
def annotation_to_text
|
23
|
+
map_arr = []
|
24
|
+
@annotation.each_pair do |key, val|
|
25
|
+
literal = val.kind_of?(String) ? "'#{val}'" : val
|
26
|
+
map_arr << "#{key}: #{literal}"
|
27
|
+
end
|
28
|
+
|
29
|
+
"{ #{map_arr.join(', ')} }"
|
30
|
+
end
|
31
|
+
|
31
32
|
# Notification that the parsing has successfully completed
|
32
33
|
def done!
|
33
34
|
# Default: do nothing ...
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Rley
|
4
|
-
module
|
4
|
+
module RGN
|
5
5
|
class ASTVisitor
|
6
6
|
# Link to the top node to visit
|
7
7
|
attr_reader(:top)
|
@@ -10,7 +10,7 @@ module Rley
|
|
10
10
|
attr_reader(:subscribers)
|
11
11
|
|
12
12
|
# Build a visitor for the given top.
|
13
|
-
# @param aTop [
|
13
|
+
# @param aTop [RGN::ASTNode] the parse tree to visit.
|
14
14
|
def initialize(aTop)
|
15
15
|
raise StandardError if aTop.nil?
|
16
16
|
|
@@ -49,26 +49,26 @@ module Rley
|
|
49
49
|
end
|
50
50
|
|
51
51
|
# Visit event. The visitor is about to visit a symbol node.
|
52
|
-
# @param aSymbolNode [
|
52
|
+
# @param aSymbolNode [RGN::SymbolNode] the symbol node to visit
|
53
53
|
def visit_symbol_node(aSymbolNode)
|
54
54
|
broadcast(:before_symbol_node, aSymbolNode, self)
|
55
55
|
broadcast(:after_symbol_node, aSymbolNode, self)
|
56
56
|
end
|
57
57
|
|
58
58
|
# Visit event. The visitor is about to visit a sequence node.
|
59
|
-
# @param aSequenceNode [
|
59
|
+
# @param aSequenceNode [RGN::SequenceNode] the sequence node to visit
|
60
60
|
def visit_sequence_node(aSequenceNode)
|
61
61
|
broadcast(:before_sequence_node, aSequenceNode, self)
|
62
62
|
traverse_subnodes(aSequenceNode)
|
63
63
|
broadcast(:after_sequence_node, aSequenceNode, self)
|
64
64
|
end
|
65
65
|
|
66
|
-
# Visit event. The visitor is about to visit a
|
67
|
-
# @param
|
68
|
-
def
|
69
|
-
broadcast(:
|
70
|
-
traverse_subnodes(
|
71
|
-
broadcast(:
|
66
|
+
# Visit event. The visitor is about to visit a repetition node.
|
67
|
+
# @param aRepetitionNode [RGN::RepetitionNode] the repetition node to visit
|
68
|
+
def visit_repetition_node(aRepetitionNode)
|
69
|
+
broadcast(:before_repetition_node, aRepetitionNode, self)
|
70
|
+
traverse_subnodes(aRepetitionNode) if aRepetitionNode.repetition == :exactly_one
|
71
|
+
broadcast(:after_repetition_node, aRepetitionNode, self)
|
72
72
|
end
|
73
73
|
|
74
74
|
private
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'ast_node'
|
4
|
+
|
5
|
+
module Rley
|
6
|
+
module RGN
|
7
|
+
# Abstract class for a syntax node that is the parent
|
8
|
+
# of one or more subnodes.
|
9
|
+
class CompositeNode < ASTNode
|
10
|
+
# @return [Array<ASTNode>]
|
11
|
+
attr_reader :subnodes
|
12
|
+
|
13
|
+
# @return [Hash]
|
14
|
+
attr_accessor :constraints
|
15
|
+
|
16
|
+
# @param children [Array<ASTNode>] sequence of children nodes
|
17
|
+
def initialize(children)
|
18
|
+
super()
|
19
|
+
@subnodes = children
|
20
|
+
@constraints = []
|
21
|
+
end
|
22
|
+
|
23
|
+
def size
|
24
|
+
subnodes.size
|
25
|
+
end
|
26
|
+
end # class
|
27
|
+
end # module
|
28
|
+
end # module
|