rley 0.8.06 → 0.8.08

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/appveyor.yml +1 -3
  4. data/examples/NLP/benchmark_pico_en.rb +6 -6
  5. data/examples/NLP/engtagger.rb +6 -6
  6. data/examples/general/calc_iter1/calc_lexer.rb +1 -1
  7. data/examples/general/calc_iter2/calc_lexer.rb +1 -1
  8. data/examples/general/left.rb +1 -1
  9. data/examples/general/right.rb +1 -1
  10. data/examples/tokenizer/loxxy_raw_scanner.rex.rb +3 -0
  11. data/examples/tokenizer/loxxy_tokenizer.rb +2 -2
  12. data/examples/tokenizer/run_tokenizer.rb +1 -1
  13. data/examples/tokenizer/{tokens.yaml → tokens.yml} +0 -0
  14. data/lib/rley/constants.rb +1 -1
  15. data/lib/rley/engine.rb +2 -2
  16. data/lib/rley/interface.rb +3 -3
  17. data/lib/rley/lexical/token.rb +1 -1
  18. data/lib/rley/ptree/non_terminal_node.rb +1 -1
  19. data/lib/rley/rgn/all_notation_nodes.rb +5 -0
  20. data/lib/rley/{notation → rgn}/ast_builder.rb +19 -12
  21. data/lib/rley/{notation → rgn}/ast_node.rb +12 -11
  22. data/lib/rley/{notation → rgn}/ast_visitor.rb +10 -10
  23. data/lib/rley/rgn/composite_node.rb +28 -0
  24. data/lib/rley/{notation → rgn}/grammar.rb +1 -1
  25. data/lib/rley/{notation → rgn}/grammar_builder.rb +86 -124
  26. data/lib/rley/{notation → rgn}/parser.rb +4 -4
  27. data/lib/rley/rgn/repetition_node.rb +62 -0
  28. data/lib/rley/rgn/sequence_node.rb +30 -0
  29. data/lib/rley/{notation → rgn}/symbol_node.rb +15 -7
  30. data/lib/rley/{notation → rgn}/tokenizer.rb +1 -1
  31. data/spec/rley/parser/dangling_else_spec.rb +3 -3
  32. data/spec/rley/parser/gfg_earley_parser_spec.rb +48 -48
  33. data/spec/rley/{notation → rgn}/grammar_builder_spec.rb +58 -54
  34. data/spec/rley/{notation → rgn}/parser_spec.rb +36 -24
  35. data/spec/rley/rgn/repetition_node_spec.rb +56 -0
  36. data/spec/rley/rgn/sequence_node_spec.rb +48 -0
  37. data/spec/rley/rgn/symbol_node_spec.rb +33 -0
  38. data/spec/rley/{notation → rgn}/tokenizer_spec.rb +2 -2
  39. data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
  40. data/spec/rley/support/grammar_int_seq_helper.rb +2 -2
  41. metadata +31 -24
  42. data/lib/rley/notation/all_notation_nodes.rb +0 -4
  43. data/lib/rley/notation/grouping_node.rb +0 -23
  44. data/lib/rley/notation/sequence_node.rb +0 -35
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e15e0d34742a0b6c95f3cc7c7d84020d6a486a1e47c2b3fb38bf33c42ff71ea9
4
- data.tar.gz: 2a842d65101d8d833478f70023a4c5263b5dbb24b36799329ef82dffef70be37
3
+ metadata.gz: 55b425fac78001960b95935a8dc8de88d1c0826bea5fba5fae72455f53cb084a
4
+ data.tar.gz: fdf0f9ed39985923134cd3ecc4bdd9325c9aab783437ab1d61a07e333ad3f2eb
5
5
  SHA512:
6
- metadata.gz: 78ee8ff8d729f52f710cf6c52eccb880c78a851b45e6a4df146aa1138f77275557c87ebc5b591896695d3ab8c9072d5b6a7aa32bf600fb14d09658fd834779fe
7
- data.tar.gz: 2c0cfc070c05d0049f65cb938e03162a305fdab926a8e47d9825cf7a36841262a076407a9c6ac86685954a5b3b174137b24bfc7c976fee443d791d82346b1fe4
6
+ metadata.gz: e63b7a5df081fa326b48f7f2e2cedbaf877ed377927e408278e3c8078a30fdcc69714ff234ba98dccc1eb3b05eaea53ebb39846a43e2cb794b883d91ce1c6953
7
+ data.tar.gz: e60bc7b045fe8d7203ddf26ecd603646137d43cbbbe79b533b2088c0f7d15bb46bc1dd58ae4d811ba8d78327fdcb0045cfddec4d708a76968d33c1602bf71cf4
data/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
1
+ ### 0.8.08 / 2021-10-31
2
+ - Fixes in example files. Refactoring: module `Notation` renamed to `RGN`.
3
+
4
+
5
+ * [CHANGE] Module `Notation` changed to `RGN`.
6
+
7
+ * [FIX] `examples/general/calc_iter1/CalcLexer#initialize` now strips whitespaces at end of expression to parse.
8
+ * [FIX] `examples/general/calc_iter2/CalcLexer#initialize` now strips whitespaces at end of expression to parse.
9
+ * [FIX] File `examples/general/left.rb` Call to wrong grammar builder
10
+ * [FIX] File `examples/general/right.rb` Call to wrong grammar builder
11
+ * [FIX] File `examples/NLP/benchmark_pico_en.rb` Grammar rules used obsolete rule syntax
12
+ * [FIX] File `examples/NLP/engtagger.rb` Grammar rules used obsolete rule syntax
13
+
1
14
  ### 0.8.06 / 2021-10-06
2
15
  - Added more comment in `/examples/tokenizer` files
3
16
 
data/appveyor.yml CHANGED
@@ -1,15 +1,13 @@
1
1
  version: '{build}'
2
2
  max_jobs: 5
3
3
  environment:
4
- matrix:
4
+ matrix:
5
5
  - Ruby_version: 26-x64
6
6
  - Ruby_version: 25-x64
7
7
  - Ruby_version: 24-x64
8
- - Ruby_version: 23-x64
9
8
  - Ruby_version: 26
10
9
  - Ruby_version: 25
11
10
  - Ruby_version: 24
12
- - Ruby_version: 23
13
11
 
14
12
  install:
15
13
  - set PATH=C:\Ruby%Ruby_version%\bin;%PATH%
@@ -24,13 +24,13 @@ engine.build_grammar do
24
24
  add_terminals('Determiner', 'Preposition')
25
25
 
26
26
  # Here we define the productions (= grammar rules)
27
- rule 'S' => %w[NP VP]
27
+ rule 'S' => 'NP VP'
28
28
  rule 'NP' => 'Proper-Noun'
29
- rule 'NP' => %w[Determiner Noun]
30
- rule 'NP' => %w[Determiner Noun PP]
31
- rule 'VP' => %w[Verb NP]
32
- rule 'VP' => %w[Verb NP PP]
33
- rule 'PP' => %w[Preposition NP]
29
+ rule 'NP' => 'Determiner Noun'
30
+ rule 'NP' => 'Determiner Noun PP'
31
+ rule 'VP' => 'Verb NP'
32
+ rule 'VP' => 'Verb NP PP'
33
+ rule 'PP' => 'Preposition NP'
34
34
  end
35
35
 
36
36
  ########################################
@@ -136,13 +136,13 @@ nlp_engine.build_grammar do
136
136
  add_terminals('DET', 'IN', 'VBD')
137
137
 
138
138
  # Here we define the productions (= grammar rules)
139
- rule 'S' => %w[NP VP]
139
+ rule 'S' => 'NP VP'
140
140
  rule 'NP' => 'NNP'
141
- rule 'NP' => %w[DET NN]
142
- rule 'NP' => %w[DET NN PP]
143
- rule 'VP' => %w[VBD NP]
144
- rule 'VP' => %w[VBD NP PP]
145
- rule 'PP' => %w[IN NP]
141
+ rule 'NP' => 'DET NN'
142
+ rule 'NP' => 'DET NN PP'
143
+ rule 'VP' => 'VBD NP'
144
+ rule 'VP' => 'VBD NP PP'
145
+ rule 'PP' => 'IN NP'
146
146
  end
147
147
 
148
148
  # text = "Yo I'm not done with you"
@@ -24,7 +24,7 @@ class CalcLexer
24
24
  class ScanError < StandardError; end
25
25
 
26
26
  def initialize(source)
27
- @scanner = StringScanner.new(source)
27
+ @scanner = StringScanner.new(source.strip)
28
28
  @lineno = 1
29
29
  end
30
30
 
@@ -30,7 +30,7 @@ class CalcLexer
30
30
  class ScanError < StandardError; end
31
31
 
32
32
  def initialize(source)
33
- @scanner = StringScanner.new(source)
33
+ @scanner = StringScanner.new(source.strip)
34
34
  @lineno = 1
35
35
  end
36
36
 
@@ -4,7 +4,7 @@
4
4
  require 'rley' # Load Rley library
5
5
 
6
6
  # Instantiate a builder object that will build the grammar for us.
7
- builder = Rley::Syntax::GrammarBuilder.new do
7
+ builder = Rley::grammar_builder do
8
8
  # The grammar defines a language that consists in a sequence
9
9
  # of 0 or more dots...
10
10
  add_terminals('DOT')
@@ -4,7 +4,7 @@
4
4
  require 'rley' # Load Rley library
5
5
 
6
6
  # Instantiate a builder object that will build the grammar for us
7
- builder = Rley::Syntax::GrammarBuilder.new do
7
+ builder = Rley::grammar_builder do
8
8
  # The grammar defines a language that consists in a sequence
9
9
  # of 0 or more dots...
10
10
  add_terminals('DOT')
@@ -1,12 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  # encoding: UTF-8
4
+
4
5
  #--
5
6
  # This file is automatically generated. Do not modify it.
6
7
  # Generated by: oedipus_lex version 2.5.3.
7
8
  # Source: loxxy_raw_scanner.rex
8
9
  #++
9
10
 
11
+ # As Rubocop shouts about "offences" in the generated code,
12
+ # we disable the detection of most of them...
10
13
  # rubocop: disable Style/MutableConstant
11
14
  # rubocop: disable Layout/SpaceBeforeSemicolon
12
15
  # rubocop: disable Style/Alias
@@ -4,7 +4,7 @@ require 'rley'
4
4
  require_relative 'loxxy_raw_scanner.rex'
5
5
 
6
6
  # Tokenizer for the Lox language that is compatible with a Rley parser.
7
- # It works as an adapter between the parser and the scanner generated
7
+ # It works as an adapter between the parser and the scanner generated
8
8
  # with `oedipus_lex` scanner generator.
9
9
  # The purpose is to transform the output of a `LoxxyRawScanner` instance
10
10
  # into a sequence of tokens in the format expected by Rley.
@@ -60,7 +60,7 @@ class LoxxyTokenizer
60
60
  end
61
61
 
62
62
  # Set the text to tokenize.
63
- # @param source [String] The text to tokenize
63
+ # @param source [String] The text to tokenize
64
64
  def start_with(source)
65
65
  @input = source
66
66
  end
@@ -29,5 +29,5 @@ LOX_END
29
29
 
30
30
  loxxy_tokenizer = LoxxyTokenizer.new(lox_source)
31
31
  tokens = loxxy_tokenizer.tokens
32
- File::open('tokens.yaml', 'w') { |f| YAML.dump(tokens, f) }
32
+ File::open('tokens.yml', 'w') { |f| YAML.dump(tokens, f) }
33
33
  puts 'Done: tokenizer results saved in YAML.'
File without changes
@@ -5,7 +5,7 @@
5
5
 
6
6
  module Rley # Module used as a namespace
7
7
  # The version number of the gem.
8
- Version = '0.8.06'
8
+ Version = '0.8.08'
9
9
 
10
10
  # Brief description of the gem.
11
11
  Description = "Ruby implementation of the Earley's parsing algorithm"
data/lib/rley/engine.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative './notation/grammar_builder'
3
+ require_relative './rgn/grammar_builder'
4
4
  require_relative './parser/gfg_earley_parser'
5
5
  require_relative './parse_tree_visitor'
6
6
  require_relative './parse_forest_visitor'
@@ -61,7 +61,7 @@ module Rley # This module is used as a namespace
61
61
  # add_production('elements' => 'INTEGER')
62
62
  # end
63
63
  def build_grammar(&aBlock)
64
- builder = Rley::Notation::GrammarBuilder.new(&aBlock)
64
+ builder = Rley::RGN::GrammarBuilder.new(&aBlock)
65
65
  @grammar = builder.grammar
66
66
  end
67
67
 
@@ -1,15 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative './notation/grammar_builder'
3
+ require_relative './rgn/grammar_builder'
4
4
 
5
5
  module Rley # Module used as a namespace
6
6
  # Factory method.
7
7
  # A grammar builder constructs a Rley grammar piece by piece
8
8
  # from DSL instructions in a provided code block.
9
9
  # @param aBlock [Proc] a code block
10
- # @return [Rley::Notation::GrammarBuilder] An object that builds a grammar.
10
+ # @return [Rley::RGN::GrammarBuilder] An object that builds a grammar.
11
11
  def self.grammar_builder(&aBlock)
12
- Rley::Notation::GrammarBuilder.new(&aBlock)
12
+ Rley::RGN::GrammarBuilder.new(&aBlock)
13
13
  end
14
14
  end # module
15
15
 
@@ -37,7 +37,7 @@ module Rley # This module is used as a namespace
37
37
  # @param theLexeme [String] the lexeme (= piece of text from input)
38
38
  # @param aTerminal [Syntax::Terminal, String]
39
39
  # The terminal symbol corresponding to the lexeme.
40
- # @param aPositiçon [Rley::Lexical::Position] position of the token in source file
40
+ # @param aPosition [Rley::Lexical::Position] position of the token in source file
41
41
  def initialize(theLexeme, aTerminal, aPosition = nil)
42
42
  raise 'Internal error: nil terminal symbol detected' if aTerminal.nil?
43
43
 
@@ -27,7 +27,7 @@ module Rley # This module is used as a namespace
27
27
  selfie = super(indentation)
28
28
  prefix = "\n" + (' ' * connector.size * indentation) + connector
29
29
  subnodes_repr = subnodes.reduce(+'') do |sub_result, subnode|
30
- sub_result << prefix + subnode.to_string(indentation + 1)
30
+ sub_result << (prefix + subnode.to_string(indentation + 1))
31
31
  end
32
32
 
33
33
  selfie + subnodes_repr
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'repetition_node'
4
+ require_relative 'sequence_node'
5
+ require_relative 'symbol_node'
@@ -5,7 +5,7 @@ require_relative '../engine'
5
5
  require_relative 'all_notation_nodes'
6
6
 
7
7
  module Rley
8
- module Notation
8
+ module RGN
9
9
  # The purpose of ASTBuilder is to build piece by piece an AST
10
10
  # (Abstract Syntax Tree) from a sequence of input tokens and
11
11
  # visit events produced by walking over a GFGParsing object.
@@ -72,7 +72,7 @@ module Rley
72
72
  end
73
73
 
74
74
  #####################################
75
- # SEMANTIC ACTIONS
75
+ # RGN SEMANTIC ACTIONS
76
76
  #####################################
77
77
 
78
78
  # rule('rhs' => 'member_seq').tag 'sequence'
@@ -80,7 +80,7 @@ module Rley
80
80
  if theChildren[0].size == 1
81
81
  theChildren[0].first
82
82
  else
83
- SequenceNode.new(theChildren[0].first.position, theChildren[0], nil)
83
+ SequenceNode.new(theChildren[0])
84
84
  end
85
85
  end
86
86
 
@@ -96,9 +96,15 @@ module Rley
96
96
 
97
97
  # rule('strait_member' => 'base_member annotation')
98
98
  def reduce_annotated_member(_production, _range, _tokens, theChildren)
99
- theChildren[0].annotation = theChildren[1]
100
-
101
- theChildren[0]
99
+ if theChildren[1].include?('repeat')
100
+ node = RepetitionNode.new(theChildren[0], theChildren[1].fetch('repeat'))
101
+ theChildren[1].delete('repeat')
102
+ theChildren[0].annotation = theChildren[1]
103
+ node
104
+ else
105
+ theChildren[0].annotation = theChildren[1]
106
+ theChildren[0]
107
+ end
102
108
  end
103
109
 
104
110
  # rule('base_member' => 'SYMBOL')
@@ -107,20 +113,21 @@ module Rley
107
113
  end
108
114
 
109
115
  # rule('base_member' => 'LEFT_PAREN member_seq RIGHT_PAREN')
110
- def reduce_grouping(_production, _range, tokens, theChildren)
116
+ def reduce_grouping(_production, _range, _tokens, theChildren)
111
117
  if theChildren[1].size == 1
112
118
  theChildren[1].first
113
119
  else
114
- rank = theChildren[0].range.high
115
- pos = tokens[rank].position
116
- GroupingNode.new(pos, theChildren[1], nil)
120
+ SequenceNode.new(theChildren[1])
117
121
  end
118
122
  end
119
123
 
120
124
  # rule('quantified_member' => 'base_member quantifier')
121
125
  def reduce_quantified_member(_production, _range, _tokens, theChildren)
122
- theChildren[0].repetition = theChildren[1]
123
- theChildren[0]
126
+ if theChildren == :exactly_one
127
+ theChildren[0]
128
+ else
129
+ RGN::RepetitionNode.new(theChildren[0], theChildren[1])
130
+ end
124
131
  end
125
132
 
126
133
  # rule('quantifier' => 'QUESTION_MARK')
@@ -1,24 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Rley
4
- module Notation
4
+ module RGN
5
5
  # Abstract class.
6
6
  # Instances of its subclasses represent nodes of an abstract syntax tree
7
7
  # that is the product of the parse of an input text.
8
8
  class ASTNode
9
- # @return [Rley::Lexical::Position] Position of the entry in the input stream.
10
- attr_reader :position
11
-
12
- # @return [Symbol]
13
- attr_accessor :repetition
14
-
15
9
  # @return [Hash]
16
10
  attr_reader :annotation
17
11
 
18
- # @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
19
- def initialize(aPosition)
20
- @position = aPosition
21
- @repetition = :exactly_one
12
+ def initialize
22
13
  @annotation = {}
23
14
  end
24
15
 
@@ -28,6 +19,16 @@ module Rley
28
19
  @annotation = aMapping
29
20
  end
30
21
 
22
+ def annotation_to_text
23
+ map_arr = []
24
+ @annotation.each_pair do |key, val|
25
+ literal = val.kind_of?(String) ? "'#{val}'" : val
26
+ map_arr << "#{key}: #{literal}"
27
+ end
28
+
29
+ "{ #{map_arr.join(', ')} }"
30
+ end
31
+
31
32
  # Notification that the parsing has successfully completed
32
33
  def done!
33
34
  # Default: do nothing ...
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Rley
4
- module Notation
4
+ module RGN
5
5
  class ASTVisitor
6
6
  # Link to the top node to visit
7
7
  attr_reader(:top)
@@ -10,7 +10,7 @@ module Rley
10
10
  attr_reader(:subscribers)
11
11
 
12
12
  # Build a visitor for the given top.
13
- # @param aTop [Notation::ASTNode] the parse tree to visit.
13
+ # @param aTop [RGN::ASTNode] the parse tree to visit.
14
14
  def initialize(aTop)
15
15
  raise StandardError if aTop.nil?
16
16
 
@@ -49,26 +49,26 @@ module Rley
49
49
  end
50
50
 
51
51
  # Visit event. The visitor is about to visit a symbol node.
52
- # @param aSymbolNode [Notation::SymbolNode] the symbol node to visit
52
+ # @param aSymbolNode [RGN::SymbolNode] the symbol node to visit
53
53
  def visit_symbol_node(aSymbolNode)
54
54
  broadcast(:before_symbol_node, aSymbolNode, self)
55
55
  broadcast(:after_symbol_node, aSymbolNode, self)
56
56
  end
57
57
 
58
58
  # Visit event. The visitor is about to visit a sequence node.
59
- # @param aSequenceNode [Notation::SequenceNode] the sequence node to visit
59
+ # @param aSequenceNode [RGN::SequenceNode] the sequence node to visit
60
60
  def visit_sequence_node(aSequenceNode)
61
61
  broadcast(:before_sequence_node, aSequenceNode, self)
62
62
  traverse_subnodes(aSequenceNode)
63
63
  broadcast(:after_sequence_node, aSequenceNode, self)
64
64
  end
65
65
 
66
- # Visit event. The visitor is about to visit a grouping node.
67
- # @param aGroupingNode [Notation::GroupingNode] the grouping node to visit
68
- def visit_grouping_node(aGroupingNode)
69
- broadcast(:before_grouping_node, aGroupingNode, self)
70
- traverse_subnodes(aGroupingNode) if aGroupingNode.repetition == :exactly_one
71
- broadcast(:after_grouping_node, aGroupingNode, self)
66
+ # Visit event. The visitor is about to visit a repetition node.
67
+ # @param aRepetitionNode [RGN::RepetitionNode] the repetition node to visit
68
+ def visit_repetition_node(aRepetitionNode)
69
+ broadcast(:before_repetition_node, aRepetitionNode, self)
70
+ traverse_subnodes(aRepetitionNode) if aRepetitionNode.repetition == :exactly_one
71
+ broadcast(:after_repetition_node, aRepetitionNode, self)
72
72
  end
73
73
 
74
74
  private
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'ast_node'
4
+
5
+ module Rley
6
+ module RGN
7
+ # Abstract class for a syntax node that is the parent
8
+ # of one or more subnodes.
9
+ class CompositeNode < ASTNode
10
+ # @return [Array<ASTNode>]
11
+ attr_reader :subnodes
12
+
13
+ # @return [Hash]
14
+ attr_accessor :constraints
15
+
16
+ # @param children [Array<ASTNode>] sequence of children nodes
17
+ def initialize(children)
18
+ super()
19
+ @subnodes = children
20
+ @constraints = []
21
+ end
22
+
23
+ def size
24
+ subnodes.size
25
+ end
26
+ end # class
27
+ end # module
28
+ end # module
@@ -3,7 +3,7 @@
3
3
  require_relative '../syntax/base_grammar_builder'
4
4
 
5
5
  module Rley
6
- module Notation
6
+ module RGN
7
7
  ########################################
8
8
  # Syntax for right-hand side of production rules
9
9
  builder = Rley::Syntax::BaseGrammarBuilder.new do