rley 0.8.06 → 0.8.08

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/appveyor.yml +1 -3
  4. data/examples/NLP/benchmark_pico_en.rb +6 -6
  5. data/examples/NLP/engtagger.rb +6 -6
  6. data/examples/general/calc_iter1/calc_lexer.rb +1 -1
  7. data/examples/general/calc_iter2/calc_lexer.rb +1 -1
  8. data/examples/general/left.rb +1 -1
  9. data/examples/general/right.rb +1 -1
  10. data/examples/tokenizer/loxxy_raw_scanner.rex.rb +3 -0
  11. data/examples/tokenizer/loxxy_tokenizer.rb +2 -2
  12. data/examples/tokenizer/run_tokenizer.rb +1 -1
  13. data/examples/tokenizer/{tokens.yaml → tokens.yml} +0 -0
  14. data/lib/rley/constants.rb +1 -1
  15. data/lib/rley/engine.rb +2 -2
  16. data/lib/rley/interface.rb +3 -3
  17. data/lib/rley/lexical/token.rb +1 -1
  18. data/lib/rley/ptree/non_terminal_node.rb +1 -1
  19. data/lib/rley/rgn/all_notation_nodes.rb +5 -0
  20. data/lib/rley/{notation → rgn}/ast_builder.rb +19 -12
  21. data/lib/rley/{notation → rgn}/ast_node.rb +12 -11
  22. data/lib/rley/{notation → rgn}/ast_visitor.rb +10 -10
  23. data/lib/rley/rgn/composite_node.rb +28 -0
  24. data/lib/rley/{notation → rgn}/grammar.rb +1 -1
  25. data/lib/rley/{notation → rgn}/grammar_builder.rb +86 -124
  26. data/lib/rley/{notation → rgn}/parser.rb +4 -4
  27. data/lib/rley/rgn/repetition_node.rb +62 -0
  28. data/lib/rley/rgn/sequence_node.rb +30 -0
  29. data/lib/rley/{notation → rgn}/symbol_node.rb +15 -7
  30. data/lib/rley/{notation → rgn}/tokenizer.rb +1 -1
  31. data/spec/rley/parser/dangling_else_spec.rb +3 -3
  32. data/spec/rley/parser/gfg_earley_parser_spec.rb +48 -48
  33. data/spec/rley/{notation → rgn}/grammar_builder_spec.rb +58 -54
  34. data/spec/rley/{notation → rgn}/parser_spec.rb +36 -24
  35. data/spec/rley/rgn/repetition_node_spec.rb +56 -0
  36. data/spec/rley/rgn/sequence_node_spec.rb +48 -0
  37. data/spec/rley/rgn/symbol_node_spec.rb +33 -0
  38. data/spec/rley/{notation → rgn}/tokenizer_spec.rb +2 -2
  39. data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
  40. data/spec/rley/support/grammar_int_seq_helper.rb +2 -2
  41. metadata +31 -24
  42. data/lib/rley/notation/all_notation_nodes.rb +0 -4
  43. data/lib/rley/notation/grouping_node.rb +0 -23
  44. data/lib/rley/notation/sequence_node.rb +0 -35
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e15e0d34742a0b6c95f3cc7c7d84020d6a486a1e47c2b3fb38bf33c42ff71ea9
4
- data.tar.gz: 2a842d65101d8d833478f70023a4c5263b5dbb24b36799329ef82dffef70be37
3
+ metadata.gz: 55b425fac78001960b95935a8dc8de88d1c0826bea5fba5fae72455f53cb084a
4
+ data.tar.gz: fdf0f9ed39985923134cd3ecc4bdd9325c9aab783437ab1d61a07e333ad3f2eb
5
5
  SHA512:
6
- metadata.gz: 78ee8ff8d729f52f710cf6c52eccb880c78a851b45e6a4df146aa1138f77275557c87ebc5b591896695d3ab8c9072d5b6a7aa32bf600fb14d09658fd834779fe
7
- data.tar.gz: 2c0cfc070c05d0049f65cb938e03162a305fdab926a8e47d9825cf7a36841262a076407a9c6ac86685954a5b3b174137b24bfc7c976fee443d791d82346b1fe4
6
+ metadata.gz: e63b7a5df081fa326b48f7f2e2cedbaf877ed377927e408278e3c8078a30fdcc69714ff234ba98dccc1eb3b05eaea53ebb39846a43e2cb794b883d91ce1c6953
7
+ data.tar.gz: e60bc7b045fe8d7203ddf26ecd603646137d43cbbbe79b533b2088c0f7d15bb46bc1dd58ae4d811ba8d78327fdcb0045cfddec4d708a76968d33c1602bf71cf4
data/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
1
+ ### 0.8.08 / 2021-10-31
2
+ - Fixes in example files; module `Notation` refactored and renamed to `RGN`
3
+
4
+
5
+ * [CHANGE] Module `Notation` changed to `RGN`.
6
+
7
+ * [FIX] `examples/general/calc_iter1/CalcLexer#initialize` now strips whitespaces at end of expression to parse.
8
+ * [FIX] `examples/general/calc_iter2/CalcLexer#initialize` now strips whitespaces at end of expression to parse.
9
+ * [FIX] File `examples/general/left.rb` Call to wrong grammar builder
10
+ * [FIX] File `examples/general/right.rb` Call to wrong grammar builder
11
+ * [FIX] File `examples/NLP/benchmark_pico_en.rb` Grammar rules used obsolete rule syntax
12
+ * [FIX] File `examples/NLP/engtagger.rb` Grammar rules used obsolete rule syntax
13
+
1
14
  ### 0.8.06 / 2021-10-06
2
15
  - Added more comment in `/examples/tokenizer` files
3
16
 
data/appveyor.yml CHANGED
@@ -1,15 +1,13 @@
1
1
  version: '{build}'
2
2
  max_jobs: 5
3
3
  environment:
4
- matrix:
4
+ matrix:
5
5
  - Ruby_version: 26-x64
6
6
  - Ruby_version: 25-x64
7
7
  - Ruby_version: 24-x64
8
- - Ruby_version: 23-x64
9
8
  - Ruby_version: 26
10
9
  - Ruby_version: 25
11
10
  - Ruby_version: 24
12
- - Ruby_version: 23
13
11
 
14
12
  install:
15
13
  - set PATH=C:\Ruby%Ruby_version%\bin;%PATH%
@@ -24,13 +24,13 @@ engine.build_grammar do
24
24
  add_terminals('Determiner', 'Preposition')
25
25
 
26
26
  # Here we define the productions (= grammar rules)
27
- rule 'S' => %w[NP VP]
27
+ rule 'S' => 'NP VP'
28
28
  rule 'NP' => 'Proper-Noun'
29
- rule 'NP' => %w[Determiner Noun]
30
- rule 'NP' => %w[Determiner Noun PP]
31
- rule 'VP' => %w[Verb NP]
32
- rule 'VP' => %w[Verb NP PP]
33
- rule 'PP' => %w[Preposition NP]
29
+ rule 'NP' => 'Determiner Noun'
30
+ rule 'NP' => 'Determiner Noun PP'
31
+ rule 'VP' => 'Verb NP'
32
+ rule 'VP' => 'Verb NP PP'
33
+ rule 'PP' => 'Preposition NP'
34
34
  end
35
35
 
36
36
  ########################################
@@ -136,13 +136,13 @@ nlp_engine.build_grammar do
136
136
  add_terminals('DET', 'IN', 'VBD')
137
137
 
138
138
  # Here we define the productions (= grammar rules)
139
- rule 'S' => %w[NP VP]
139
+ rule 'S' => 'NP VP'
140
140
  rule 'NP' => 'NNP'
141
- rule 'NP' => %w[DET NN]
142
- rule 'NP' => %w[DET NN PP]
143
- rule 'VP' => %w[VBD NP]
144
- rule 'VP' => %w[VBD NP PP]
145
- rule 'PP' => %w[IN NP]
141
+ rule 'NP' => 'DET NN'
142
+ rule 'NP' => 'DET NN PP'
143
+ rule 'VP' => 'VBD NP'
144
+ rule 'VP' => 'VBD NP PP'
145
+ rule 'PP' => 'IN NP'
146
146
  end
147
147
 
148
148
  # text = "Yo I'm not done with you"
@@ -24,7 +24,7 @@ class CalcLexer
24
24
  class ScanError < StandardError; end
25
25
 
26
26
  def initialize(source)
27
- @scanner = StringScanner.new(source)
27
+ @scanner = StringScanner.new(source.strip)
28
28
  @lineno = 1
29
29
  end
30
30
 
@@ -30,7 +30,7 @@ class CalcLexer
30
30
  class ScanError < StandardError; end
31
31
 
32
32
  def initialize(source)
33
- @scanner = StringScanner.new(source)
33
+ @scanner = StringScanner.new(source.strip)
34
34
  @lineno = 1
35
35
  end
36
36
 
@@ -4,7 +4,7 @@
4
4
  require 'rley' # Load Rley library
5
5
 
6
6
  # Instantiate a builder object that will build the grammar for us.
7
- builder = Rley::Syntax::GrammarBuilder.new do
7
+ builder = Rley::grammar_builder do
8
8
  # The grammar defines a language that consists in a sequence
9
9
  # of 0 or more dots...
10
10
  add_terminals('DOT')
@@ -4,7 +4,7 @@
4
4
  require 'rley' # Load Rley library
5
5
 
6
6
  # Instantiate a builder object that will build the grammar for us
7
- builder = Rley::Syntax::GrammarBuilder.new do
7
+ builder = Rley::grammar_builder do
8
8
  # The grammar defines a language that consists in a sequence
9
9
  # of 0 or more dots...
10
10
  add_terminals('DOT')
@@ -1,12 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  # encoding: UTF-8
4
+
4
5
  #--
5
6
  # This file is automatically generated. Do not modify it.
6
7
  # Generated by: oedipus_lex version 2.5.3.
7
8
  # Source: loxxy_raw_scanner.rex
8
9
  #++
9
10
 
11
+ # As Rubocop shouts about "offences" in the generated code,
12
+ # we disable the detection of most of them...
10
13
  # rubocop: disable Style/MutableConstant
11
14
  # rubocop: disable Layout/SpaceBeforeSemicolon
12
15
  # rubocop: disable Style/Alias
@@ -4,7 +4,7 @@ require 'rley'
4
4
  require_relative 'loxxy_raw_scanner.rex'
5
5
 
6
6
  # Tokenizer for the Lox language that is compatible with a Rley parser.
7
- # It works as an adapter between the parser and the scanner generated
7
+ # It works as an adapter between the parser and the scanner generated
8
8
  # with `oedipus_lex` scanner generator.
9
9
  # The purpose is to transform the output of a `LoxxyRawScanner` instance
10
10
  # into a sequence of tokens in the format expected by Rley.
@@ -60,7 +60,7 @@ class LoxxyTokenizer
60
60
  end
61
61
 
62
62
  # Set the text to tokenize.
63
- # @param source [String] The text to tokenize
63
+ # @param source [String] The text to tokenize
64
64
  def start_with(source)
65
65
  @input = source
66
66
  end
@@ -29,5 +29,5 @@ LOX_END
29
29
 
30
30
  loxxy_tokenizer = LoxxyTokenizer.new(lox_source)
31
31
  tokens = loxxy_tokenizer.tokens
32
- File::open('tokens.yaml', 'w') { |f| YAML.dump(tokens, f) }
32
+ File::open('tokens.yml', 'w') { |f| YAML.dump(tokens, f) }
33
33
  puts 'Done: tokenizer results saved in YAML.'
File without changes
@@ -5,7 +5,7 @@
5
5
 
6
6
  module Rley # Module used as a namespace
7
7
  # The version number of the gem.
8
- Version = '0.8.06'
8
+ Version = '0.8.08'
9
9
 
10
10
  # Brief description of the gem.
11
11
  Description = "Ruby implementation of the Earley's parsing algorithm"
data/lib/rley/engine.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative './notation/grammar_builder'
3
+ require_relative './rgn/grammar_builder'
4
4
  require_relative './parser/gfg_earley_parser'
5
5
  require_relative './parse_tree_visitor'
6
6
  require_relative './parse_forest_visitor'
@@ -61,7 +61,7 @@ module Rley # This module is used as a namespace
61
61
  # add_production('elements' => 'INTEGER')
62
62
  # end
63
63
  def build_grammar(&aBlock)
64
- builder = Rley::Notation::GrammarBuilder.new(&aBlock)
64
+ builder = Rley::RGN::GrammarBuilder.new(&aBlock)
65
65
  @grammar = builder.grammar
66
66
  end
67
67
 
@@ -1,15 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative './notation/grammar_builder'
3
+ require_relative './rgn/grammar_builder'
4
4
 
5
5
  module Rley # Module used as a namespace
6
6
  # Factory method.
7
7
  # A grammar builder constructs a Rley grammar piece by piece
8
8
  # from DSL instructions in a provided code block.
9
9
  # @param aBlock [Proc] a code block
10
- # @return [Rley::Notation::GrammarBuilder] An object that builds a grammar.
10
+ # @return [Rley::RGN::GrammarBuilder] An object that builds a grammar.
11
11
  def self.grammar_builder(&aBlock)
12
- Rley::Notation::GrammarBuilder.new(&aBlock)
12
+ Rley::RGN::GrammarBuilder.new(&aBlock)
13
13
  end
14
14
  end # module
15
15
 
@@ -37,7 +37,7 @@ module Rley # This module is used as a namespace
37
37
  # @param theLexeme [String] the lexeme (= piece of text from input)
38
38
  # @param aTerminal [Syntax::Terminal, String]
39
39
  # The terminal symbol corresponding to the lexeme.
40
- # @param aPositiçon [Rley::Lexical::Position] position of the token in source file
40
+ # @param aPosition [Rley::Lexical::Position] position of the token in source file
41
41
  def initialize(theLexeme, aTerminal, aPosition = nil)
42
42
  raise 'Internal error: nil terminal symbol detected' if aTerminal.nil?
43
43
 
@@ -27,7 +27,7 @@ module Rley # This module is used as a namespace
27
27
  selfie = super(indentation)
28
28
  prefix = "\n" + (' ' * connector.size * indentation) + connector
29
29
  subnodes_repr = subnodes.reduce(+'') do |sub_result, subnode|
30
- sub_result << prefix + subnode.to_string(indentation + 1)
30
+ sub_result << (prefix + subnode.to_string(indentation + 1))
31
31
  end
32
32
 
33
33
  selfie + subnodes_repr
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'repetition_node'
4
+ require_relative 'sequence_node'
5
+ require_relative 'symbol_node'
@@ -5,7 +5,7 @@ require_relative '../engine'
5
5
  require_relative 'all_notation_nodes'
6
6
 
7
7
  module Rley
8
- module Notation
8
+ module RGN
9
9
  # The purpose of ASTBuilder is to build piece by piece an AST
10
10
  # (Abstract Syntax Tree) from a sequence of input tokens and
11
11
  # visit events produced by walking over a GFGParsing object.
@@ -72,7 +72,7 @@ module Rley
72
72
  end
73
73
 
74
74
  #####################################
75
- # SEMANTIC ACTIONS
75
+ # RGN SEMANTIC ACTIONS
76
76
  #####################################
77
77
 
78
78
  # rule('rhs' => 'member_seq').tag 'sequence'
@@ -80,7 +80,7 @@ module Rley
80
80
  if theChildren[0].size == 1
81
81
  theChildren[0].first
82
82
  else
83
- SequenceNode.new(theChildren[0].first.position, theChildren[0], nil)
83
+ SequenceNode.new(theChildren[0])
84
84
  end
85
85
  end
86
86
 
@@ -96,9 +96,15 @@ module Rley
96
96
 
97
97
  # rule('strait_member' => 'base_member annotation')
98
98
  def reduce_annotated_member(_production, _range, _tokens, theChildren)
99
- theChildren[0].annotation = theChildren[1]
100
-
101
- theChildren[0]
99
+ if theChildren[1].include?('repeat')
100
+ node = RepetitionNode.new(theChildren[0], theChildren[1].fetch('repeat'))
101
+ theChildren[1].delete('repeat')
102
+ theChildren[0].annotation = theChildren[1]
103
+ node
104
+ else
105
+ theChildren[0].annotation = theChildren[1]
106
+ theChildren[0]
107
+ end
102
108
  end
103
109
 
104
110
  # rule('base_member' => 'SYMBOL')
@@ -107,20 +113,21 @@ module Rley
107
113
  end
108
114
 
109
115
  # rule('base_member' => 'LEFT_PAREN member_seq RIGHT_PAREN')
110
- def reduce_grouping(_production, _range, tokens, theChildren)
116
+ def reduce_grouping(_production, _range, _tokens, theChildren)
111
117
  if theChildren[1].size == 1
112
118
  theChildren[1].first
113
119
  else
114
- rank = theChildren[0].range.high
115
- pos = tokens[rank].position
116
- GroupingNode.new(pos, theChildren[1], nil)
120
+ SequenceNode.new(theChildren[1])
117
121
  end
118
122
  end
119
123
 
120
124
  # rule('quantified_member' => 'base_member quantifier')
121
125
  def reduce_quantified_member(_production, _range, _tokens, theChildren)
122
- theChildren[0].repetition = theChildren[1]
123
- theChildren[0]
126
+ if theChildren == :exactly_one
127
+ theChildren[0]
128
+ else
129
+ RGN::RepetitionNode.new(theChildren[0], theChildren[1])
130
+ end
124
131
  end
125
132
 
126
133
  # rule('quantifier' => 'QUESTION_MARK')
@@ -1,24 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Rley
4
- module Notation
4
+ module RGN
5
5
  # Abstract class.
6
6
  # Instances of its subclasses represent nodes of an abstract syntax tree
7
7
  # that is the product of the parse of an input text.
8
8
  class ASTNode
9
- # @return [Rley::Lexical::Position] Position of the entry in the input stream.
10
- attr_reader :position
11
-
12
- # @return [Symbol]
13
- attr_accessor :repetition
14
-
15
9
  # @return [Hash]
16
10
  attr_reader :annotation
17
11
 
18
- # @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
19
- def initialize(aPosition)
20
- @position = aPosition
21
- @repetition = :exactly_one
12
+ def initialize
22
13
  @annotation = {}
23
14
  end
24
15
 
@@ -28,6 +19,16 @@ module Rley
28
19
  @annotation = aMapping
29
20
  end
30
21
 
22
+ def annotation_to_text
23
+ map_arr = []
24
+ @annotation.each_pair do |key, val|
25
+ literal = val.kind_of?(String) ? "'#{val}'" : val
26
+ map_arr << "#{key}: #{literal}"
27
+ end
28
+
29
+ "{ #{map_arr.join(', ')} }"
30
+ end
31
+
31
32
  # Notification that the parsing has successfully completed
32
33
  def done!
33
34
  # Default: do nothing ...
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Rley
4
- module Notation
4
+ module RGN
5
5
  class ASTVisitor
6
6
  # Link to the top node to visit
7
7
  attr_reader(:top)
@@ -10,7 +10,7 @@ module Rley
10
10
  attr_reader(:subscribers)
11
11
 
12
12
  # Build a visitor for the given top.
13
- # @param aTop [Notation::ASTNode] the parse tree to visit.
13
+ # @param aTop [RGN::ASTNode] the parse tree to visit.
14
14
  def initialize(aTop)
15
15
  raise StandardError if aTop.nil?
16
16
 
@@ -49,26 +49,26 @@ module Rley
49
49
  end
50
50
 
51
51
  # Visit event. The visitor is about to visit a symbol node.
52
- # @param aSymbolNode [Notation::SymbolNode] the symbol node to visit
52
+ # @param aSymbolNode [RGN::SymbolNode] the symbol node to visit
53
53
  def visit_symbol_node(aSymbolNode)
54
54
  broadcast(:before_symbol_node, aSymbolNode, self)
55
55
  broadcast(:after_symbol_node, aSymbolNode, self)
56
56
  end
57
57
 
58
58
  # Visit event. The visitor is about to visit a sequence node.
59
- # @param aSequenceNode [Notation::SequenceNode] the sequence node to visit
59
+ # @param aSequenceNode [RGN::SequenceNode] the sequence node to visit
60
60
  def visit_sequence_node(aSequenceNode)
61
61
  broadcast(:before_sequence_node, aSequenceNode, self)
62
62
  traverse_subnodes(aSequenceNode)
63
63
  broadcast(:after_sequence_node, aSequenceNode, self)
64
64
  end
65
65
 
66
- # Visit event. The visitor is about to visit a grouping node.
67
- # @param aGroupingNode [Notation::GroupingNode] the grouping node to visit
68
- def visit_grouping_node(aGroupingNode)
69
- broadcast(:before_grouping_node, aGroupingNode, self)
70
- traverse_subnodes(aGroupingNode) if aGroupingNode.repetition == :exactly_one
71
- broadcast(:after_grouping_node, aGroupingNode, self)
66
+ # Visit event. The visitor is about to visit a repetition node.
67
+ # @param aRepetitionNode [RGN::RepetitionNode] the repetition node to visit
68
+ def visit_repetition_node(aRepetitionNode)
69
+ broadcast(:before_repetition_node, aRepetitionNode, self)
70
+ traverse_subnodes(aRepetitionNode) if aRepetitionNode.repetition == :exactly_one
71
+ broadcast(:after_repetition_node, aRepetitionNode, self)
72
72
  end
73
73
 
74
74
  private
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'ast_node'
4
+
5
+ module Rley
6
+ module RGN
7
+ # Abstract class for a syntax node that is the parent
8
+ # of one or more subnodes.
9
+ class CompositeNode < ASTNode
10
+ # @return [Array<ASTNode>]
11
+ attr_reader :subnodes
12
+
13
+ # @return [Hash]
14
+ attr_accessor :constraints
15
+
16
+ # @param children [Array<ASTNode>] sequence of children nodes
17
+ def initialize(children)
18
+ super()
19
+ @subnodes = children
20
+ @constraints = []
21
+ end
22
+
23
+ def size
24
+ subnodes.size
25
+ end
26
+ end # class
27
+ end # module
28
+ end # module
@@ -3,7 +3,7 @@
3
3
  require_relative '../syntax/base_grammar_builder'
4
4
 
5
5
  module Rley
6
- module Notation
6
+ module RGN
7
7
  ########################################
8
8
  # Syntax for right-hand side of production rules
9
9
  builder = Rley::Syntax::BaseGrammarBuilder.new do