rley 0.5.05 → 0.5.06

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/examples/NLP/mini_en_demo.rb +1 -1
  4. data/examples/data_formats/JSON/json_lexer.rb +4 -4
  5. data/examples/general/calc_iter1/calc_lexer.rb +1 -1
  6. data/examples/general/calc_iter2/calc_ast_builder.rb +51 -23
  7. data/examples/general/calc_iter2/calc_ast_nodes.rb +27 -6
  8. data/examples/general/calc_iter2/calc_demo.rb +2 -1
  9. data/examples/general/calc_iter2/calc_grammar.rb +14 -5
  10. data/examples/general/calc_iter2/calc_lexer.rb +14 -5
  11. data/examples/general/calc_iter2/spec/calculator_spec.rb +140 -23
  12. data/lib/rley.rb +1 -1
  13. data/lib/rley/{parser → base}/base_parser.rb +5 -3
  14. data/lib/rley/{parser → base}/dotted_item.rb +27 -16
  15. data/lib/rley/{parser → base}/grm_items_builder.rb +4 -2
  16. data/lib/rley/constants.rb +1 -1
  17. data/lib/rley/formatter/base_formatter.rb +1 -0
  18. data/lib/rley/gfg/item_vertex.rb +16 -5
  19. data/lib/rley/gfg/non_terminal_vertex.rb +5 -0
  20. data/lib/rley/gfg/vertex.rb +1 -2
  21. data/lib/rley/lexical/token.rb +31 -0
  22. data/lib/rley/{tokens → lexical}/token_range.rb +8 -2
  23. data/lib/rley/parser/gfg_earley_parser.rb +2 -2
  24. data/lib/rley/parser/parse_tree_builder.rb +2 -2
  25. data/lib/rley/ptree/parse_tree_node.rb +2 -2
  26. data/lib/rley/sppf/sppf_node.rb +2 -2
  27. data/lib/rley/syntax/grammar.rb +5 -1
  28. data/lib/rley/syntax/grammar_builder.rb +2 -2
  29. data/lib/rley/syntax/terminal.rb +1 -1
  30. data/spec/rley/{parser → base}/dotted_item_spec.rb +2 -2
  31. data/spec/rley/{parser → base}/grm_items_builder_spec.rb +2 -2
  32. data/spec/rley/formatter/asciitree_spec.rb +6 -6
  33. data/spec/rley/formatter/bracket_notation_spec.rb +6 -6
  34. data/spec/rley/formatter/debug_spec.rb +6 -6
  35. data/spec/rley/formatter/json_spec.rb +6 -6
  36. data/spec/rley/gfg/call_edge_spec.rb +2 -2
  37. data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
  38. data/spec/rley/gfg/item_vertex_spec.rb +9 -9
  39. data/spec/rley/gfg/return_edge_spec.rb +2 -2
  40. data/spec/rley/{tokens → lexical}/token_range_spec.rb +2 -2
  41. data/spec/rley/{tokens → lexical}/token_spec.rb +2 -2
  42. data/spec/rley/parse_forest_visitor_spec.rb +1 -1
  43. data/spec/rley/parse_tree_visitor_spec.rb +6 -6
  44. data/spec/rley/parser/ast_builder_spec.rb +1 -1
  45. data/spec/rley/parser/cst_builder_spec.rb +1 -1
  46. data/spec/rley/parser/error_reason_spec.rb +3 -3
  47. data/spec/rley/parser/gfg_chart_spec.rb +4 -4
  48. data/spec/rley/parser/gfg_earley_parser_spec.rb +3 -3
  49. data/spec/rley/parser/gfg_parsing_spec.rb +5 -5
  50. data/spec/rley/parser/groucho_spec.rb +1 -1
  51. data/spec/rley/parser/parse_entry_set_spec.rb +4 -4
  52. data/spec/rley/parser/parse_entry_spec.rb +4 -4
  53. data/spec/rley/parser/parse_state_spec.rb +7 -7
  54. data/spec/rley/parser/parse_tracer_spec.rb +5 -5
  55. data/spec/rley/parser/parse_walker_factory_spec.rb +1 -1
  56. data/spec/rley/ptree/non_terminal_node_spec.rb +1 -1
  57. data/spec/rley/sppf/alternative_node_spec.rb +4 -4
  58. data/spec/rley/sppf/non_terminal_node_spec.rb +2 -2
  59. data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
  60. data/spec/rley/support/expectation_helper.rb +1 -1
  61. data/spec/rley/support/grammar_ambig01_helper.rb +2 -2
  62. data/spec/rley/support/grammar_arr_int_helper.rb +3 -3
  63. data/spec/rley/support/grammar_b_expr_helper.rb +2 -2
  64. data/spec/rley/support/grammar_helper.rb +3 -3
  65. data/spec/rley/support/grammar_l0_helper.rb +2 -2
  66. data/spec/rley/support/grammar_pb_helper.rb +2 -2
  67. metadata +15 -15
  68. data/lib/rley/tokens/token.rb +0 -14
@@ -4,7 +4,7 @@
4
4
 
5
5
  require_relative './rley/constants'
6
6
  require_relative './rley/syntax/grammar_builder'
7
- require_relative './rley/tokens/token'
7
+ require_relative './rley/lexical/token'
8
8
  require_relative './rley/parser/gfg_earley_parser'
9
9
  require_relative './rley/parser/parse_tree_builder'
10
10
  require_relative './rley/parse_tree_visitor'
@@ -1,21 +1,23 @@
1
1
  require_relative '../syntax/grammar'
2
2
  require_relative 'grm_items_builder' # Use mix-in module
3
- require_relative 'parse_tracer'
3
+
4
4
 
5
5
 
6
6
  module Rley # This module is used as a namespace
7
- module Parser # This module is used as a namespace
7
+ module Base # This module is used as a namespace
8
8
  # Abstract class for Earley parser.
9
9
  class BaseParser
10
10
  include GrmItemsBuilder # Mix-in for creating dotted items of grammar
11
11
 
12
12
  # The grammar of the language.
13
+ # @return [Syntax::Grammar]
13
14
  attr_reader(:grammar)
14
15
 
15
16
  # The dotted items/rules for the productions of the grammar
16
17
  attr_reader(:dotted_items)
17
18
 
18
-
19
+ # Constructor.
20
+ # @param aGrammar [Syntax::Grammar] The grammar of the language.
19
21
  def initialize(aGrammar)
20
22
  @grammar = aGrammar
21
23
  @dotted_items = build_dotted_items(grammar) # Method from mixin
@@ -1,30 +1,33 @@
1
1
  module Rley # This module is used as a namespace
2
- # A dotted item is a parse state for a given production/grammar rule
3
- # It partitions the rhs of the rule in two parts.
4
- # The left part consists of the symbols in the rules that are matched
5
- # by the input tokens.
6
- # The right part consists of symbols that are predicted to match the
7
- # input tokens.
8
- # The terminology stems from the traditional way to visualize the partition
9
- # by using a fat dot character as a separator between the left and right
10
- # parts
11
- # An item with the dot at the beginning (i.e. before any rhs symbol)
12
- # is called a predicted item.
13
- # An item with the dot at the end (i.e. after all rhs symbols)
14
- # is called a reduce item.
15
- # An item with a dot in front of a terminal is called a shift item.
16
- module Parser # This module is used as a namespace
2
+ module Base # This module is used as a namespace
3
+ # A dotted item is a parse state for a given production/grammar rule
4
+ # It partitions the rhs of the rule in two parts.
5
+ # The left part consists of the symbols in the rules that are matched
6
+ # by the input tokens.
7
+ # The right part consists of symbols that are predicted to match the
8
+ # input tokens.
9
+ # The terminology stems from the traditional way to visualize the partition
10
+ # by using a fat dot character as a separator between the left and right
11
+ # parts
12
+ # An item with the dot at the beginning (i.e. before any rhs symbol)
13
+ # is called a predicted item.
14
+ # An item with the dot at the end (i.e. after all rhs symbols)
15
+ # is called a reduce item.
16
+ # An item with a dot in front of a terminal is called a shift item.
17
17
  class DottedItem
18
18
  # Production rule
19
+ # @return [Syntax::Production]
19
20
  attr_reader(:production)
20
21
 
21
22
  # Index of the next symbol (from the rhs) after the 'dot'.
22
23
  # If the dot is at the end of the rhs (i.e. there is no next
23
24
  # symbol), then the position takes the value -1.
24
25
  # If the rhs is empty, then the position is -2
26
+ # @return [Integer]
25
27
  attr_reader(:position)
26
28
 
27
- # @param aProduction
29
+ # @param aProduction [Syntax::Production]
30
+ # @param aPosition [Integer] Position of the dot in rhs of production.
28
31
  def initialize(aProduction, aPosition)
29
32
  @production = aProduction
30
33
  @position = valid_position(aPosition)
@@ -46,6 +49,7 @@ module Rley # This module is used as a namespace
46
49
  end
47
50
 
48
51
  # Return true if the dot position is at the start of the rhs.
52
+ # @return [Boolean]
49
53
  def at_start?()
50
54
  return position.zero? || position == -2
51
55
  end
@@ -55,17 +59,20 @@ module Rley # This module is used as a namespace
55
59
  alias predicted_item? at_start?
56
60
 
57
61
  # A dotted item is called a reduce item if the dot is at the end.
62
+ # @return [Boolean]
58
63
  def reduce_item?()
59
64
  return position < 0 # Either -1 or -2
60
65
  end
61
66
 
62
67
  # The non-terminal symbol that is on the left-side of the production
68
+ # @return [Syntax::NonTerminal]
63
69
  def lhs()
64
70
  return production.lhs
65
71
  end
66
72
 
67
73
  # Return the symbol before the dot.
68
74
  # nil is returned if the dot is at the start of the rhs
75
+ # @return [Syntax::GrmSymbol, NilClass]
69
76
  def prev_symbol()
70
77
  before_position = prev_position
71
78
  result = if before_position.nil?
@@ -79,12 +86,14 @@ module Rley # This module is used as a namespace
79
86
 
80
87
  # Return the symbol after the dot.
81
88
  # nil is returned if the dot is at the end
89
+ # @return [Syntax::GrmSymbol, NilClass]
82
90
  def next_symbol()
83
91
  return position < 0 ? nil : production.rhs[position]
84
92
  end
85
93
 
86
94
  # Calculate the position of the dot if it were moved by
87
95
  # one step to the left.
96
+ # @return [Integer]
88
97
  def prev_position()
89
98
  case position
90
99
  when -2, 0
@@ -100,6 +109,8 @@ module Rley # This module is used as a namespace
100
109
 
101
110
  # Return true if this dotted item has a dot one place
102
111
  # to the right compared to the dotted item argument.
112
+ # @param another [DottedItem]
113
+ # @return [Boolean]
103
114
  def successor_of?(another)
104
115
  return false if production != another.production
105
116
  to_the_left = prev_position
@@ -1,10 +1,12 @@
1
1
  require_relative 'dotted_item'
2
2
 
3
3
  module Rley # This module is used as a namespace
4
- module Parser # This module is used as a namespace
4
+ module Base # This module is used as a namespace
5
5
  # Mix-in module. Builds the dotted items for a given grammar
6
6
  module GrmItemsBuilder
7
- # Build an array of dotted items from the productions in passed grammar.
7
+ # Build an array of dotted items from the productions of passed grammar.
8
+ # @param aGrammar [Syntax::Grammar]
9
+ # @return [Array<DottedItem>]
8
10
  def build_dotted_items(aGrammar)
9
11
  items = []
10
12
  aGrammar.rules.each do |prod|
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.5.05'.freeze
6
+ Version = '0.5.06'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
@@ -4,6 +4,7 @@ module Rley # This module is used as a namespace
4
4
  # Superclass for parse tree formatters.
5
5
  class BaseFormatter
6
6
  # The IO output stream in which the formatter's result will be sent.
7
+ # @return [IO] The output stream for the formatter.
7
8
  attr_reader(:output)
8
9
 
9
10
  # Constructor.
@@ -4,25 +4,29 @@ require_relative 'vertex'
4
4
 
5
5
  module Rley # This module is used as a namespace
6
6
  module GFG # This module is used as a namespace
7
- # TODO: modify definition
8
- # Represents a specialized vertex in a grammar flow graph
9
- # that is associated to a given dotted item.
7
+ # Specialization of Vertex class. Represents a
8
+ # vertex in a grammar flow graph associated to a given dotted item.
10
9
  # Responsibilities (in addition to inherited ones):
11
10
  # - Know its related non-terminal symbol
12
11
  class ItemVertex < Vertex
13
12
  # Link to the dotted item object
13
+ # @return [DottedItem] The corresponding dotted item
14
14
  attr_reader :dotted_item
15
15
 
16
16
  # Optional link to a "shortcut" edge.
17
17
  # Applicable only if the dotted expects a non-terminal symbol.
18
+ # @return [ShortcutEdge] Optional "shortcut" edge
18
19
  attr_reader :shortcut
19
20
 
21
+ # Constructor.
22
+ # @param aDottedItem [DottedItem] the corresponding dotted item.
20
23
  def initialize(aDottedItem)
21
24
  super()
22
25
  @dotted_item = aDottedItem
23
26
  end
24
27
 
25
28
  # Set the "shortcut" edge.
29
+ # @param aShortcut [ShortcutEdge] the "shortcut" edge.
26
30
  def shortcut=(aShortcut)
27
31
  unless aShortcut.kind_of?(ShortcutEdge)
28
32
  raise StandardError, 'Invalid shortcut argument'
@@ -41,26 +45,33 @@ module Rley # This module is used as a namespace
41
45
  @shortcut = aShortcut
42
46
  end
43
47
 
48
+ # The label of this vertex.
49
+ # It is the same as the label of the corresponding dotted item.
50
+ # @return [String] Label for this vertex
44
51
  def label()
45
52
  return dotted_item.to_s
46
53
  end
47
54
 
48
55
  # Returns true if the dotted item has a dot at the end of the production.
56
+ # @return [Boolean]
49
57
  def complete?()
50
58
  return dotted_item.reduce_item?
51
59
  end
52
60
 
53
- # Return the symbol before the dot else nil.
61
+ # Return the symbol before the dot.
62
+ # @return [Syntax::GrmSymbol, NilClass] Previous symbol otherwise nil.
54
63
  def prev_symbol()
55
64
  return dotted_item.prev_symbol
56
65
  end
57
66
 
58
- # Return the symbol after the dot else nil.
67
+ # Return the symbol after the dot.
68
+ # @return [Syntax::GrmSymbol, NilClass] Next grammar symbol otherwise nil.
59
69
  def next_symbol()
60
70
  return dotted_item.next_symbol
61
71
  end
62
72
 
63
73
  # Return the non-terminal symbol at the left-hand side of the production
74
+ # @return [Syntax::GrmSymbol] The non-terminal symbol at left side of production.
64
75
  def lhs()
65
76
  return dotted_item.lhs
66
77
  end
@@ -2,14 +2,19 @@ require_relative 'vertex'
2
2
 
3
3
  module Rley # This module is used as a namespace
4
4
  module GFG # This module is used as a namespace
5
+ # Abstract class.
5
6
  # Represents a specialized vertex in a grammar flow graph
6
7
  # that is associated to a given non-terminal symbol and
7
8
  # that may have in-degree or out-degree > 1
8
9
  # Responsibilities (in addition to inherited ones):
9
10
  # - Know its related non-terminal symbol
10
11
  class NonTerminalVertex < Vertex
12
+ # The non-terminal symbol associated to the vertex
13
+ # @return [Syntax::NonTerminal]
11
14
  attr_reader :non_terminal
12
15
 
16
+ # Constructor to specialize in subclasses.
17
+ # @param aNonTerminal [Syntax::NonTerminal]
13
18
  def initialize(aNonTerminal)
14
19
  super()
15
20
  @non_terminal = aNonTerminal
@@ -6,11 +6,10 @@ module Rley # This module is used as a namespace
6
6
  # - To know its label
7
7
  class Vertex
8
8
  # The edges linking the successor vertices to this one.
9
- # @!attribute [r] edges
10
9
  # @return [Array<Edge>] The edge(s) linking this vertex to successor(s)
11
10
  attr_reader :edges
12
11
 
13
- # Constructor to override.
12
+ # Constructor to extend in subclasses.
14
13
  def initialize()
15
14
  @edges = []
16
15
  end
@@ -0,0 +1,31 @@
1
+ module Rley # This module is used as a namespace
2
+ module Lexical # This module is used as a namespace
3
+ # In Rley, a (lexical) token is an object created by a lexer (tokenizer)
4
+ # and passed to the parser. Such a token object is created when a lexer
5
+ # detects that a sequence of characters (a lexeme) from the input stream
6
+ # is an instance of a terminal grammar symbol.
7
+ # Say, that in a particular language, the lexeme 'foo' is an occurrence
8
+ # of the terminal symbol IDENTIFIER. Then the lexer will return a Token
9
+ # object that states the fact that 'foo' is indeed an IDENTIFIER. Basically,
10
+ # a Token is a pair (lexeme, terminal): it asserts that a given lexeme
11
+ # is an instance of given terminal symbol.
12
+ class Token
13
+ # The sequence of character(s) from the input stream that is an occurrence
14
+ # of the related terminal symbol.
15
+ # @return [String] Input substring that is an instance of the terminal.
16
+ attr_reader(:lexeme)
17
+
18
+ # @return [Syntax::Terminal] Terminal symbol corresponding to the lexeme.
19
+ attr_reader(:terminal)
20
+
21
+ # Constructor.
22
+ # @param theLexeme [String] the lexeme (= piece of text from input)
23
+ # @param aTerminal [Syntax::Terminal] The terminal symbol corresponding to the lexeme.
24
+ def initialize(theLexeme, aTerminal)
25
+ @lexeme = theLexeme
26
+ @terminal = aTerminal
27
+ end
28
+ end # class
29
+ end # module
30
+ end # module
31
+ # End of file
@@ -1,5 +1,5 @@
1
1
  module Rley # This module is used as a namespace
2
- module Tokens # This module is used as a namespace
2
+ module Lexical # This module is used as a namespace
3
3
  # A token range (also called an extent) represents an interval
4
4
  # of token positions that is matched by a given grammar symbol.
5
5
  # For instance consider the expression E: 3 + 11,
@@ -10,17 +10,23 @@ module Rley # This module is used as a namespace
10
10
  # token range can be temporarily undefined (= set to nil)
11
11
  class TokenRange
12
12
  # The index of the lower bound of token range
13
+ # @return [Integer]
13
14
  attr_reader(:low)
14
15
 
15
16
  # The index of the upper bound of token range
17
+ # @return [Integer]
16
18
  attr_reader(:high)
17
19
 
18
- # @param aRangeRep [Hash]
20
+ # Constructor
21
+ # @param aRangeRep [Hash] A hash with keys :low and :high
19
22
  def initialize(aRangeRep)
20
23
  assign_low(aRangeRep)
21
24
  assign_high(aRangeRep)
22
25
  end
23
26
 
27
+ # Test for equality of ranges.
28
+ # @param other [TokenRange, Range, Hash, Array<Integer>]
29
+ # @return [Boolean]
24
30
  def ==(other)
25
31
  return true if object_id == other.object_id
26
32
 
@@ -1,11 +1,11 @@
1
- require_relative 'base_parser'
1
+ require_relative '../base/base_parser'
2
2
  require_relative '../gfg/grm_flow_graph'
3
3
  require_relative 'gfg_parsing'
4
4
 
5
5
  module Rley # This module is used as a namespace
6
6
  module Parser # This module is used as a namespace
7
7
  # Implementation of a parser that uses the Earley parsing algorithm.
8
- class GFGEarleyParser < BaseParser
8
+ class GFGEarleyParser < Base::BaseParser
9
9
  # The Grammar Flow graph for the given grammar
10
10
  attr_reader :gf_graph
11
11
 
@@ -1,4 +1,4 @@
1
- require_relative '../tokens/token_range'
1
+ require_relative '../lexical/token_range'
2
2
  require_relative '../syntax/terminal'
3
3
  require_relative '../syntax/non_terminal'
4
4
  require_relative '../gfg/end_vertex'
@@ -166,7 +166,7 @@ module Rley # This module is used as a namespace
166
166
  # Create a raw node with given range
167
167
  # and push it on top of stack.
168
168
  def push_raw_node(aRange, aSymbol)
169
- raw_node = CSTRawNode.new(Tokens::TokenRange.new(aRange), aSymbol)
169
+ raw_node = CSTRawNode.new(Lexical::TokenRange.new(aRange), aSymbol)
170
170
  stack.push(raw_node)
171
171
  end
172
172
 
@@ -1,4 +1,4 @@
1
- require_relative '../tokens/token_range'
1
+ require_relative '../lexical/token_range'
2
2
 
3
3
  module Rley # This module is used as a namespace
4
4
  module PTree # This module is used as a namespace
@@ -11,7 +11,7 @@ module Rley # This module is used as a namespace
11
11
 
12
12
  def initialize(aSymbol, aRange)
13
13
  @symbol = aSymbol
14
- @range = Tokens::TokenRange.new(aRange)
14
+ @range = Lexical::TokenRange.new(aRange)
15
15
  end
16
16
 
17
17
  # Assign a value from given range to each undefined range bound
@@ -1,4 +1,4 @@
1
- require_relative '../tokens/token_range'
1
+ require_relative '../lexical/token_range'
2
2
 
3
3
  module Rley # This module is used as a namespace
4
4
  module SPPF # This module is used as a namespace
@@ -10,7 +10,7 @@ module Rley # This module is used as a namespace
10
10
  attr_reader(:range)
11
11
 
12
12
  def initialize(aRange)
13
- @range = Tokens::TokenRange.new(aRange)
13
+ @range = Lexical::TokenRange.new(aRange)
14
14
  end
15
15
 
16
16
  # Return the origin (= lower bound of the range
@@ -14,15 +14,19 @@ module Rley # This module is used as a namespace
14
14
  class Grammar
15
15
  # A non-terminal symbol that represents all the possible strings
16
16
  # in the language.
17
+ # @return [NonTerminal] Start symbol of the grammar.
17
18
  attr_reader(:start_symbol)
18
19
 
19
20
  # The list of production rules for the language.
21
+ # @return [Array<Production>] Array of productions for the grammar.
20
22
  attr_reader(:rules)
21
23
 
22
24
  # The list of grammar symbols in the language.
25
+ # @return [Array<GrmSymbol>] The terminal and non-terminal symbols.
23
26
  attr_reader(:symbols)
24
27
 
25
- # A Hash with pairs of the kind: symbol name => grammar symbol
28
+ # A Hash that maps symbol names to their grammar symbols
29
+ # @return [Hash{String => GrmSymbol}]
26
30
  attr_reader(:name2symbol)
27
31
 
28
32
  # @param theProductions [Array<Production>] productions of the grammar.
@@ -1,8 +1,8 @@
1
1
  require 'set'
2
- require_relative 'verbatim_symbol'
3
- require_relative 'literal'
4
2
  require_relative 'terminal'
5
3
  require_relative 'non_terminal'
4
+ require_relative 'literal'
5
+ require_relative 'verbatim_symbol'
6
6
  require_relative 'production'
7
7
  require_relative 'grammar'
8
8
 
@@ -7,7 +7,7 @@ module Rley # This module is used as a namespace
7
7
  class Terminal < GrmSymbol
8
8
 
9
9
  # Constructor.
10
- # aName [String] The name of the grammar symbol.
10
+ # @param aName [String] The name of the grammar symbol.
11
11
  def initialize(aName)
12
12
  super(aName)
13
13
  self.generative = true
@@ -5,10 +5,10 @@ require_relative '../../../lib/rley/syntax/non_terminal'
5
5
  require_relative '../../../lib/rley/syntax/production'
6
6
 
7
7
  # Load the class under test
8
- require_relative '../../../lib/rley/parser/dotted_item'
8
+ require_relative '../../../lib/rley/base/dotted_item'
9
9
 
10
10
  module Rley # Open this namespace to avoid module qualifier prefixes
11
- module Parser # Open this namespace to avoid module qualifier prefixes
11
+ module Base # Open this namespace to avoid module qualifier prefixes
12
12
  describe DottedItem do
13
13
  # Factory method. Builds a production with given left-hand side (LHS)
14
14
  # and given RHS (right-hand side)