rley 0.0.02

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +15 -0
  2. data/.rspec +1 -0
  3. data/.rubocop.yml +74 -0
  4. data/.ruby-gemset +1 -0
  5. data/.ruby-version +1 -0
  6. data/.simplecov +7 -0
  7. data/.travis.yml +21 -0
  8. data/.yardopts +6 -0
  9. data/CHANGELOG.md +10 -0
  10. data/Gemfile +8 -0
  11. data/LICENSE.txt +19 -0
  12. data/README.md +19 -0
  13. data/Rakefile +32 -0
  14. data/lib/rley/constants.rb +26 -0
  15. data/lib/rley/parser/chart.rb +39 -0
  16. data/lib/rley/parser/dotted_item.rb +80 -0
  17. data/lib/rley/parser/earley_parser.rb +177 -0
  18. data/lib/rley/parser/parse_state.rb +54 -0
  19. data/lib/rley/parser/parsing.rb +101 -0
  20. data/lib/rley/parser/state_set.rb +47 -0
  21. data/lib/rley/parser/token.rb +21 -0
  22. data/lib/rley/syntax/grammar.rb +59 -0
  23. data/lib/rley/syntax/grm_symbol.rb +18 -0
  24. data/lib/rley/syntax/literal.rb +20 -0
  25. data/lib/rley/syntax/non_terminal.rb +18 -0
  26. data/lib/rley/syntax/production.rb +42 -0
  27. data/lib/rley/syntax/symbol_seq.rb +36 -0
  28. data/lib/rley/syntax/terminal.rb +18 -0
  29. data/lib/rley/syntax/verbatim_symbol.rb +21 -0
  30. data/spec/rley/parser/chart_spec.rb +47 -0
  31. data/spec/rley/parser/dotted_item_spec.rb +108 -0
  32. data/spec/rley/parser/earley_parser_spec.rb +271 -0
  33. data/spec/rley/parser/parse_state_spec.rb +99 -0
  34. data/spec/rley/parser/parsing_spec.rb +118 -0
  35. data/spec/rley/parser/state_set_spec.rb +68 -0
  36. data/spec/rley/parser/token_spec.rb +40 -0
  37. data/spec/rley/syntax/grammar_spec.rb +149 -0
  38. data/spec/rley/syntax/grm_symbol_spec.rb +29 -0
  39. data/spec/rley/syntax/literal_spec.rb +32 -0
  40. data/spec/rley/syntax/non_terminal_spec.rb +29 -0
  41. data/spec/rley/syntax/production_spec.rb +50 -0
  42. data/spec/rley/syntax/symbol_seq_spec.rb +65 -0
  43. data/spec/rley/syntax/terminal_spec.rb +29 -0
  44. data/spec/rley/syntax/verbatim_symbol_spec.rb +32 -0
  45. data/spec/spec_helper.rb +21 -0
  46. metadata +166 -0
@@ -0,0 +1,54 @@
1
+ module Rley # This module is used as a namespace
2
+ module Parser # This module is used as a namespace
3
+
4
# A parse state pairs a dotted rule with the input position at which
# recognition of that rule began.
# Instances behave as value objects: two states with equal dotted rules
# and equal origins compare equal.
class ParseState
  # The dotted rule (production + dot position) tracked by this state.
  attr_reader(:dotted_rule)

  # The position in the input that matches the beginning of the rhs
  # of the production.
  attr_reader(:origin)

  # @param aDottedRule [Object] the dotted rule of the state. It is expected
  #   to respond to `reduce_item?` and `next_symbol`.
  # @param theOrigin [Integer] input position where the rule started.
  # @raise [StandardError] when aDottedRule is nil.
  def initialize(aDottedRule, theOrigin)
    @dotted_rule = valid_dotted_rule(aDottedRule)
    @origin = theOrigin
  end

  # Equality comparison. A parse state behaves as a value object.
  # @param other [Object] the object to compare with.
  # @return [Boolean] true when other is this very object, or when it
  #   exposes an equal dotted rule and an equal origin. Objects that expose
  #   neither accessor (nil, strings, ...) are simply not equal.
  def ==(other)
    return true if equal?(other)

    # Guard against foreign objects: comparing must never raise.
    comparable = other.respond_to?(:dotted_rule) && other.respond_to?(:origin)
    return false unless comparable

    (dotted_rule == other.dotted_rule) && (origin == other.origin)
  end

  # Returns true if the dot is at the end of the rhs of the production.
  # In other words, the complete rhs matches the input.
  # @return [Boolean] the answer of the dotted rule's `reduce_item?`.
  def complete?
    dotted_rule.reduce_item?
  end

  # Next expected symbol in the production.
  # @return [Object] whatever the dotted rule reports as its next symbol.
  def next_symbol
    dotted_rule.next_symbol
  end

  private

  # Return the validated dotted item (rule).
  # @raise [StandardError] when the argument is nil.
  def valid_dotted_rule(aDottedRule)
    raise StandardError, 'Dotted item cannot be nil' if aDottedRule.nil?

    aDottedRule
  end
end # class
50
+
51
+ end # module
52
+ end # module
53
+
54
+ # End of file
@@ -0,0 +1,101 @@
1
+ require_relative 'chart'
2
+
3
+ module Rley # This module is used as a namespace
4
+ module Parser # This module is used as a namespace
5
+
6
# Holds the state of one parse: the input tokens and the chart of
# state sets built while the tokens are processed.
class Parsing
  # The chart (one entry per input position) filled during the parse.
  attr_reader(:chart)

  # The sequence of input tokens to parse
  attr_reader(:tokens)

  # @param startDottedRule [Object] the dotted rule handed to the chart
  #   as its starting point.
  # @param theTokens [Array] the input tokens; a shallow copy is kept.
  def initialize(startDottedRule, theTokens)
    @tokens = theTokens.dup
    @chart = Chart.new(startDottedRule, tokens.size)
  end

  # Return true if the parse was successful (= input tokens
  # followed the syntax specified by the grammar).
  # Success is detected as follows: the last chart entry has a parse state
  # that involves the start production and has its dot positioned at the
  # end of the rhs.
  # @return [Boolean]
  def success?
    start_production = chart.start_dotted_rule.production
    last_chart_entry = chart.state_sets.last
    last_chart_entry.states_for(start_production).any?(&:complete?)
  end

  # Push a parse state (dotted item + origin) to the
  # chart entry with given index if it isn't yet in the chart entry.
  # @param aDottedItem [Object] the dotted item of the new state.
  # @param anOrigin [Integer] origin position of the new state.
  # @param aChartIndex [Integer] index of the chart entry to extend.
  # @raise [StandardError] when aDottedItem is nil.
  def push_state(aDottedItem, anOrigin, aChartIndex)
    raise StandardError, 'Dotted item may not be nil' if aDottedItem.nil?

    chart.push_state(aDottedItem, anOrigin, aChartIndex)
  end

  # This method is called when a parse state for chart entry at position
  # 'aPosition' expects a terminal as next symbol.
  # If the input token matches the terminal symbol then:
  #   Retrieve all parse states for chart entry at 'aPosition'
  #   that have the given terminal as next symbol.
  #   For each s of the above states, push to chart entry aPosition + 1
  #   a new state like: <next dotted rule, s.origin, aPosition + 1>
  # In other words, we place the dotted rules in the next state set
  # such that the dot appears after the terminal.
  # When there is no token at 'aPosition' (end of input reached),
  # nothing is scanned and the chart is left untouched.
  # @param aTerminal [Terminal] a terminal symbol that
  #   immediately follows a dot
  # @param aPosition [Integer] position in the input token sequence.
  # @param nextMapping [Proc or Lambda] code to evaluate in order to
  #   determine the "next" dotted rule for a given one.
  def scanning(aTerminal, aPosition, &nextMapping)
    curr_token = tokens[aPosition]

    # Past the last token there is nothing left to match.
    return if curr_token.nil?
    return unless curr_token.terminal == aTerminal

    states_expecting(aTerminal, aPosition).each do |s|
      next_item = nextMapping.call(s.dotted_rule)
      push_state(next_item, s.origin, aPosition + 1)
    end
  end

  # This method is called when a parse state at chart entry reaches the end
  # of a production.
  # For every state in chart[aPosition] that is complete (i.e. of the form:
  #   { dotted_rule: X -> γ •, origin: j}),
  # Find states s in chart[j] of the form {dotted_rule: Y -> α • X β, origin: i}
  #   In other words, rules that predicted the non-terminal X.
  # For each s, add to chart[aPosition] a state of the form
  #   { dotted_rule: Y → α X • β, origin: i})
  # @param aState [ParseState] the completed state.
  # @param aPosition [Integer] index of the chart entry being processed.
  # @param nextMapping [Proc or Lambda] code to evaluate in order to
  #   determine the "next" dotted rule for a given one.
  def completion(aState, aPosition, &nextMapping)
    curr_origin = aState.origin
    curr_lhs = aState.dotted_rule.lhs

    states_expecting(curr_lhs, curr_origin).each do |s|
      next_item = nextMapping.call(s.dotted_rule)
      push_state(next_item, s.origin, aPosition)
    end
  end

  # The list of ParseState from the chart entry at given position
  # that expect the given symbol. Despite the parameter name, `completion`
  # also calls this with the lhs of a completed rule.
  # @param aTerminal [Object] the grammar symbol expected right after a dot.
  # @param aPosition [Integer] index of the chart entry to query.
  def states_expecting(aTerminal, aPosition)
    chart[aPosition].states_expecting(aTerminal)
  end
end # class
97
+
98
+ end # module
99
+ end # module
100
+
101
+ # End of file
@@ -0,0 +1,47 @@
1
+ require 'forwardable' # Delegation
2
+
3
+ module Rley # This module is used as a namespace
4
+ module Parser # This module is used as a namespace
5
+
6
# An ordered collection of parse states in which a given state
# (as decided by ==) occurs at most once.
class StateSet
  extend Forwardable

  # The set of parse states
  attr_reader :states

  # Collection-like queries are answered by the underlying list.
  def_delegators :states, :empty?, :size, :first, :each

  def initialize
    @states = []
  end

  # Append the given state (if it isn't yet in the set)
  # to the list of states
  # @param aState [ParseState] the state to push.
  def push_state(aState)
    return if include?(aState)

    @states << aState
  end

  # The list of ParseState whose dotted rule has the given symbol
  # as next symbol.
  def states_expecting(aTerminal)
    states.select do |state|
      upcoming = state.dotted_rule.next_symbol
      upcoming == aTerminal
    end
  end

  # The list of ParseState that involve the given production
  def states_for(aProduction)
    states.select do |state|
      involved = state.dotted_rule.production
      involved == aProduction
    end
  end

  private

  # Membership test relying on the == operator of the states.
  # TODO: make it better than linear search
  def include?(aState)
    states.include?(aState)
  end
end # class
43
+
44
+ end # module
45
+ end # module
46
+
47
+ # End of file
@@ -0,0 +1,21 @@
1
+ require_relative '../syntax/grammar'
2
+ require_relative 'dotted_item'
3
+
4
+ module Rley # This module is used as a namespace
5
+ module Parser # This module is used as a namespace
6
+
7
# A token couples a piece of input text (the lexeme) with the
# terminal symbol it stands for.
class Token
  # lexeme:   the text of the token as given to the constructor.
  # terminal: the terminal symbol associated with the lexeme.
  attr_reader :lexeme, :terminal

  # @param theLexeme [Object] the text of the token (stored as is).
  # @param aTerminal [Object] the matching terminal symbol (stored as is).
  def initialize(theLexeme, aTerminal)
    @terminal = aTerminal
    @lexeme = theLexeme
  end
end # class
17
+
18
+ end # module
19
+ end # module
20
+
21
+ # End of file
@@ -0,0 +1,59 @@
1
+ module Rley # This module is used as a namespace
2
+ module Syntax # This module is used as a namespace
3
+
4
# A grammar specifies the syntax of a language.
# Formally, a grammar has:
#   - one start symbol,
#   - one or more production rules; each production has a rhs
#     that is a sequence of grammar symbols.
# Grammar symbols are categorized into:
#   - terminal symbols
#   - non-terminal symbols
class Grammar
  # A non-terminal symbol that represents all the possible strings
  # in the language. It is the lhs of the first production given.
  attr_reader :start_symbol

  # The list of production rules for the language.
  attr_reader :rules

  # The list of grammar symbols in the language, in order of
  # first appearance, without duplicates.
  attr_reader :symbols

  # @param theProductions [Array of Production] the rules of the grammar.
  # @raise [StandardError] when no production is given.
  def initialize(theProductions)
    @rules = []
    @symbols = []
    productions = validate_productions(theProductions)
    # TODO: use topological sorting
    @start_symbol = productions[0].lhs
    productions.each { |a_rule| add_production(a_rule) }
  end

  private

  # Validation method. Return the validated list of productions
  def validate_productions(theProductions)
    if theProductions.nil? || theProductions.empty?
      raise StandardError, 'A grammar must have at least one production'
    end

    theProductions
  end

  # Record the production and every symbol it mentions (lhs first,
  # then the rhs members from left to right).
  def add_production(aProduction)
    @rules << aProduction
    register_symbol(aProduction.lhs)

    # TODO: remove quadratic execution time
    aProduction.rhs.members.each { |member| register_symbol(member) }
  end

  # Add the symbol to the list of known symbols unless already there.
  def register_symbol(aSymbol)
    @symbols << aSymbol unless @symbols.include?(aSymbol)
  end
end # class
55
+
56
+ end # module
57
+ end # module
58
+
59
+ # End of file
@@ -0,0 +1,18 @@
1
+ module Rley # This module is used as a namespace
2
+ module Syntax # This module is used as a namespace
3
+
4
# Base class of all grammar symbols (meant to be subclassed).
# A grammar symbol is an element that appears in grammar rules.
class GrmSymbol
  # The name of the grammar symbol
  attr_reader :name

  # @param aName [String] the name of the symbol. A copy of it is kept,
  #   so later changes to the argument don't affect the symbol.
  def initialize(aName)
    own_copy = aName.dup
    @name = own_copy
  end
end # class
14
+
15
+ end # module
16
+ end # module
17
+
18
+ # End of file
@@ -0,0 +1,20 @@
1
+ require_relative 'terminal' # Load superclass
2
+
3
+ module Rley # This module is used as a namespace
4
+ module Syntax # This module is used as a namespace
5
+
6
# A literal is a terminal symbol that is associated with a lexical pattern.
class Literal < Terminal
  # The pattern given at construction time (stored as is).
  # The original documentation describes it as the exact text
  # representation of the word.
  attr_reader :pattern

  # @param aName [String] the name of the symbol (handled by the superclass).
  # @param aPattern [Object] the lexical pattern of the literal.
  def initialize(aName, aPattern)
    super(aName)
    @pattern = aPattern
  end
end # class
16
+
17
+ end # module
18
+ end # module
19
+
20
+ # End of file
@@ -0,0 +1,18 @@
1
+ require_relative 'grm_symbol' # Load superclass
2
+
3
+ module Rley # This module is used as a namespace
4
+ module Syntax # This module is used as a namespace
5
+
6
# A non-terminal symbol (sometimes called a syntactic variable) represents
# a composition of terminal or non-terminal symbols.
class NonTerminal < GrmSymbol
  # @param aName [String] the name of the symbol; passed unchanged
  #   to the superclass constructor.
  def initialize(aName)
    super
  end
end # class
14
+
15
+ end # module
16
+ end # module
17
+
18
+ # End of file
@@ -0,0 +1,42 @@
1
+ require_relative 'symbol_seq'
2
+
3
+ module Rley # This module is used as a namespace
4
+ module Syntax # This module is used as a namespace
5
+
6
# In a context-free grammar, a production is a rule in which
# the left-hand side (LHS) consists solely of a non-terminal symbol
# and the right-hand side (RHS) consists of a sequence of symbols.
# The symbols in the RHS can be either terminal or non-terminal symbols.
# The rule stipulates that the LHS is equivalent to the RHS,
# in other words every occurrence of the LHS can be replaced by the
# corresponding RHS.
# NOTE(review): the original implementation note said that "the object id
# of the production is taken as its LHS"; nothing in this class does
# that -- confirm against the callers.
class Production
  # The right-hand side (rhs) consists of a sequence of grammar symbols
  attr_reader :rhs

  # The left-hand side of the rule. It must be a non-terminal symbol
  attr_reader :lhs

  # Provide common alternate names to lhs and rhs accessors
  alias body rhs
  alias head lhs

  # @param aNonTerminal [NonTerminal] the lhs of the rule (stored as is).
  # @param theSymbols [Array] the symbols of the rhs; they get wrapped
  #   in a SymbolSeq.
  def initialize(aNonTerminal, theSymbols)
    @lhs = aNonTerminal
    @rhs = SymbolSeq.new(theSymbols)
  end

  # Is the rhs empty?
  # @return true if the rhs has no members.
  def empty?
    rhs.empty?
  end
end # class
38
+
39
+ end # module
40
+ end # module
41
+
42
+ # End of file
@@ -0,0 +1,36 @@
1
+ require 'forwardable'
2
+
3
+ module Rley # This module is used as a namespace
4
+ module Syntax # This module is used as a namespace
5
+
6
# A symbol sequence is an ordered series of grammar symbols.
class SymbolSeq
  extend Forwardable

  # The sequence of symbols
  attr_reader :members

  # Size and element queries are answered by the member list itself.
  def_delegators :@members, :empty?, :size, :[]

  # @param theSymbols [Array] the symbols of the sequence;
  #   a shallow copy of the list is kept.
  def initialize(theSymbols)
    @members = theSymbols.dup
  end

  # Equality operator.
  # A sequence can be compared with another SymbolSeq or with an Array
  # of symbols.
  # @raise [StandardError] when other is of any different type.
  def ==(other)
    return true if equal?(other)

    case other
    when SymbolSeq
      other.members == members
    when Array
      other == members
    else
      raise StandardError, "Cannot compare a SymbolSeq with a #{other.class}"
    end
  end
end # class
32
+
33
+ end # module
34
+ end # module
35
+
36
+ # End of file
@@ -0,0 +1,18 @@
1
+ require_relative 'grm_symbol' # Load superclass
2
+
3
+ module Rley # This module is used as a namespace
4
+ module Syntax # This module is used as a namespace
5
+
6
# A terminal symbol represents a class of words in the language
# defined by the grammar.
class Terminal < GrmSymbol
  # @param aName [String] the name of the symbol; passed unchanged
  #   to the superclass constructor.
  def initialize(aName)
    super
  end
end # class
14
+
15
+ end # module
16
+ end # module
17
+
18
+ # End of file