rley 0.0.02

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +15 -0
  2. data/.rspec +1 -0
  3. data/.rubocop.yml +74 -0
  4. data/.ruby-gemset +1 -0
  5. data/.ruby-version +1 -0
  6. data/.simplecov +7 -0
  7. data/.travis.yml +21 -0
  8. data/.yardopts +6 -0
  9. data/CHANGELOG.md +10 -0
  10. data/Gemfile +8 -0
  11. data/LICENSE.txt +19 -0
  12. data/README.md +19 -0
  13. data/Rakefile +32 -0
  14. data/lib/rley/constants.rb +26 -0
  15. data/lib/rley/parser/chart.rb +39 -0
  16. data/lib/rley/parser/dotted_item.rb +80 -0
  17. data/lib/rley/parser/earley_parser.rb +177 -0
  18. data/lib/rley/parser/parse_state.rb +54 -0
  19. data/lib/rley/parser/parsing.rb +101 -0
  20. data/lib/rley/parser/state_set.rb +47 -0
  21. data/lib/rley/parser/token.rb +21 -0
  22. data/lib/rley/syntax/grammar.rb +59 -0
  23. data/lib/rley/syntax/grm_symbol.rb +18 -0
  24. data/lib/rley/syntax/literal.rb +20 -0
  25. data/lib/rley/syntax/non_terminal.rb +18 -0
  26. data/lib/rley/syntax/production.rb +42 -0
  27. data/lib/rley/syntax/symbol_seq.rb +36 -0
  28. data/lib/rley/syntax/terminal.rb +18 -0
  29. data/lib/rley/syntax/verbatim_symbol.rb +21 -0
  30. data/spec/rley/parser/chart_spec.rb +47 -0
  31. data/spec/rley/parser/dotted_item_spec.rb +108 -0
  32. data/spec/rley/parser/earley_parser_spec.rb +271 -0
  33. data/spec/rley/parser/parse_state_spec.rb +99 -0
  34. data/spec/rley/parser/parsing_spec.rb +118 -0
  35. data/spec/rley/parser/state_set_spec.rb +68 -0
  36. data/spec/rley/parser/token_spec.rb +40 -0
  37. data/spec/rley/syntax/grammar_spec.rb +149 -0
  38. data/spec/rley/syntax/grm_symbol_spec.rb +29 -0
  39. data/spec/rley/syntax/literal_spec.rb +32 -0
  40. data/spec/rley/syntax/non_terminal_spec.rb +29 -0
  41. data/spec/rley/syntax/production_spec.rb +50 -0
  42. data/spec/rley/syntax/symbol_seq_spec.rb +65 -0
  43. data/spec/rley/syntax/terminal_spec.rb +29 -0
  44. data/spec/rley/syntax/verbatim_symbol_spec.rb +32 -0
  45. data/spec/spec_helper.rb +21 -0
  46. metadata +166 -0
@@ -0,0 +1,54 @@
1
+ module Rley # This module is used as a namespace
2
+ module Parser # This module is used as a namespace
3
+
4
# A parse state pairs a dotted rule (dotted item) with an origin.
# It behaves as a value object: two states are equal when both their
# dotted rule and their origin are equal.
class ParseState
  # The dotted rule (dotted item) this state refers to.
  attr_reader :dotted_rule

  # The position in the input that matches the beginning of the rhs
  # of the production.
  attr_reader :origin

  # @param aDottedRule [Object] the dotted rule; may not be nil.
  # @param theOrigin [Object] position in the input where the rhs starts
  #   (presumably an Integer index -- confirm against callers).
  # @raise [StandardError] when aDottedRule is nil.
  def initialize(aDottedRule, theOrigin)
    @dotted_rule = valid_dotted_rule(aDottedRule)
    @origin = theOrigin
  end

  # Equality comparison. A parse state behaves as a value object.
  # Objects that do not expose both #dotted_rule and #origin (nil,
  # strings, ...) are simply considered different instead of raising
  # a NoMethodError.
  # @param other [Object] the object to compare with.
  # @return [Boolean]
  def ==(other)
    return true if equal?(other)
    return false unless other.respond_to?(:dotted_rule) &&
                        other.respond_to?(:origin)

    (dotted_rule == other.dotted_rule) && (origin == other.origin)
  end

  # Returns true if the dot is at the end of the rhs of the production.
  # In other words, the complete rhs matches the input.
  # Delegates to the dotted rule's #reduce_item?.
  def complete?
    dotted_rule.reduce_item?
  end

  # Next expected symbol in the production (delegated to the dotted rule).
  def next_symbol
    dotted_rule.next_symbol
  end

  private

  # Return the validated dotted item (rule).
  # @raise [StandardError] when the argument is nil.
  def valid_dotted_rule(aDottedRule)
    raise StandardError, 'Dotted item cannot be nil' if aDottedRule.nil?

    aDottedRule
  end
end # class
50
+
51
+ end # module
52
+ end # module
53
+
54
+ # End of file
@@ -0,0 +1,101 @@
1
+ require_relative 'chart'
2
+
3
+ module Rley # This module is used as a namespace
4
+ module Parser # This module is used as a namespace
5
+
6
# Holds the working data of one parse: a copy of the input tokens and
# the chart that gets filled with parse states.
class Parsing
  # The chart (created from the start dotted rule and the token count).
  attr_reader :chart

  # The sequence of input tokens to parse
  attr_reader :tokens

  # @param startDottedRule [Object] dotted rule handed to the chart.
  # @param theTokens [Array] the input tokens; a shallow copy is kept.
  def initialize(startDottedRule, theTokens)
    @tokens = theTokens.dup
    @chart = Chart.new(startDottedRule, tokens.size)
  end

  # Return true if the parse was successful (= input tokens
  # followed the syntax specified by the grammar).
  # Success can be detected as follows:
  # The last chart entry has a parse state
  # that involves the start production and
  # has a dot positioned at the end of its rhs.
  # @return [Boolean]
  def success?
    start_production = chart.start_dotted_rule.production
    final_states = chart.state_sets.last.states_for(start_production)
    final_states.any?(&:complete?)
  end

  # Push a parse state (dotted item + origin) to the
  # chart entry with given index if it isn't yet in the chart entry.
  # @raise [StandardError] when aDottedItem is nil.
  def push_state(aDottedItem, anOrigin, aChartIndex)
    raise StandardError, 'Dotted item may not be nil' if aDottedItem.nil?

    chart.push_state(aDottedItem, anOrigin, aChartIndex)
  end

  # This method is called when a parse state for chart entry at position
  # 'aPosition' expects a terminal as next symbol.
  # If the input token matches the terminal symbol then:
  # Retrieve all parse states for chart entry at 'aPosition'
  # that have the given terminal as next symbol.
  # For each s of the above states, push to chart entry aPosition + 1
  # a new state like: <next dotted rule, s.origin, aPosition + 1>
  # In other words, we place the dotted rules in the next state set
  # such that the dot appears after terminal.
  # When there is no token at 'aPosition' (e.g. past the end of the
  # input) nothing is scanned and nil is returned.
  # @param aTerminal [Terminal] a terminal symbol that
  #   immediately follows a dot
  # @param aPosition [Fixnum] position in the input token sequence.
  # @param nextMapping [Proc or Lambda] code to evaluate in order to
  #   determine the "next" dotted rule for a given one.
  def scanning(aTerminal, aPosition, &nextMapping)
    curr_token = tokens[aPosition]
    # Guard: no token left to compare with => no state can advance.
    return if curr_token.nil?
    return unless curr_token.terminal == aTerminal

    states_expecting(aTerminal, aPosition).each do |s|
      next_item = nextMapping.call(s.dotted_rule)
      push_state(next_item, s.origin, aPosition + 1)
    end
  end

  # This method is called when a parse state at chart entry reaches the end
  # of a production.
  # For every state in chart[aPosition] that is complete (i.e. of the form:
  # { dotted_rule: X -> γ •, origin: j}),
  # Find states s in chart[j] of the form {dotted_rule: Y -> α • X β, origin: i}
  # In other words, rules that predicted the non-terminal X.
  # For each s, add to chart[aPosition] a state of the form
  # { dotted_rule: Y → α X • β, origin: i})
  # @param aState [ParseState] the completed state.
  # @param aPosition [Fixnum] index of the chart entry that receives
  #   the new states.
  # @param nextMapping [Proc or Lambda] code to evaluate in order to
  #   determine the "next" dotted rule for a given one.
  def completion(aState, aPosition, &nextMapping)
    completed_lhs = aState.dotted_rule.lhs
    states_expecting(completed_lhs, aState.origin).each do |s|
      next_item = nextMapping.call(s.dotted_rule)
      push_state(next_item, s.origin, aPosition)
    end
  end

  # The list of ParseState from the chart entry at given position
  # that expect the given symbol. Despite the parameter name, #completion
  # also calls it with the lhs of a completed rule.
  def states_expecting(aTerminal, aPosition)
    chart[aPosition].states_expecting(aTerminal)
  end
end # class
97
+
98
+ end # module
99
+ end # module
100
+
101
+ # End of file
@@ -0,0 +1,47 @@
1
+ require 'forwardable' # Delegation
2
+
3
+ module Rley # This module is used as a namespace
4
+ module Parser # This module is used as a namespace
5
+
6
# An ordered collection of parse states in which a given state
# (compared with ==) appears at most once.
class StateSet
  extend Forwardable
  def_delegators :states, :empty?, :size, :first, :each

  # The set of parse states (kept in insertion order in an Array).
  attr_reader :states

  def initialize
    @states = []
  end

  # Append the given state (if it isn't yet in the set)
  # to the list of states
  # @param aState [ParseState] the state to push.
  def push_state(aState)
    @states << aState unless include?(aState)
  end

  # The list of ParseState that expect the given symbol, i.e. whose
  # dotted rule has it as next symbol.
  # @return [Array] possibly empty.
  def states_expecting(aTerminal)
    states.select { |s| s.dotted_rule.next_symbol == aTerminal }
  end

  # The list of ParseState that involve the given production
  # @return [Array] possibly empty.
  def states_for(aProduction)
    states.select { |s| s.dotted_rule.production == aProduction }
  end

  private

  # Is the given state already in the set?
  # TODO: make it better than linear search
  def include?(aState)
    states.include?(aState)
  end
end # class
43
+
44
+ end # module
45
+ end # module
46
+
47
+ # End of file
@@ -0,0 +1,21 @@
1
+ require_relative '../syntax/grammar'
2
+ require_relative 'dotted_item'
3
+
4
+ module Rley # This module is used as a namespace
5
+ module Parser # This module is used as a namespace
6
+
7
# A token pairs a lexeme with a terminal; #scanning in the parsing code
# compares the token's terminal with the expected grammar symbol.
class Token
  # The lexeme given at construction
  # (presumably the matched input text -- confirm against callers).
  attr_reader :lexeme

  # The terminal given at construction.
  attr_reader :terminal

  # @param theLexeme [Object] stored as-is (no copy is made).
  # @param aTerminal [Object] stored as-is.
  def initialize(theLexeme, aTerminal)
    @lexeme = theLexeme
    @terminal = aTerminal
  end
end # class
17
+
18
+ end # module
19
+ end # module
20
+
21
+ # End of file
@@ -0,0 +1,59 @@
1
+ module Rley # This module is used as a namespace
2
+ module Syntax # This module is used as a namespace
3
+
4
# A grammar specifies the syntax of a language.
# Formally, a grammar has:
# One start symbol,
# One or more production rules,
# Each production has a rhs that is a sequence of grammar symbols.
# Grammar symbols are categorized into
# -terminal symbols
# -non-terminal symbols
class Grammar
  # A non-terminal symbol that represents all the possible strings
  # in the language.
  attr_reader :start_symbol

  # The list of production rules for the language.
  attr_reader :rules

  # The list of grammar symbols in the language
  # (each symbol once, in order of first appearance).
  attr_reader :symbols

  # The lhs of the first production becomes the start symbol.
  # @param theProductions [Array of Production]
  # @raise [StandardError] when theProductions is nil or empty.
  def initialize(theProductions)
    @rules = []
    @symbols = []
    valid_productions = validate_productions(theProductions)
    # TODO: use topological sorting
    @start_symbol = valid_productions.first.lhs
    valid_productions.each { |prod| add_production(prod) }
  end

  private

  # Validation method. Return the validated list of productions
  # @raise [StandardError] when the list is nil or empty.
  def validate_productions(theProductions)
    msg = 'A grammar must have at least one production'
    raise StandardError, msg if theProductions.nil? || theProductions.empty?

    theProductions
  end

  # Register the production and every symbol it mentions (lhs first,
  # then the rhs members) that is not yet known.
  def add_production(aProduction)
    @rules << aProduction

    # TODO: remove quadratic execution time
    [aProduction.lhs, *aProduction.rhs.members].each do |symb|
      @symbols << symb unless @symbols.include?(symb)
    end
  end
end # class
55
+
56
+ end # module
57
+ end # module
58
+
59
+ # End of file
@@ -0,0 +1,18 @@
1
+ module Rley # This module is used as a namespace
2
+ module Syntax # This module is used as a namespace
3
+
4
# Abstract class for grammar symbols.
# A grammar symbol is an element that appears in grammar rules.
class GrmSymbol
  # The name of the grammar symbol
  attr_reader :name

  # @param aName [#dup] the symbol's name; a copy is stored so later
  #   changes to the argument do not affect the symbol.
  def initialize(aName)
    @name = aName.dup
  end
end # class
14
+
15
+ end # module
16
+ end # module
17
+
18
+ # End of file
@@ -0,0 +1,20 @@
1
+ require_relative 'terminal' # Load superclass
2
+
3
+ module Rley # This module is used as a namespace
4
+ module Syntax # This module is used as a namespace
5
+
6
# A literal is a terminal symbol that matches a lexical pattern
class Literal < Terminal
  # The exact text representation of the word.
  attr_reader :pattern

  # @param aName [Object] the symbol name, passed on to the superclass.
  # @param aPattern [Object] the pattern; stored as-is.
  def initialize(aName, aPattern)
    super(aName)
    @pattern = aPattern
  end
end # class
16
+
17
+ end # module
18
+ end # module
19
+
20
+ # End of file
@@ -0,0 +1,18 @@
1
+ require_relative 'grm_symbol' # Load superclass
2
+
3
+ module Rley # This module is used as a namespace
4
+ module Syntax # This module is used as a namespace
5
+
6
# A non-terminal symbol (sometimes called a syntactic variable) represents
# a composition of terminal or non-terminal symbols
class NonTerminal < GrmSymbol
  # The constructor is inherited unchanged from GrmSymbol: the previous
  # override only forwarded its single argument to super.
end # class
14
+
15
+ end # module
16
+ end # module
17
+
18
+ # End of file
@@ -0,0 +1,42 @@
1
+ require_relative 'symbol_seq'
2
+
3
+ module Rley # This module is used as a namespace
4
+ module Syntax # This module is used as a namespace
5
+
6
# In a context-free grammar, a production is a rule in which
# its left-hand side (LHS) consists solely of a non-terminal symbol
# and the right-hand side (RHS) consists of a sequence of symbols.
# The symbols in RHS can be either terminal or non-terminal symbols.
# The rule stipulates that the LHS is equivalent to the RHS,
# in other words every occurrence of the LHS can be substituted by the
# corresponding RHS.
# NOTE(review): the original header stated "the object id of the
# production is taken as its LHS"; nothing in this class does so --
# confirm the intent or drop the note.
class Production
  # The right-hand side (rhs) consists of a sequence of grammar symbols
  attr_reader :rhs

  # The left-hand side of the rule. It must be a non-terminal symbol
  attr_reader :lhs

  # Provide common alternate names to lhs and rhs accessors
  alias body rhs
  alias head lhs

  # @param aNonTerminal [Object] the lhs; stored as-is (not validated here).
  # @param theSymbols [Array] the rhs symbols; wrapped into a SymbolSeq.
  def initialize(aNonTerminal, theSymbols)
    @lhs = aNonTerminal
    @rhs = SymbolSeq.new(theSymbols)
  end

  # Is the rhs empty?
  # @return true if the rhs has no members.
  def empty?
    rhs.empty?
  end
end # class
38
+
39
+ end # module
40
+ end # module
41
+
42
+ # End of file
@@ -0,0 +1,36 @@
1
+ require 'forwardable'
2
+
3
+ module Rley # This module is used as a namespace
4
+ module Syntax # This module is used as a namespace
5
+
6
# A symbol sequence is a suite of grammar symbols
class SymbolSeq
  extend Forwardable
  def_delegators :@members, :empty?, :size, :[]

  # The sequence of symbols
  attr_reader :members

  # @param theSymbols [Array] the symbols; a shallow copy is kept.
  def initialize(theSymbols)
    @members = theSymbols.dup
  end

  # Equality operator. A sequence can be compared with another
  # SymbolSeq or with a plain Array of symbols.
  # @param other [SymbolSeq, Array]
  # @return [Boolean]
  # @raise [StandardError] when other is neither a SymbolSeq nor an Array
  #   (comparison with a foreign object is deliberately an error here).
  def ==(other)
    return true if equal?(other)

    case other
    when SymbolSeq then other.members == members
    when Array then other == members
    else
      raise StandardError, "Cannot compare a SymbolSeq with a #{other.class}"
    end
  end
end # class
32
+
33
+ end # module
34
+ end # module
35
+
36
+ # End of file
@@ -0,0 +1,18 @@
1
+ require_relative 'grm_symbol' # Load superclass
2
+
3
+ module Rley # This module is used as a namespace
4
+ module Syntax # This module is used as a namespace
5
+
6
# A terminal symbol represents a class of words in the language
# defined by the grammar.
class Terminal < GrmSymbol
  # The constructor is inherited unchanged from GrmSymbol: the previous
  # override only forwarded its single argument to super.
end # class
14
+
15
+ end # module
16
+ end # module
17
+
18
+ # End of file