rley 0.1.11 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MDNkYmEzYTFiNmUyMzk2MTgwOTczZmNlMTVjMDU3ZDIzNjI4YWNmYw==
4
+ NTc2NGE4MzYxOTc1ZDUyMDVkYjdmNGFhODllNmEwM2YxMjVkZDk1OQ==
5
5
  data.tar.gz: !binary |-
6
- ZWQ1MmMzMDA2NzcxOTUzM2ZjMDg0Yjg2OTg0MTVhODgzOTQ1OTExNg==
6
+ ZDBkMTdmZWM2NTMwYWMwNDFkNDQ4NGI2YzdkNjk3NDU0ZGExMGYzNA==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- ZjhmMDlmZjk2ZDYwY2EwNDBjZjIxYWMyMDA1ZjQ4MTVmOWE0NjRhZmI4OTg1
10
- MWZiMzQwNzFkOGIyMzJmZTdmMTg2NTRkOGVmM2ViNjg0MzU1MjllODJkZTIx
11
- NzQ4NDA3NmUwYzY4YzBhMmYwODg1MGIzNGJmMDU3NDBjMzJmZDY=
9
+ YWZjYmQxNDNiNjVmMDYyYWI1YzM0YzMyN2VjMzk4ZjUxOTIyMmQxNTE4Y2Y4
10
+ YjM4NTAyNGNhNjhiOWRhNWMyZWVmYzRhYjFjNzhhOWEzMDY1ZTgzMzRiMTVh
11
+ Njg1MDRjMjQ5NTlhYmU3NTk2MDBiMWQyZmI3MDIyMWUwNGM1NjM=
12
12
  data.tar.gz: !binary |-
13
- OTBhM2JiNzRiNzJlNmNmZWRjODJhMzY3MzkyMDdkNDRmNzcyYzFiMGNmYThm
14
- YWVhMTBjMzkzMDI1NzMyNTNiOTkzZGFlODAxYzcyYWQ0Y2QyNGEwZWUxYjBj
15
- YTk0OTViZWVkODk5N2U4MmUxZWJlZTBjY2QxOTNmMDk5YmIxZjU=
13
+ ODZiMmY1ZDMwNTVlNmM3ZGJmZDIzOTAzYjQ1MTFlMmY4OTBlODhhZWZhN2M3
14
+ MjEzYjI0YjZhNTA4NzkzYjJiMzMwY2Y0NzliMGMyZDdlMjI3NjkzMTliYWNh
15
+ YzQ2MGVmNWM1NTA5M2IxYjcxNzliNzhhMzViMDE4OTM3NWY1NjI=
@@ -1,3 +1,8 @@
1
+ ### 0.1.12 / 2014-12-22
2
+ * [FIX] Fixed `Parsing#parse_tree`: code couldn't cope with a parse state set containing more
3
+ than one parse state that expected the same symbol.
4
+ * [NEW] Added one more parser example (for very basic arithmetic expression)
5
+
1
6
  ### 0.1.11 / 2014-12-16
2
7
  * [FIX] Fixed all but one YARD (documentation) warnings. Most of them were due to mismatch
3
8
  in method argument names between source code and documentation.
@@ -0,0 +1,85 @@
1
+ # Purpose: to demonstrate how to parse basic arithmetic expressions
2
+ # and render a parse tree
3
+ require 'pp' # TODO remove this dependency
4
+ require 'rley' # Load the gem
5
+
6
+ # Steps to render a parse tree (of a valid parsed input):
7
+ # 1. Define a grammar
8
+ # 2. Create a tokenizer for the language
9
+ # 3. Create a parser for that grammar
10
+ # 4. Tokenize the input
11
+ # 5. Let the parser process the input
12
+ # 6. Generate a parse tree from the parse result
13
+ # 7. Render the parse tree (in JSON)
14
+
15
+ ########################################
16
+ # Step 1. Define a grammar for a very simple arithmetic expression language
17
+ # (based on example in article on Earley's algorithm in Wikipedia)
18
+
19
+ # Let's create the grammar piece by piece
20
+ builder = Rley::Syntax::GrammarBuilder.new
21
+ builder.add_terminals('+', '*', 'integer')
22
+ builder.add_production('P' => 'S')
23
+ builder.add_production('S' => %w(S + M))
24
+ builder.add_production('S' => 'M')
25
+ builder.add_production('M' => %w(M * T))
26
+ builder.add_production('M' => 'T')
27
+ builder.add_production('T' => 'integer')
28
+
29
+ # And now build the grammar...
30
+ grammar_s_expr = builder.grammar
31
+
32
+
33
+ ########################################
34
+ # Step 2. Create a tokenizer for the language
35
+ # The tokenizer transforms the input into an array of tokens
36
+ def tokenizer(aText, aGrammar)
37
+ tokens = aText.scan(/\S+/).map do |lexeme|
38
+ case lexeme
39
+ when '+', '*'
40
+ terminal = aGrammar.name2symbol[lexeme]
41
+ when /^[-+]?\d+$/
42
+ terminal = aGrammar.name2symbol['integer']
43
+ else
44
+ msg = "Unknown input text '#{lexeme}'"
45
+ fail StandardError, msg
46
+ end
47
+ Rley::Parser::Token.new(lexeme, terminal)
48
+ end
49
+
50
+ return tokens
51
+ end
52
+
53
+ ########################################
54
+ # Step 3. Create a parser for that grammar
55
+ parser = Rley::Parser::EarleyParser.new(grammar_s_expr)
56
+
57
+ ########################################
58
+ # Step 4. Tokenize the input
59
+ valid_input = '2 + 3 * 4'
60
+ tokens = tokenizer(valid_input, grammar_s_expr)
61
+
62
+ ########################################
63
+ # Step 5. Let the parser process the input
64
+ result = parser.parse(tokens)
65
+ puts "Parse successful? #{result.success?}"
66
+ pp result
67
+
68
+
69
+ ########################################
70
+ # Step 6. Generate a parse tree from the parse result
71
+ ptree = result.parse_tree
72
+ =begin
73
+ ########################################
74
+ # Step 7. Render the parse tree (in JSON)
75
+ # Let's create a parse tree visitor
76
+ visitor = Rley::ParseTreeVisitor.new(ptree)
77
+
78
+ # Here we create a renderer object...
79
+ renderer = Rley::Formatter::Json.new(STDOUT)
80
+
81
+ # Now emit the parse tree as JSON on the console output
82
+ puts "JSON rendering of the parse tree for '#{valid_input}' input:"
83
+ renderer.render(visitor)
84
+ =end
85
+ # End of file
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.1.11'
6
+ Version = '0.1.12'
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm"
@@ -31,6 +31,11 @@ module Rley # This module is used as a namespace
31
31
  return dotted_rule.reduce_item?
32
32
  end
33
33
 
34
+ # Returns true if the dot is at the start of the rhs of the production.
35
+ def predicted?()
36
+ return dotted_rule.predicted_item?
37
+ end
38
+
34
39
  # Next expected symbol in the production
35
40
  def next_symbol()
36
41
  return dotted_rule.next_symbol
@@ -1,6 +1,7 @@
1
1
  require_relative 'chart'
2
2
  require_relative '../ptree/parse_tree'
3
3
 
4
+
4
5
  module Rley # This module is used as a namespace
5
6
  module Parser # This module is used as a namespace
6
7
  class Parsing
@@ -47,19 +48,23 @@ module Rley # This module is used as a namespace
47
48
  parse_state)
48
49
 
49
50
  when Syntax::NonTerminal
50
- # Retrieve complete states
51
+ # Retrieve complete states with curr_symbol as lhs
51
52
  new_states = chart[state_set_index].states_rewriting(curr_symbol)
52
53
  # TODO: make this more robust
53
54
  parse_state = new_states[0]
54
55
  curr_dotted_item = parse_state.dotted_rule
56
+ # Additional check
57
+ if ptree.current_node.symbol != curr_dotted_item.production.lhs
58
+ ptree.step_back(state_set_index)
59
+ end
55
60
  ptree.current_node.range = { low: parse_state.origin }
56
61
  node_range = ptree.current_node.range
57
62
  ptree.add_children(curr_dotted_item.production, node_range)
58
63
  link_node_to_token(ptree, state_set_index - 1)
59
64
 
60
- when NilClass
65
+ when NilClass # No symbol on the left of dot
61
66
  lhs = curr_dotted_item.production.lhs
62
- new_states = chart[state_set_index].states_expecting(lhs)
67
+ new_states = states_expecting(lhs, state_set_index, true)
63
68
  break if new_states.empty?
64
69
  # TODO: make this more robust
65
70
  parse_state = new_states[0]
@@ -98,7 +103,7 @@ module Rley # This module is used as a namespace
98
103
  curr_token = tokens[aPosition]
99
104
  return unless curr_token.terminal == aTerminal
100
105
 
101
- states = states_expecting(aTerminal, aPosition)
106
+ states = states_expecting(aTerminal, aPosition, false)
102
107
  states.each do |s|
103
108
  next_item = nextMapping.call(s.dotted_rule)
104
109
  push_state(next_item, s.origin, aPosition + 1)
@@ -119,7 +124,7 @@ module Rley # This module is used as a namespace
119
124
  def completion(aState, aPosition, &nextMapping)
120
125
  curr_origin = aState.origin
121
126
  curr_lhs = aState.dotted_rule.lhs
122
- states = states_expecting(curr_lhs, curr_origin)
127
+ states = states_expecting(curr_lhs, curr_origin, false)
123
128
  states.each do |s|
124
129
  next_item = nextMapping.call(s.dotted_rule)
125
130
  push_state(next_item, s.origin, aPosition)
@@ -129,8 +134,19 @@ module Rley # This module is used as a namespace
129
134
 
130
135
  # The list of ParseState from the chart entry at given position
131
136
  # that expect the given terminal
132
- def states_expecting(aTerminal, aPosition)
133
- return chart[aPosition].states_expecting(aTerminal)
137
+ def states_expecting(aTerminal, aPosition, toSort)
138
+ expecting = chart[aPosition].states_expecting(aTerminal)
139
+ return expecting if !toSort || expecting.size < 2
140
+
141
+ # Put predicted states ahead
142
+ (predicted, others) = expecting.partition { |state| state.predicted? }
143
+
144
+ # Sort states in descending order of their origin value
145
+ [predicted, others].each do |set|
146
+ set.sort! { |a,b| b.origin <=> a.origin }
147
+ end
148
+
149
+ return predicted + others
134
150
  end
135
151
 
136
152
  private
@@ -21,13 +21,15 @@ module Rley # This module is used as a namespace
21
21
  @states << aState unless include?(aState)
22
22
  end
23
23
 
24
- # The list of ParseState that expect the given terminal
25
- def states_expecting(aTerminal)
26
- return states.select { |s| s.dotted_rule.next_symbol == aTerminal }
24
+ # The list of ParseState that expect the given symbol.
25
+ # @param aSymbol [GrmSymbol] the expected symbol
26
+ # (=on the right of the dot)
27
+ def states_expecting(aSymbol)
28
+ return states.select { |s| s.dotted_rule.next_symbol == aSymbol }
27
29
  end
28
30
 
29
- # The list of complete ParseState that have the symbol as the lhs of their
30
- # production
31
+ # The list of complete ParseState that have the given non-terminal
32
+ # symbol as the lhs of their production.
31
33
  def states_rewriting(aNonTerm)
32
34
  return states.select do |s|
33
35
  (s.dotted_rule.production.lhs == aNonTerm) && s.complete?
@@ -33,8 +33,19 @@ module Rley # This module is used as a namespace
33
33
  aVisitor.end_visit_ptree(self)
34
34
  end
35
35
 
36
-
36
+ # Add children to the current node.
37
+ # The children nodes correspond to the rhs of the production.
38
+ # Update the range in the children given the passed range object.
39
+ # Pre-condition: the current node refers to the same (non-terminal)
40
+ # symbol as the lhs of the given production.
41
+ # @param aProduction [Production] A production rule
42
+ # @param aRange [TokenRange]
37
43
  def add_children(aProduction, aRange)
44
+ if aProduction.lhs != current_node.symbol
45
+ msg = "Internal error. Expected symbol was #{aProduction.lhs} but current node is #{current_node.symbol}"
46
+ fail StandardError, msg
47
+ end
48
+
38
49
  aProduction.rhs.each do |symb|
39
50
  case symb
40
51
  when Syntax::Terminal
@@ -7,12 +7,19 @@ require_relative '../../../lib/rley/syntax/grammar_builder'
7
7
  require_relative '../../../lib/rley/parser/dotted_item'
8
8
  require_relative '../../../lib/rley/parser/token'
9
9
  require_relative '../../../lib/rley/parser/earley_parser'
10
+ require_relative '../support/grammar_abc_helper'
11
+ require_relative '../support/grammar_b_expr_helper'
12
+
13
+
10
14
  # Load the class under test
11
15
  require_relative '../../../lib/rley/parser/parsing'
12
16
 
13
17
  module Rley # Open this namespace to avoid module qualifier prefixes
14
18
  module Parser # Open this namespace to avoid module qualifier prefixes
15
19
  describe Parsing do
20
+ include GrammarABCHelper # Mix-in module with builder for grammar abc
21
+ include GrammarBExprHelper # Mix-in with builder for simple expressions
22
+
16
23
  # Grammar 1: A very simple language
17
24
  # S ::= A.
18
25
  # A ::= "a" A "c".
@@ -87,7 +94,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
87
94
  item2 = DottedItem.new(prod_A1, 1)
88
95
  subject.push_state(item1, 2, 2)
89
96
  subject.push_state(item2, 2, 2)
90
- states = subject.states_expecting(c_, 2)
97
+ states = subject.states_expecting(c_, 2, false)
91
98
  expect(states.size).to eq(1)
92
99
  expect(states[0].dotted_rule).to eq(item1)
93
100
  end
@@ -114,11 +121,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
114
121
 
115
122
  context 'Parse tree building:' do
116
123
  let(:sample_grammar1) do
117
- builder = Syntax::GrammarBuilder.new
118
- builder.add_terminals('a', 'b', 'c')
119
- builder.add_production('S' => ['A'])
120
- builder.add_production('A' => %w(a A c))
121
- builder.add_production('A' => ['b'])
124
+ builder = grammar_abc_builder
122
125
  builder.grammar
123
126
  end
124
127
 
@@ -128,13 +131,28 @@ module Rley # Open this namespace to avoid module qualifier prefixes
128
131
  end
129
132
  end
130
133
 
134
+ let(:b_expr_grammar) do
135
+ builder = grammar_expr_builder
136
+ builder.grammar
137
+ end
138
+
131
139
 
132
- it 'should build the parse tree for a non-ambiguous grammar' do
140
+ it 'should build the parse tree for a simple non-ambiguous grammar' do
133
141
  parser = EarleyParser.new(sample_grammar1)
134
142
  instance = parser.parse(token_seq1)
135
143
  ptree = instance.parse_tree
136
144
  expect(ptree).to be_kind_of(PTree::ParseTree)
137
145
  end
146
+
147
+ it 'should build the parse tree for a simple expression grammar' do
148
+ parser = EarleyParser.new(b_expr_grammar)
149
+ tokens = expr_tokenizer('2 + 3 * 4', b_expr_grammar)
150
+ instance = parser.parse(tokens)
151
+ ptree = instance.parse_tree
152
+ expect(ptree).to be_kind_of(PTree::ParseTree)
153
+ end
154
+
155
+
138
156
  end # context
139
157
  end # describe
140
158
  end # module
@@ -2,6 +2,7 @@ require_relative '../../spec_helper'
2
2
 
3
3
  require_relative '../support/grammar_abc_helper'
4
4
 
5
+
5
6
  # Load the class under test
6
7
  require_relative '../../../lib/rley/ptree/parse_tree'
7
8
 
@@ -9,7 +10,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
9
10
  module PTree # Open this namespace to avoid module qualifier prefixes
10
11
  describe ParseTree do
11
12
  include GrammarABCHelper # Mix-in module with builder for grammar abc
12
-
13
+
14
+
13
15
  let(:sample_grammar) do
14
16
  builder = grammar_abc_builder
15
17
  builder.grammar
@@ -0,0 +1,39 @@
1
+ # Load the builder class
2
+ require_relative '../../../lib/rley/syntax/grammar_builder'
3
+ require_relative '../../../lib/rley/parser/token'
4
+
5
+
6
+ module GrammarBExprHelper
7
+ # Factory method. Creates a grammar builder for a basic arithmetic
8
+ # expression grammar.
9
+ # (based on example in article on Earley's algorithm in Wikipedia)
10
+ def grammar_expr_builder()
11
+ builder = Rley::Syntax::GrammarBuilder.new
12
+ builder.add_terminals('+', '*', 'integer')
13
+ builder.add_production('P' => 'S')
14
+ builder.add_production('S' => %w(S + M))
15
+ builder.add_production('S' => 'M')
16
+ builder.add_production('M' => %w(M * T))
17
+ builder.add_production('M' => 'T')
18
+ builder.add_production('T' => 'integer')
19
+ builder
20
+ end
21
+
22
+ # Basic expression tokenizer
23
+ def expr_tokenizer(aText, aGrammar)
24
+ tokens = aText.scan(/\S+/).map do |lexeme|
25
+ case lexeme
26
+ when '+', '*'
27
+ terminal = aGrammar.name2symbol[lexeme]
28
+ when /^[-+]?\d+$/
29
+ terminal = aGrammar.name2symbol['integer']
30
+ else
31
+ msg = "Unknown input text '#{lexeme}'"
32
+ fail StandardError, msg
33
+ end
34
+ Rley::Parser::Token.new(lexeme, terminal)
35
+ end
36
+
37
+ return tokens
38
+ end
39
+ end # module
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.11
4
+ version: 0.1.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-16 00:00:00.000000000 Z
11
+ date: 2014-12-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -102,6 +102,7 @@ files:
102
102
  - examples/grammars/grammar_abc.rb
103
103
  - examples/grammars/grammar_L0.rb
104
104
  - examples/parsers/parsing_abc.rb
105
+ - examples/parsers/parsing_b_expr.rb
105
106
  - examples/recognizers/recognizer_abc.rb
106
107
  - lib/rley.rb
107
108
  - lib/rley/constants.rb
@@ -145,6 +146,7 @@ files:
145
146
  - spec/rley/ptree/parse_tree_spec.rb
146
147
  - spec/rley/ptree/token_range_spec.rb
147
148
  - spec/rley/support/grammar_abc_helper.rb
149
+ - spec/rley/support/grammar_b_expr_helper.rb
148
150
  - spec/rley/syntax/grammar_builder_spec.rb
149
151
  - spec/rley/syntax/grammar_spec.rb
150
152
  - spec/rley/syntax/grm_symbol_spec.rb