rley 0.1.11 → 0.1.12

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MDNkYmEzYTFiNmUyMzk2MTgwOTczZmNlMTVjMDU3ZDIzNjI4YWNmYw==
4
+ NTc2NGE4MzYxOTc1ZDUyMDVkYjdmNGFhODllNmEwM2YxMjVkZDk1OQ==
5
5
  data.tar.gz: !binary |-
6
- ZWQ1MmMzMDA2NzcxOTUzM2ZjMDg0Yjg2OTg0MTVhODgzOTQ1OTExNg==
6
+ ZDBkMTdmZWM2NTMwYWMwNDFkNDQ4NGI2YzdkNjk3NDU0ZGExMGYzNA==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- ZjhmMDlmZjk2ZDYwY2EwNDBjZjIxYWMyMDA1ZjQ4MTVmOWE0NjRhZmI4OTg1
10
- MWZiMzQwNzFkOGIyMzJmZTdmMTg2NTRkOGVmM2ViNjg0MzU1MjllODJkZTIx
11
- NzQ4NDA3NmUwYzY4YzBhMmYwODg1MGIzNGJmMDU3NDBjMzJmZDY=
9
+ YWZjYmQxNDNiNjVmMDYyYWI1YzM0YzMyN2VjMzk4ZjUxOTIyMmQxNTE4Y2Y4
10
+ YjM4NTAyNGNhNjhiOWRhNWMyZWVmYzRhYjFjNzhhOWEzMDY1ZTgzMzRiMTVh
11
+ Njg1MDRjMjQ5NTlhYmU3NTk2MDBiMWQyZmI3MDIyMWUwNGM1NjM=
12
12
  data.tar.gz: !binary |-
13
- OTBhM2JiNzRiNzJlNmNmZWRjODJhMzY3MzkyMDdkNDRmNzcyYzFiMGNmYThm
14
- YWVhMTBjMzkzMDI1NzMyNTNiOTkzZGFlODAxYzcyYWQ0Y2QyNGEwZWUxYjBj
15
- YTk0OTViZWVkODk5N2U4MmUxZWJlZTBjY2QxOTNmMDk5YmIxZjU=
13
+ ODZiMmY1ZDMwNTVlNmM3ZGJmZDIzOTAzYjQ1MTFlMmY4OTBlODhhZWZhN2M3
14
+ MjEzYjI0YjZhNTA4NzkzYjJiMzMwY2Y0NzliMGMyZDdlMjI3NjkzMTliYWNh
15
+ YzQ2MGVmNWM1NTA5M2IxYjcxNzliNzhhMzViMDE4OTM3NWY1NjI=
@@ -1,3 +1,8 @@
1
+ ### 0.1.12 / 2014-12-22
2
+ * [FIX] Fixed `Parsing#parse_tree`: code couldn't cope with parse state set containing more
3
+ than one parse state that expected the same symbol.
4
+ * [NEW] Added one more parser example (for very basic arithmetic expressions)
5
+
1
6
  ### 0.1.11 / 2014-12-16
2
7
  * [FIX] Fixed all but one YARD (documentation) warnings. Most of them were due to mismatch
3
8
  in method argument names between source code and documentation.
@@ -0,0 +1,85 @@
1
+ # Purpose: to demonstrate how to parse basic arithmetic expressions
2
+ # and render a parse tree
3
+ require 'pp' # TODO remove this dependency
4
+ require 'rley' # Load the gem
5
+
6
+ # Steps to render a parse tree (of a valid parsed input):
7
+ # 1. Define a grammar
8
+ # 2. Create a tokenizer for the language
9
+ # 3. Create a parser for that grammar
10
+ # 4. Tokenize the input
11
+ # 5. Let the parser process the input
12
+ # 6. Generate a parse tree from the parse result
13
+ # 7. Render the parse tree (in JSON)
14
+
15
+ ########################################
16
+ # Step 1. Define a grammar for a very simple arithmetic expression language
17
+ # (based on example in article on Earley's algorithm in Wikipedia)
18
+
19
+ # Let's create the grammar piece by piece
20
+ builder = Rley::Syntax::GrammarBuilder.new
21
+ builder.add_terminals('+', '*', 'integer')
22
+ builder.add_production('P' => 'S')
23
+ builder.add_production('S' => %w(S + M))
24
+ builder.add_production('S' => 'M')
25
+ builder.add_production('M' => %w(M * T))
26
+ builder.add_production('M' => 'T')
27
+ builder.add_production('T' => 'integer')
28
+
29
+ # And now build the grammar...
30
+ grammar_s_expr = builder.grammar
31
+
32
+
33
+ ########################################
34
+ # Step 2. Create a tokenizer for the language
35
+ # The tokenizer transforms the input into an array of tokens
36
+ def tokenizer(aText, aGrammar)
37
+ tokens = aText.scan(/\S+/).map do |lexeme|
38
+ case lexeme
39
+ when '+', '*'
40
+ terminal = aGrammar.name2symbol[lexeme]
41
+ when /^[-+]?\d+$/
42
+ terminal = aGrammar.name2symbol['integer']
43
+ else
44
+ msg = "Unknown input text '#{lexeme}'"
45
+ fail StandardError, msg
46
+ end
47
+ Rley::Parser::Token.new(lexeme, terminal)
48
+ end
49
+
50
+ return tokens
51
+ end
52
+
53
+ ########################################
54
+ # Step 3. Create a parser for that grammar
55
+ parser = Rley::Parser::EarleyParser.new(grammar_s_expr)
56
+
57
+ ########################################
58
+ # Step 4. Tokenize the input
59
+ valid_input = '2 + 3 * 4'
60
+ tokens = tokenizer(valid_input, grammar_s_expr)
61
+
62
+ ########################################
63
+ # Step 5. Let the parser process the input
64
+ result = parser.parse(tokens)
65
+ puts "Parse successful? #{result.success?}"
66
+ pp result
67
+
68
+
69
+ ########################################
70
+ # Step 6. Generate a parse tree from the parse result
71
+ ptree = result.parse_tree
72
+ =begin
73
+ ########################################
74
+ # Step 7. Render the parse tree (in JSON)
75
+ # Let's create a parse tree visitor
76
+ visitor = Rley::ParseTreeVisitor.new(ptree)
77
+
78
+ #Here we create a renderer object...
79
+ renderer = Rley::Formatter::Json.new(STDOUT)
80
+
81
+ # Now emit the parse tree as JSON on the console output
82
+ puts "JSON rendering of the parse tree for '#{valid_input}' input:"
83
+ renderer.render(visitor)
84
+ =end
85
+ # End of file
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.1.11'
6
+ Version = '0.1.12'
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm"
@@ -31,6 +31,11 @@ module Rley # This module is used as a namespace
31
31
  return dotted_rule.reduce_item?
32
32
  end
33
33
 
34
+ # Returns true if the dot is at the start of the rhs of the production.
35
+ def predicted?()
36
+ return dotted_rule.predicted_item?
37
+ end
38
+
34
39
  # Next expected symbol in the production
35
40
  def next_symbol()
36
41
  return dotted_rule.next_symbol
@@ -1,6 +1,7 @@
1
1
  require_relative 'chart'
2
2
  require_relative '../ptree/parse_tree'
3
3
 
4
+
4
5
  module Rley # This module is used as a namespace
5
6
  module Parser # This module is used as a namespace
6
7
  class Parsing
@@ -47,19 +48,23 @@ module Rley # This module is used as a namespace
47
48
  parse_state)
48
49
 
49
50
  when Syntax::NonTerminal
50
- # Retrieve complete states
51
+ # Retrieve complete states with curr_symbol as lhs
51
52
  new_states = chart[state_set_index].states_rewriting(curr_symbol)
52
53
  # TODO: make this more robust
53
54
  parse_state = new_states[0]
54
55
  curr_dotted_item = parse_state.dotted_rule
56
+ # Additional check
57
+ if ptree.current_node.symbol != curr_dotted_item.production.lhs
58
+ ptree.step_back(state_set_index)
59
+ end
55
60
  ptree.current_node.range = { low: parse_state.origin }
56
61
  node_range = ptree.current_node.range
57
62
  ptree.add_children(curr_dotted_item.production, node_range)
58
63
  link_node_to_token(ptree, state_set_index - 1)
59
64
 
60
- when NilClass
65
+ when NilClass # No symbol on the left of dot
61
66
  lhs = curr_dotted_item.production.lhs
62
- new_states = chart[state_set_index].states_expecting(lhs)
67
+ new_states = states_expecting(lhs, state_set_index, true)
63
68
  break if new_states.empty?
64
69
  # TODO: make this more robust
65
70
  parse_state = new_states[0]
@@ -98,7 +103,7 @@ module Rley # This module is used as a namespace
98
103
  curr_token = tokens[aPosition]
99
104
  return unless curr_token.terminal == aTerminal
100
105
 
101
- states = states_expecting(aTerminal, aPosition)
106
+ states = states_expecting(aTerminal, aPosition, false)
102
107
  states.each do |s|
103
108
  next_item = nextMapping.call(s.dotted_rule)
104
109
  push_state(next_item, s.origin, aPosition + 1)
@@ -119,7 +124,7 @@ module Rley # This module is used as a namespace
119
124
  def completion(aState, aPosition, &nextMapping)
120
125
  curr_origin = aState.origin
121
126
  curr_lhs = aState.dotted_rule.lhs
122
- states = states_expecting(curr_lhs, curr_origin)
127
+ states = states_expecting(curr_lhs, curr_origin, false)
123
128
  states.each do |s|
124
129
  next_item = nextMapping.call(s.dotted_rule)
125
130
  push_state(next_item, s.origin, aPosition)
@@ -129,8 +134,19 @@ module Rley # This module is used as a namespace
129
134
 
130
135
  # The list of ParseState from the chart entry at given position
131
136
  # that expect the given terminal
132
- def states_expecting(aTerminal, aPosition)
133
- return chart[aPosition].states_expecting(aTerminal)
137
+ def states_expecting(aTerminal, aPosition, toSort)
138
+ expecting = chart[aPosition].states_expecting(aTerminal)
139
+ return expecting if !toSort || expecting.size < 2
140
+
141
+ # Put predicted states ahead
142
+ (predicted, others) = expecting.partition { |state| state.predicted? }
143
+
144
+ # Sort state in reverse order of their origin value
145
+ [predicted, others].each do |set|
146
+ set.sort! { |a,b| b.origin <=> a.origin }
147
+ end
148
+
149
+ return predicted + others
134
150
  end
135
151
 
136
152
  private
@@ -21,13 +21,15 @@ module Rley # This module is used as a namespace
21
21
  @states << aState unless include?(aState)
22
22
  end
23
23
 
24
- # The list of ParseState that expect the given terminal
25
- def states_expecting(aTerminal)
26
- return states.select { |s| s.dotted_rule.next_symbol == aTerminal }
24
+ # The list of ParseState that expect the given symbol.
25
+ # @param aSymbol [GrmSymbol] the expected symbol
26
+ # (=on the right of the dot)
27
+ def states_expecting(aSymbol)
28
+ return states.select { |s| s.dotted_rule.next_symbol == aSymbol }
27
29
  end
28
30
 
29
- # The list of complete ParseState that have the symbol as the lhs of their
30
- # production
31
+ # The list of complete ParseState that have the given non-terminal
32
+ # symbol as the lhs of their production.
31
33
  def states_rewriting(aNonTerm)
32
34
  return states.select do |s|
33
35
  (s.dotted_rule.production.lhs == aNonTerm) && s.complete?
@@ -33,8 +33,19 @@ module Rley # This module is used as a namespace
33
33
  aVisitor.end_visit_ptree(self)
34
34
  end
35
35
 
36
-
36
+ # Add children to the current node.
37
+ # The children nodes correspond to the rhs of the production.
38
+ # Update the range in the children given the passed range object.
39
+ # Pre-condition: the current node refers to the same (non-terminal)
40
+ # symbol as the lhs of the given production.
41
+ # @param aProduction [Production] A production rule
42
+ # @param aRange [TokenRange]
37
43
  def add_children(aProduction, aRange)
44
+ if aProduction.lhs != current_node.symbol
45
+ msg = "Internal error. Expected symbol was #{aProduction.lhs} but current node is #{current_node.symbol}"
46
+ fail StandardError, msg
47
+ end
48
+
38
49
  aProduction.rhs.each do |symb|
39
50
  case symb
40
51
  when Syntax::Terminal
@@ -7,12 +7,19 @@ require_relative '../../../lib/rley/syntax/grammar_builder'
7
7
  require_relative '../../../lib/rley/parser/dotted_item'
8
8
  require_relative '../../../lib/rley/parser/token'
9
9
  require_relative '../../../lib/rley/parser/earley_parser'
10
+ require_relative '../support/grammar_abc_helper'
11
+ require_relative '../support/grammar_b_expr_helper'
12
+
13
+
10
14
  # Load the class under test
11
15
  require_relative '../../../lib/rley/parser/parsing'
12
16
 
13
17
  module Rley # Open this namespace to avoid module qualifier prefixes
14
18
  module Parser # Open this namespace to avoid module qualifier prefixes
15
19
  describe Parsing do
20
+ include GrammarABCHelper # Mix-in module with builder for grammar abc
21
+ include GrammarBExprHelper # Mix-in with builder for simple expressions
22
+
16
23
  # Grammar 1: A very simple language
17
24
  # S ::= A.
18
25
  # A ::= "a" A "c".
@@ -87,7 +94,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
87
94
  item2 = DottedItem.new(prod_A1, 1)
88
95
  subject.push_state(item1, 2, 2)
89
96
  subject.push_state(item2, 2, 2)
90
- states = subject.states_expecting(c_, 2)
97
+ states = subject.states_expecting(c_, 2, false)
91
98
  expect(states.size).to eq(1)
92
99
  expect(states[0].dotted_rule).to eq(item1)
93
100
  end
@@ -114,11 +121,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
114
121
 
115
122
  context 'Parse tree building:' do
116
123
  let(:sample_grammar1) do
117
- builder = Syntax::GrammarBuilder.new
118
- builder.add_terminals('a', 'b', 'c')
119
- builder.add_production('S' => ['A'])
120
- builder.add_production('A' => %w(a A c))
121
- builder.add_production('A' => ['b'])
124
+ builder = grammar_abc_builder
122
125
  builder.grammar
123
126
  end
124
127
 
@@ -128,13 +131,28 @@ module Rley # Open this namespace to avoid module qualifier prefixes
128
131
  end
129
132
  end
130
133
 
134
+ let(:b_expr_grammar) do
135
+ builder = grammar_expr_builder
136
+ builder.grammar
137
+ end
138
+
131
139
 
132
- it 'should build the parse tree for a non-ambiguous grammar' do
140
+ it 'should build the parse tree for a simple non-ambiguous grammar' do
133
141
  parser = EarleyParser.new(sample_grammar1)
134
142
  instance = parser.parse(token_seq1)
135
143
  ptree = instance.parse_tree
136
144
  expect(ptree).to be_kind_of(PTree::ParseTree)
137
145
  end
146
+
147
+ it 'should build the parse tree for a simple expression grammar' do
148
+ parser = EarleyParser.new(b_expr_grammar)
149
+ tokens = expr_tokenizer('2 + 3 * 4', b_expr_grammar)
150
+ instance = parser.parse(tokens)
151
+ ptree = instance.parse_tree
152
+ expect(ptree).to be_kind_of(PTree::ParseTree)
153
+ end
154
+
155
+
138
156
  end # context
139
157
  end # describe
140
158
  end # module
@@ -2,6 +2,7 @@ require_relative '../../spec_helper'
2
2
 
3
3
  require_relative '../support/grammar_abc_helper'
4
4
 
5
+
5
6
  # Load the class under test
6
7
  require_relative '../../../lib/rley/ptree/parse_tree'
7
8
 
@@ -9,7 +10,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
9
10
  module PTree # Open this namespace to avoid module qualifier prefixes
10
11
  describe ParseTree do
11
12
  include GrammarABCHelper # Mix-in module with builder for grammar abc
12
-
13
+
14
+
13
15
  let(:sample_grammar) do
14
16
  builder = grammar_abc_builder
15
17
  builder.grammar
@@ -0,0 +1,39 @@
1
+ # Load the builder class
2
+ require_relative '../../../lib/rley/syntax/grammar_builder'
3
+ require_relative '../../../lib/rley/parser/token'
4
+
5
+
6
+ module GrammarBExprHelper
7
+ # Factory method. Creates a grammar builder for a basic arithmetic
8
+ # expression grammar.
9
+ # (based on example in article on Earley's algorithm in Wikipedia)
10
+ def grammar_expr_builder()
11
+ builder = Rley::Syntax::GrammarBuilder.new
12
+ builder.add_terminals('+', '*', 'integer')
13
+ builder.add_production('P' => 'S')
14
+ builder.add_production('S' => %w(S + M))
15
+ builder.add_production('S' => 'M')
16
+ builder.add_production('M' => %w(M * T))
17
+ builder.add_production('M' => 'T')
18
+ builder.add_production('T' => 'integer')
19
+ builder
20
+ end
21
+
22
+ # Basic expression tokenizer
23
+ def expr_tokenizer(aText, aGrammar)
24
+ tokens = aText.scan(/\S+/).map do |lexeme|
25
+ case lexeme
26
+ when '+', '*'
27
+ terminal = aGrammar.name2symbol[lexeme]
28
+ when /^[-+]?\d+$/
29
+ terminal = aGrammar.name2symbol['integer']
30
+ else
31
+ msg = "Unknown input text '#{lexeme}'"
32
+ fail StandardError, msg
33
+ end
34
+ Rley::Parser::Token.new(lexeme, terminal)
35
+ end
36
+
37
+ return tokens
38
+ end
39
+ end # module
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.11
4
+ version: 0.1.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-16 00:00:00.000000000 Z
11
+ date: 2014-12-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -102,6 +102,7 @@ files:
102
102
  - examples/grammars/grammar_abc.rb
103
103
  - examples/grammars/grammar_L0.rb
104
104
  - examples/parsers/parsing_abc.rb
105
+ - examples/parsers/parsing_b_expr.rb
105
106
  - examples/recognizers/recognizer_abc.rb
106
107
  - lib/rley.rb
107
108
  - lib/rley/constants.rb
@@ -145,6 +146,7 @@ files:
145
146
  - spec/rley/ptree/parse_tree_spec.rb
146
147
  - spec/rley/ptree/token_range_spec.rb
147
148
  - spec/rley/support/grammar_abc_helper.rb
149
+ - spec/rley/support/grammar_b_expr_helper.rb
148
150
  - spec/rley/syntax/grammar_builder_spec.rb
149
151
  - spec/rley/syntax/grammar_spec.rb
150
152
  - spec/rley/syntax/grm_symbol_spec.rb