rley 0.4.07 → 0.4.08
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +0 -2
- data/CHANGELOG.md +14 -9
- data/README.md +8 -9
- data/examples/data_formats/JSON/cli_options.rb +4 -3
- data/examples/data_formats/JSON/json_demo.rb +4 -1
- data/examples/data_formats/JSON/json_lexer.rb +1 -1
- data/examples/data_formats/JSON/json_minifier.rb +45 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/formatter/asciitree.rb +1 -1
- data/lib/rley/gfg/grm_flow_graph.rb +2 -3
- data/lib/rley/parser/gfg_parsing.rb +2 -2
- data/lib/rley/parser/parse_forest_builder.rb +7 -7
- data/lib/rley/parser/parse_forest_factory.rb +3 -39
- data/lib/rley/parser/parse_rep_creator.rb +53 -0
- data/lib/rley/parser/parse_tree_builder.rb +9 -7
- data/lib/rley/parser/parse_tree_factory.rb +3 -39
- data/lib/rley/parser/parse_walker_factory.rb +19 -11
- data/lib/rley/ptree/parse_tree.rb +1 -1
- data/lib/rley/ptree/terminal_node.rb +1 -1
- data/lib/rley/syntax/grammar.rb +20 -1
- data/lib/rley/syntax/grammar_builder.rb +3 -1
- data/lib/rley/syntax/production.rb +7 -1
- data/spec/rley/parser/groucho_spec.rb +1 -1
- data/spec/rley/parser/parse_forest_builder_spec.rb +1 -1
- data/spec/rley/parser/parse_forest_factory_spec.rb +1 -1
- data/spec/rley/parser/parse_tree_builder_spec.rb +7 -6
- data/spec/rley/parser/parse_tree_factory_spec.rb +13 -32
- data/spec/rley/syntax/grammar_spec.rb +6 -0
- data/spec/rley/syntax/production_spec.rb +12 -0
- data/spec/rley/syntax/symbol_seq_spec.rb +1 -1
- data/spec/spec_helper.rb +4 -4
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ce91c79f9088da09449654c9b6e1b6e0581cf1c
|
4
|
+
data.tar.gz: 3b1e3cc1ce21d3b031b7b09cdbb7606e2eb5a747
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7bd55bc558c857bab67f080169c1f9d72705fb66fda177c63032fdb0e8e6674efd1119f9fafba3681593dbc9b027beb05081bfe3f38fb99f46068f94b2563282
|
7
|
+
data.tar.gz: 54186ced4b193fa133b48ecdb2cddad7df0960cd2499b2ae48bc586a657cc7a2e49466b42570d78e8be4d0d123b6c821f5c9ab75b98d2e3c3d04d4876fd61f0e
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,16 +1,24 @@
|
|
1
|
+
### 0.4.08 / 2017-08-06
|
2
|
+
* [FIX] File `/spec/spec_helper.rb` replaced deprecated syntax for `SimpleCov::Formatter::MultiFormatter` construction
|
3
|
+
* [NEW] File `examples/data_formats/JSON/json_minifier.rb` Added a working JSON minifier to the demo app.
|
4
|
+
* [NEW] Class `Syntax::Production` Each production can have a name.
|
5
|
+
* [CHANGE] File `.travis.yml`: removal of Rubinius (rbx) in the list of Ruby versions.
|
6
|
+
* [CHANGE] DRYing the classes `ParseTreeFactory` and `ParseForestFactory` by inheriting from `ParseRepCreator`
|
7
|
+
* [CHANGE] Minor documentation updates.
|
8
|
+
|
1
9
|
### 0.4.07 / 2017-05-25
|
2
|
-
* [FIX] To avoid Fixnum deprecation error in Ruby 2.4
|
10
|
+
* [FIX] To avoid Fixnum deprecation error in Ruby 2.4 and higher, all explicit references to Fixnum have been removed.
|
3
11
|
* [FIX] File `.rubocop.yml`: folder `examples` was excluded from Rubocop control. Now it is in code analysis scope.
|
4
|
-
* [NEW] File `spec
|
12
|
+
* [NEW] File `spec/.rubocop.yml` to tune the code analysis for Rspec files
|
5
13
|
* [NEW] File `examples\.rubocop.yml` to tune the code analysis for example files
|
6
14
|
* [CHANGE] Code re-styling to please Rubocop 0.49.0: less than 10 offences remain (from above 200 count!)
|
7
15
|
* [CHANGE] Added support for Ruby 2.4.x. Files `.travis.yml` and `README.md` updated.
|
8
16
|
|
9
17
|
|
10
18
|
### 0.4.06 / 2017-05-25
|
11
|
-
* [FIX] File `formatter
|
12
|
-
* [FIX] File `formatter
|
13
|
-
* [FIX] File `parser
|
19
|
+
* [FIX] File `formatter/asciitree.rb` fixed inconsistency in comments that caused Yard warnings.
|
20
|
+
* [FIX] File `formatter/bracket_notation.rb` fixed inconsistency in comments that caused Yard warnings.
|
21
|
+
* [FIX] File `parser/parse_entry_set.rb` fixed inconsistency in comments that caused Yard warnings.
|
14
22
|
* [NEW] Method `Grammar#diagnose` performs a number of checks on the grammar. It detects whether:
|
15
23
|
there are undefined non-terminals (i.e. non-terminals without a rule that define them)
|
16
24
|
there are non-productive non-terminals (i.e. non-terminals that don't derive a string of terminals)
|
@@ -20,14 +28,11 @@
|
|
20
28
|
* [NEW] Method `GrmSymbol#generative?` indicates whether the grammar symbol can produce a sequence of terminals.
|
21
29
|
* [CHANGE] Class `GrammarBuilder` Improved the API documentation.
|
22
30
|
|
23
|
-
|
24
31
|
### 0.4.05 / 2017-05-06
|
25
32
|
* [CHANGE] File `README.md` Added documentation on how to build parse trees and manipulate them.
|
26
|
-
* [CHANGE] File `examples
|
33
|
+
* [CHANGE] File `examples/NLP/mini_en_demo.rb` now emits different parse tree representations.
|
27
34
|
* [NEW] Directory `www`. Contains a diagram output produced from Rley and fed to online RSyntaxTree tool.
|
28
35
|
|
29
|
-
|
30
|
-
|
31
36
|
### 0.4.04 / 2017-05-01
|
32
37
|
* [NEW] `Asciitree` formatter class. Allows parse tree output in simple printable text.
|
33
38
|
* [CHANGE] Major enhancements in directory `examples\data_formats\JSON`. The demo command-line tool parses JSON and outputs the parse tree in one of the supported formats.
|
data/README.md
CHANGED
@@ -110,7 +110,7 @@ The subset of English grammar is based on an example from the NLTK book.
|
|
110
110
|
grammar = builder.grammar
|
111
111
|
```
|
112
112
|
|
113
|
-
|
113
|
+
### Creating a lexicon
|
114
114
|
|
115
115
|
```ruby
|
116
116
|
# To simplify things, lexicon is implemented as a Hash with pairs of the form:
|
@@ -139,7 +139,7 @@ The subset of English grammar is based on an example from the NLTK book.
|
|
139
139
|
```
|
140
140
|
|
141
141
|
|
142
|
-
|
142
|
+
### Creating a tokenizer
|
143
143
|
```ruby
|
144
144
|
# A tokenizer reads the input string and converts it into a sequence of tokens
|
145
145
|
# Highly simplified tokenizer implementation.
|
@@ -162,14 +162,14 @@ creating a lexicon and tokenizer from scratch. Here are a few Ruby Part-of-Speec
|
|
162
162
|
|
163
163
|
|
164
164
|
|
165
|
-
|
165
|
+
### Building the parser
|
166
166
|
```ruby
|
167
167
|
# Easy with Rley...
|
168
168
|
parser = Rley::Parser::GFGEarleyParser.new(grammar)
|
169
169
|
```
|
170
170
|
|
171
171
|
|
172
|
-
|
172
|
+
### Parsing some input
|
173
173
|
```ruby
|
174
174
|
input_to_parse = 'John saw Mary with a telescope'
|
175
175
|
# Convert input text into a sequence of token objects...
|
@@ -191,7 +191,7 @@ representing the parse outcome:
|
|
191
191
|
|
192
192
|
For our whirlwind tour, we will opt for parse trees.
|
193
193
|
|
194
|
-
|
194
|
+
### Generating the parse tree
|
195
195
|
|
196
196
|
```ruby
|
197
197
|
ptree = result.parse_tree
|
@@ -210,7 +210,7 @@ Let's do it by:
|
|
210
210
|
- Using one of the built-in visit subscribers specifically created to render the
|
211
211
|
parse tree in a given output format.
|
212
212
|
|
213
|
-
|
213
|
+
#### Creating a parse tree visitor
|
214
214
|
Good news: creating a parse tree visitor for the parse tree `ptree` is just
|
215
215
|
a one-liner:
|
216
216
|
|
@@ -219,7 +219,7 @@ an one-liner:
|
|
219
219
|
visitor = Rley::ParseTreeVisitor.new(ptree)
|
220
220
|
```
|
221
221
|
|
222
|
-
|
222
|
+
#### Visiting the parse tree
|
223
223
|
|
224
224
|
Unsurprisingly, to start the parse tree visit, one calls the `#start` method:
|
225
225
|
|
@@ -293,7 +293,7 @@ the tree node being visited.
|
|
293
293
|
|
294
294
|
Not really impressive? So let's use another formatter...
|
295
295
|
|
296
|
-
|
296
|
+
#### Visualizing the parse tree structure
|
297
297
|
If one replaces the previous formatter by an instance of
|
298
298
|
`Rley::Formatter::Asciitree` the output now shows the parse tree structure.
|
299
299
|
|
@@ -345,7 +345,6 @@ By the way, this tool is also a Ruby gem, [rsyntaxtree](https://rubygems.org/gem
|
|
345
345
|
![Sample parse tree diagram](www/sample_parse_tree.png)
|
346
346
|
|
347
347
|
|
348
|
-
|
349
348
|
## Error reporting
|
350
349
|
__Rley__ is a non-violent parser, that is, it won't throw an exception when it
|
351
350
|
detects a syntax error. Instead, the parse result will be marked as
|
@@ -26,10 +26,10 @@ class CLIOptions < Hash
|
|
26
26
|
and renders its parse tree to the standard output
|
27
27
|
in the format specified in the command-line.
|
28
28
|
|
29
|
-
Usage:
|
29
|
+
Usage: json_demo.rb [options] FILE
|
30
30
|
|
31
31
|
Examples:
|
32
|
-
|
32
|
+
json_demo --format ascii_tree sample01.json
|
33
33
|
END_BANNER
|
34
34
|
|
35
35
|
opts.separator ''
|
@@ -37,11 +37,12 @@ END_BANNER
|
|
37
37
|
format_help = <<-END_TEXT
|
38
38
|
Select the output format (default: ascii_tree). Available formats:
|
39
39
|
ascii_tree Simple text representation of parse trees
|
40
|
+
minify Strip all unnecessary whitespace in the input json file
|
40
41
|
labelled Labelled square notation (LBN)
|
41
42
|
Use online tools (e.g. http://yohasebe.com/rsyntaxtree/)
|
42
43
|
to visualize parse trees from LBN output.
|
43
44
|
END_TEXT
|
44
|
-
formats = %i[ascii_tree labelled]
|
45
|
+
formats = %i[ascii_tree labelled minify]
|
45
46
|
opts.on('-f', '--format FORMAT', formats, format_help) do |frm|
|
46
47
|
self[:format] = frm
|
47
48
|
end
|
@@ -1,8 +1,9 @@
|
|
1
1
|
require_relative 'cli_options'
|
2
2
|
require_relative 'json_parser'
|
3
|
+
require_relative 'json_minifier'
|
3
4
|
|
4
5
|
prog_name = 'json_demo'
|
5
|
-
prog_version = '0.
|
6
|
+
prog_version = '0.2.0'
|
6
7
|
|
7
8
|
cli_options = CLIOptions.new(prog_name, prog_version, ARGV)
|
8
9
|
if ARGV.empty?
|
@@ -32,6 +33,8 @@ case cli_options[:format]
|
|
32
33
|
renderer = Rley::Formatter::Asciitree.new($stdout)
|
33
34
|
when :labelled
|
34
35
|
renderer = Rley::Formatter::BracketNotation.new($stdout)
|
36
|
+
when :minify
|
37
|
+
renderer = JSONMinifier.new($stdout)
|
35
38
|
end
|
36
39
|
|
37
40
|
# Let's create a parse tree visitor
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# File: json_minifier.rb
|
2
|
+
|
3
|
+
|
4
|
+
# A JSON minifier, it removes unnecessary whitespaces in a JSON expression.
|
5
|
+
# It typically reduces size by half.
|
6
|
+
class JSONMinifier
|
7
|
+
# The IO output stream in which the formatter's result will be sent.
|
8
|
+
attr_reader(:output)
|
9
|
+
|
10
|
+
# Constructor.
|
11
|
+
# @param anIO [IO] an output IO where the formatter's result will
|
12
|
+
# be placed.
|
13
|
+
def initialize(anIO)
|
14
|
+
@output = anIO
|
15
|
+
end
|
16
|
+
|
17
|
+
# Given a parse tree visitor, perform the visit
|
18
|
+
# and render the visit events in the output stream.
|
19
|
+
# @param aVisitor [ParseTreeVisitor]
|
20
|
+
def render(aVisitor)
|
21
|
+
aVisitor.subscribe(self)
|
22
|
+
aVisitor.start
|
23
|
+
aVisitor.unsubscribe(self)
|
24
|
+
end
|
25
|
+
|
26
|
+
# Method called by a ParseTreeVisitor to which the formatter subscribed.
|
27
|
+
# Notification of a visit event: the visitor is about to visit
|
28
|
+
# a terminal node. The only thing the JSON minifier has to do is
|
29
|
+
# to render the input tokens almost as they appear initially.
|
30
|
+
# @param aTerm [TerminalNode]
|
31
|
+
def before_terminal(aTerm)
|
32
|
+
# Lexeme is the original text representation of the token
|
33
|
+
lexeme = aTerm.token.lexeme
|
34
|
+
literal = if aTerm.symbol.name == 'string'
|
35
|
+
# String values are delimited by double quotes
|
36
|
+
'"' + lexeme + '"'
|
37
|
+
else
|
38
|
+
lexeme
|
39
|
+
end
|
40
|
+
|
41
|
+
output << literal
|
42
|
+
end
|
43
|
+
end # class
|
44
|
+
|
45
|
+
# End of file
|
data/lib/rley/constants.rb
CHANGED
@@ -41,7 +41,7 @@ module Rley # This module is used as a namespace
|
|
41
41
|
# Notification of a visit event: the visitor is about to visit
|
42
42
|
# the children of a non-terminal node
|
43
43
|
# @param parent [NonTerminalNode]
|
44
|
-
# @param
|
44
|
+
# @param _children [Array<ParseTreeNode>] array of children nodes
|
45
45
|
def before_subnodes(parent, _children)
|
46
46
|
rank_of(parent)
|
47
47
|
curr_path << parent
|
@@ -67,10 +67,9 @@ module Rley # This module is used as a namespace
|
|
67
67
|
|
68
68
|
# Walk over all the vertices of the graph that are reachable from a given
|
69
69
|
# start vertex. This is a depth-first graph traversal.
|
70
|
-
# @param aStartVertex [StartVertex] the depth-first traversal begins
|
70
|
+
# @param aStartVertex [StartVertex] the depth-first traversal begins
|
71
71
|
# from here
|
72
|
-
# @param
|
73
|
-
# is found
|
72
|
+
# @param _visitAction [Proc] block called when a new graph vertex is found
|
74
73
|
def traverse_df(aStartVertex, &_visitAction)
|
75
74
|
visited = Set.new
|
76
75
|
stack = []
|
@@ -146,7 +146,7 @@ module Rley # This module is used as a namespace
|
|
146
146
|
def parse_forest()
|
147
147
|
factory = ParseForestFactory.new(self)
|
148
148
|
|
149
|
-
return factory.
|
149
|
+
return factory.create
|
150
150
|
end
|
151
151
|
|
152
152
|
# Factory method. Builds a ParseTree from the parse result.
|
@@ -154,7 +154,7 @@ module Rley # This module is used as a namespace
|
|
154
154
|
def parse_tree()
|
155
155
|
factory = ParseTreeFactory.new(self)
|
156
156
|
|
157
|
-
return factory.
|
157
|
+
return factory.create
|
158
158
|
end
|
159
159
|
|
160
160
|
# Retrieve the very first parse entry added to the chart.
|
@@ -17,8 +17,8 @@ module Rley # This module is used as a namespace
|
|
17
17
|
# The sequence of input tokens
|
18
18
|
attr_reader(:tokens)
|
19
19
|
|
20
|
-
# Link to forest object
|
21
|
-
attr_reader(:
|
20
|
+
# Link to forest object (being) built
|
21
|
+
attr_reader(:result)
|
22
22
|
|
23
23
|
# Link to current path
|
24
24
|
attr_reader(:curr_path)
|
@@ -77,7 +77,7 @@ module Rley # This module is used as a namespace
|
|
77
77
|
range = { low: anEntry.origin, high: anIndex }
|
78
78
|
non_terminal = anEntry.vertex.non_terminal
|
79
79
|
create_non_terminal_node(anEntry, range, non_terminal)
|
80
|
-
@
|
80
|
+
@result = create_forest(curr_parent) unless @last_visitee
|
81
81
|
|
82
82
|
when :backtrack
|
83
83
|
# Restore path
|
@@ -187,7 +187,7 @@ module Rley # This module is used as a namespace
|
|
187
187
|
range = curr_parent.range
|
188
188
|
alternative = Rley::SPPF::AlternativeNode.new(vertex, range)
|
189
189
|
add_subnode(alternative)
|
190
|
-
|
190
|
+
result.is_ambiguous = true
|
191
191
|
# puts "FOREST ADD #{alternative.key}"
|
192
192
|
|
193
193
|
return alternative
|
@@ -218,11 +218,11 @@ module Rley # This module is used as a namespace
|
|
218
218
|
# Add the given node if not yet present in parse forest
|
219
219
|
def add_node_to_forest(aNode)
|
220
220
|
key_node = aNode.key
|
221
|
-
if
|
222
|
-
new_node =
|
221
|
+
if result.include?(key_node)
|
222
|
+
new_node = result.key2node[key_node]
|
223
223
|
else
|
224
224
|
new_node = aNode
|
225
|
-
|
225
|
+
result.key2node[key_node] = new_node
|
226
226
|
# puts "FOREST ADD #{key_node}"
|
227
227
|
end
|
228
228
|
add_subnode(new_node, false)
|
@@ -1,49 +1,13 @@
|
|
1
|
-
require_relative '
|
1
|
+
require_relative 'parse_rep_creator'
|
2
2
|
require_relative 'parse_forest_builder'
|
3
3
|
|
4
4
|
module Rley # This module is used as a namespace
|
5
5
|
module Parser # This module is used as a namespace
|
6
6
|
# Utility class that helps to create a ParseForest from
|
7
7
|
# a given Parsing object.
|
8
|
-
class ParseForestFactory
|
9
|
-
# Link to Parsing object (= results of recognizer)
|
10
|
-
attr_reader(:parsing)
|
8
|
+
class ParseForestFactory < ParseRepCreator
|
11
9
|
|
12
|
-
|
13
|
-
def initialize(aParsingResult)
|
14
|
-
@parsing = aParsingResult
|
15
|
-
end
|
16
|
-
|
17
|
-
# Factory that produces the parse forest
|
18
|
-
def build_parse_forest()
|
19
|
-
a_walker = walker(parsing)
|
20
|
-
a_builder = builder(parsing)
|
21
|
-
|
22
|
-
begin
|
23
|
-
loop do
|
24
|
-
event = a_walker.next
|
25
|
-
# puts "EVENT #{event[0]} #{event[1]}"
|
26
|
-
a_builder.receive_event(*event)
|
27
|
-
end
|
28
|
-
rescue StopIteration
|
29
|
-
# Do nothing
|
30
|
-
end
|
31
|
-
|
32
|
-
return a_builder.forest
|
33
|
-
end
|
34
|
-
|
35
|
-
|
36
|
-
private
|
37
|
-
|
38
|
-
# Create a Parsing walker, that is, an object
|
39
|
-
# that will iterate over the relevant nodes (= parsing entries)
|
40
|
-
# of a GFGParsing
|
41
|
-
def walker(aParseResult)
|
42
|
-
walker_factory = ParseWalkerFactory.new
|
43
|
-
accept_entry = aParseResult.accepting_entry
|
44
|
-
accept_index = aParseResult.chart.last_index
|
45
|
-
walker_factory.build_walker(accept_entry, accept_index)
|
46
|
-
end
|
10
|
+
protected
|
47
11
|
|
48
12
|
# Create a Builder, that is, an object
|
49
13
|
# that will create piece by piece the forest
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require_relative 'parse_walker_factory'
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
|
4
|
+
module Parser # This module is used as a namespace
|
5
|
+
# Utility class that helps to create a representation of a parse from
|
6
|
+
# a given Parsing object.
|
7
|
+
class ParseRepCreator
|
8
|
+
# @return [GFGParsing] Link to Parsing object (= results of recognizer)
|
9
|
+
attr_reader(:parsing)
|
10
|
+
|
11
|
+
# Constructor. Creates and initializes a ParseRepCreator instance.
|
12
|
+
# @return [ParseRepCreator]
|
13
|
+
def initialize(aParsingResult)
|
14
|
+
@parsing = aParsingResult
|
15
|
+
end
|
16
|
+
|
17
|
+
# Factory method that produces the representation of the parse.
|
18
|
+
# @return [ParseTree, ParseForest] The parse representation.
|
19
|
+
def create()
|
20
|
+
a_walker = walker(parsing)
|
21
|
+
a_builder = builder(parsing)
|
22
|
+
|
23
|
+
begin
|
24
|
+
loop do
|
25
|
+
event = a_walker.next
|
26
|
+
# puts "EVENT #{event[0]} #{event[1]}"
|
27
|
+
a_builder.receive_event(*event)
|
28
|
+
end
|
29
|
+
rescue StopIteration
|
30
|
+
# Do nothing
|
31
|
+
end
|
32
|
+
|
33
|
+
return a_builder.result
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
# Create a Parsing walker, that is, an object
|
39
|
+
# that will iterate over the relevant nodes (= parsing entries)
|
40
|
+
# of a GFGParsing
|
41
|
+
def walker(aParseResult)
|
42
|
+
walker_factory = ParseWalkerFactory.new
|
43
|
+
accept_entry = aParseResult.accepting_entry
|
44
|
+
accept_index = aParseResult.chart.last_index
|
45
|
+
walker_factory.build_walker(accept_entry, accept_index)
|
46
|
+
end
|
47
|
+
|
48
|
+
end # class
|
49
|
+
end # module
|
50
|
+
end # module
|
51
|
+
|
52
|
+
# End of file
|
53
|
+
|
@@ -9,15 +9,16 @@ require_relative '../ptree/parse_tree'
|
|
9
9
|
|
10
10
|
module Rley # This module is used as a namespace
|
11
11
|
module Parser # This module is used as a namespace
|
12
|
-
# Builder GoF pattern.
|
12
|
+
# Builder GoF pattern.
|
13
|
+
# The Builder pattern creates a complex object
|
13
14
|
# (say, a parse tree) from simpler objects (terminal and non-terminal
|
14
15
|
# nodes) and using a step by step approach.
|
15
16
|
class ParseTreeBuilder
|
16
|
-
# The sequence of input tokens
|
17
|
+
# @return [Array<Token>] The sequence of input tokens
|
17
18
|
attr_reader(:tokens)
|
18
19
|
|
19
|
-
# Link to tree object
|
20
|
-
attr_reader(:
|
20
|
+
# Link to tree object (being) built
|
21
|
+
attr_reader(:result)
|
21
22
|
|
22
23
|
# Link to current path
|
23
24
|
attr_reader(:curr_path)
|
@@ -28,7 +29,8 @@ module Rley # This module is used as a namespace
|
|
28
29
|
# A hash with pairs of the form: visited parse entry => tree node
|
29
30
|
attr_reader(:entry2node)
|
30
31
|
|
31
|
-
|
32
|
+
# Create a new builder instance.
|
33
|
+
# @param theTokens [Array<Token>] The sequence of input tokens.
|
32
34
|
def initialize(theTokens)
|
33
35
|
@tokens = theTokens
|
34
36
|
@curr_path = []
|
@@ -71,7 +73,7 @@ module Rley # This module is used as a namespace
|
|
71
73
|
range = { low: anEntry.origin, high: anIndex }
|
72
74
|
non_terminal = anEntry.vertex.non_terminal
|
73
75
|
create_non_terminal_node(anEntry, range, non_terminal)
|
74
|
-
@
|
76
|
+
@result = create_tree(curr_parent) unless @last_visitee
|
75
77
|
else
|
76
78
|
raise NotImplementedError
|
77
79
|
end
|
@@ -143,7 +145,7 @@ module Rley # This module is used as a namespace
|
|
143
145
|
range = curr_parent.range
|
144
146
|
alternative = Rley::PTree::AlternativeNode.new(vertex, range)
|
145
147
|
add_subnode(alternative)
|
146
|
-
|
148
|
+
result.is_ambiguous = true
|
147
149
|
# puts "FOREST ADD #{alternative.key}"
|
148
150
|
|
149
151
|
return alternative
|
@@ -1,49 +1,13 @@
|
|
1
|
-
require_relative '
|
1
|
+
require_relative 'parse_rep_creator'
|
2
2
|
require_relative 'parse_tree_builder'
|
3
3
|
|
4
4
|
module Rley # This module is used as a namespace
|
5
5
|
module Parser # This module is used as a namespace
|
6
6
|
# Utility class that helps to create a ParseTree from
|
7
7
|
# a given Parsing object.
|
8
|
-
class ParseTreeFactory
|
9
|
-
# Link to Parsing object (= results of recognizer)
|
10
|
-
attr_reader(:parsing)
|
8
|
+
class ParseTreeFactory < ParseRepCreator
|
11
9
|
|
12
|
-
|
13
|
-
def initialize(aParsingResult)
|
14
|
-
@parsing = aParsingResult
|
15
|
-
end
|
16
|
-
|
17
|
-
# Factory that produces the parse tree
|
18
|
-
def build_parse_tree()
|
19
|
-
a_walker = walker(parsing)
|
20
|
-
a_builder = builder(parsing)
|
21
|
-
|
22
|
-
begin
|
23
|
-
loop do
|
24
|
-
event = a_walker.next
|
25
|
-
# puts "EVENT #{event[0]} #{event[1]}"
|
26
|
-
a_builder.receive_event(*event)
|
27
|
-
end
|
28
|
-
rescue StopIteration
|
29
|
-
# Do nothing
|
30
|
-
end
|
31
|
-
|
32
|
-
return a_builder.tree
|
33
|
-
end
|
34
|
-
|
35
|
-
|
36
|
-
private
|
37
|
-
|
38
|
-
# Create a Parsing walker, that is, an object
|
39
|
-
# that will iterate over the relevant nodes (= parsing entries)
|
40
|
-
# of a GFGParsing
|
41
|
-
def walker(aParseResult)
|
42
|
-
walker_factory = ParseWalkerFactory.new
|
43
|
-
accept_entry = aParseResult.accepting_entry
|
44
|
-
accept_index = aParseResult.chart.last_index
|
45
|
-
walker_factory.build_walker(accept_entry, accept_index)
|
46
|
-
end
|
10
|
+
protected
|
47
11
|
|
48
12
|
# Create a Builder, that is, an object
|
49
13
|
# that will create piece by piece the forest
|
@@ -8,16 +8,20 @@ require_relative '../gfg/start_vertex'
|
|
8
8
|
|
9
9
|
module Rley # This module is used as a namespace
|
10
10
|
module Parser # This module is used as a namespace
|
11
|
+
# Utility class used internally by the Enumerator created
|
12
|
+
# with a ParseWalkerContext object. It holds the state of
|
13
|
+
# the walk over a GFGParsing object.
|
11
14
|
ParseWalkerContext = Struct.new(
|
12
|
-
:curr_entry, #
|
13
|
-
:entry_set_index, #
|
14
|
-
:visitees, # The set of already visited
|
15
|
+
:curr_entry, # @return [ParseEntry] entry being visited
|
16
|
+
:entry_set_index, # @return [Integer] Set index of current parse entry
|
17
|
+
:visitees, # @return [Set<ParseEntry>] The set of already visited entries
|
15
18
|
:nterm2start, # Nested hashes. Pairs of first level are of the form:
|
16
19
|
# non-terminal symbol => { index(=origin) => start entry }
|
17
|
-
:return_stack, # A stack of parse entries
|
20
|
+
:return_stack, # @return [Array<ParseEntry>] A stack of parse entries
|
18
21
|
:backtrack_points
|
19
22
|
)
|
20
23
|
|
24
|
+
|
21
25
|
WalkerBacktrackpoint = Struct.new(
|
22
26
|
:entry_set_index, # Sigma set index of current parse entry
|
23
27
|
:return_stack, # A stack of parse entries
|
@@ -25,12 +29,11 @@ module Rley # This module is used as a namespace
|
|
25
29
|
:antecedent_index
|
26
30
|
)
|
27
31
|
|
28
|
-
# A factory that creates an
|
29
|
-
#
|
30
|
-
# The walker yields visit events.
|
31
|
-
#
|
32
|
-
# for a given GFGParsing object.
|
33
|
-
# distinct for the GFGParsing.
|
32
|
+
# A factory that creates an Enumerator object
|
33
|
+
# that itself walks through a GFGParsing object.
|
34
|
+
# The walker (= Enumerator) yields visit events.
|
35
|
+
# This class implements an external iterator
|
36
|
+
# for a given GFGParsing object.
|
34
37
|
# This is different from the internal iterators, usually implemented
|
35
38
|
# in Ruby with an :each method.
|
36
39
|
# Allows to perform a backwards traversal over the relevant parse entries.
|
@@ -40,7 +43,12 @@ module Rley # This module is used as a namespace
|
|
40
43
|
# (i.e. they belong to a path that leads to the accepting parse entry)
|
41
44
|
class ParseWalkerFactory
|
42
45
|
# Build an Enumerator that will yield the parse entries as it
|
43
|
-
# walks backwards on the parse graph
|
46
|
+
# walks backwards on the parse graph.
|
47
|
+
# @param acceptingEntry [ParseEntry] the final ParseEntry of a
|
48
|
+
# successful parse.
|
49
|
+
# @param maxIndex [Integer] the index of the last input token.
|
50
|
+
# @return [Enumerator] yields visit events when walking over the
|
51
|
+
# parse result
|
44
52
|
def build_walker(acceptingEntry, maxIndex)
|
45
53
|
# Local context for the enumerator
|
46
54
|
ctx = init_context(acceptingEntry, maxIndex)
|
@@ -12,7 +12,7 @@ module Rley # This module is used as a namespace
|
|
12
12
|
# during the parse.
|
13
13
|
# The root node corresponds to the main/start symbol of the grammar.
|
14
14
|
class ParseTree
|
15
|
-
# The root node of the tree
|
15
|
+
# @return [ParseTreeNode] The root node of the tree.
|
16
16
|
attr_reader(:root)
|
17
17
|
|
18
18
|
# @param theRootNode [ParseTreeNode] The root node of the parse tree.
|
@@ -8,7 +8,7 @@ module Rley # This module is used as a namespace
|
|
8
8
|
|
9
9
|
# aPosition is the position of the token in the input stream.
|
10
10
|
def initialize(aToken, aPos)
|
11
|
-
(major, minor) =
|
11
|
+
# (major, minor) =
|
12
12
|
|
13
13
|
# Use '1.class' trick to support both Integer and Fixnum classes
|
14
14
|
range = aPos.kind_of?(1.class) ? { low: aPos, high: aPos + 1 } : aPos
|
data/lib/rley/syntax/grammar.rb
CHANGED
@@ -31,7 +31,10 @@ module Rley # This module is used as a namespace
|
|
31
31
|
@symbols = []
|
32
32
|
@name2symbol = {}
|
33
33
|
valid_productions = validate_productions(theProductions)
|
34
|
-
valid_productions.each
|
34
|
+
valid_productions.each do |prod|
|
35
|
+
add_production(prod)
|
36
|
+
name_production(prod)
|
37
|
+
end
|
35
38
|
diagnose
|
36
39
|
|
37
40
|
# TODO: use topological sorting
|
@@ -65,6 +68,22 @@ module Rley # This module is used as a namespace
|
|
65
68
|
|
66
69
|
aProduction.rhs.each { |symb| add_symbol(symb) }
|
67
70
|
end
|
71
|
+
|
72
|
+
def name_production(aProduction)
|
73
|
+
if aProduction.name.nil?
|
74
|
+
index = rules.find_index(aProduction)
|
75
|
+
prefix = aProduction.lhs.name.dup
|
76
|
+
previous = index.zero? ? nil : rules[index - 1]
|
77
|
+
if previous.nil? || previous.lhs != aProduction.lhs
|
78
|
+
suffix = '[0]'
|
79
|
+
else
|
80
|
+
prev_serial = previous.name.match(/\[(\d+)\]$/)
|
81
|
+
suffix = "[#{prev_serial[1].to_i + 1}]"
|
82
|
+
end
|
83
|
+
|
84
|
+
aProduction.name = prefix + suffix
|
85
|
+
end
|
86
|
+
end
|
68
87
|
|
69
88
|
# Perform some check of the grammar.
|
70
89
|
def diagnose()
|
@@ -67,7 +67,7 @@ module Rley # This module is used as a namespace
|
|
67
67
|
# builder.rule 'A' => %w[a A c] # Call parentheses are optional
|
68
68
|
# @param aProductionRepr [Hash{String, Array<String>}]
|
69
69
|
# A Hash-based representation of a production.
|
70
|
-
# @return [
|
70
|
+
# @return [Production] The created Production instance
|
71
71
|
def add_production(aProductionRepr)
|
72
72
|
aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
|
73
73
|
lhs = get_nonterminal(lhs_name)
|
@@ -82,6 +82,8 @@ module Rley # This module is used as a namespace
|
|
82
82
|
new_prod = Production.new(lhs, rhs_constituents)
|
83
83
|
productions << new_prod
|
84
84
|
end
|
85
|
+
|
86
|
+
return productions.last
|
85
87
|
end
|
86
88
|
|
87
89
|
# Given the grammar symbols and productions added to the builder,
|
@@ -16,6 +16,9 @@ module Rley # This module is used as a namespace
|
|
16
16
|
# @return [NonTerminal] The left-hand side of the rule.
|
17
17
|
attr_reader(:lhs)
|
18
18
|
|
19
|
+
# @return [String] The unique name of the production rule.
|
20
|
+
attr_accessor(:name)
|
21
|
+
|
19
22
|
# @return [Boolean] A production is generative when all of its
|
20
23
|
# rhs members are generative (that is, they can each generate/derive
|
21
24
|
# a non-empty string of terminals).
|
@@ -26,9 +29,12 @@ module Rley # This module is used as a namespace
|
|
26
29
|
alias body rhs
|
27
30
|
alias head lhs
|
28
31
|
|
32
|
+
# Create a Production instance.
|
33
|
+
# @param aNonTerminal [NonTerminal] The left-hand side of the rule.
|
34
|
+
# @param theSymbols [list<Terminal | NonTerminal>] symbols of rhs.
|
29
35
|
def initialize(aNonTerminal, theSymbols)
|
30
36
|
@lhs = valid_lhs(aNonTerminal)
|
31
|
-
@rhs = SymbolSeq.new(theSymbols)
|
37
|
+
@rhs = SymbolSeq.new(theSymbols)
|
32
38
|
end
|
33
39
|
|
34
40
|
# Is the rhs empty?
|
@@ -97,7 +97,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
97
97
|
|
98
98
|
it 'should initialize the root node' do
|
99
99
|
next_event(:visit, 'Phi. | 0')
|
100
|
-
forest = subject.
|
100
|
+
forest = subject.result
|
101
101
|
|
102
102
|
expect(forest.root.to_string(0)).to eq('Phi[0, 4]')
|
103
103
|
expected_curr_path('Phi[0, 4]')
|
@@ -68,7 +68,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
68
68
|
|
69
69
|
context 'Parse forest construction' do
|
70
70
|
it 'should build a parse forest' do
|
71
|
-
forest = subject.
|
71
|
+
forest = subject.create
|
72
72
|
expect(forest).to be_kind_of(SPPF::ParseForest)
|
73
73
|
=begin
|
74
74
|
require 'yaml'
|
@@ -81,7 +81,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
81
81
|
|
82
82
|
it 'should initialize the root node' do
|
83
83
|
next_event(:visit, 'P. | 0')
|
84
|
-
tree = subject.
|
84
|
+
tree = subject.result
|
85
85
|
|
86
86
|
expect(tree.root.to_string(0)).to eq('P[0, 5]')
|
87
87
|
expected_curr_path('P[0, 5]')
|
@@ -227,12 +227,13 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
227
227
|
end
|
228
228
|
|
229
229
|
# Lightweight sanity check
|
230
|
-
expect(subject.
|
231
|
-
expect(subject.
|
232
|
-
expect(subject.
|
233
|
-
expect(subject.
|
234
|
-
child_node = subject.
|
230
|
+
expect(subject.result).not_to be_nil
|
231
|
+
expect(subject.result).to be_kind_of(PTree::ParseTree)
|
232
|
+
expect(subject.result.root.to_s).to eq('P[0, 5]')
|
233
|
+
expect(subject.result.root.subnodes.size).to eq(1)
|
234
|
+
child_node = subject.result.root.subnodes[0]
|
235
235
|
expect(child_node.to_s).to eq('S[0, 5]')
|
236
|
+
|
236
237
|
expect(child_node.subnodes.size).to eq(3)
|
237
238
|
first_grandchild = child_node.subnodes[0]
|
238
239
|
expect(first_grandchild.to_s).to eq('S[0, 1]')
|
@@ -1,40 +1,28 @@
|
|
1
1
|
require_relative '../../spec_helper'
|
2
2
|
|
3
3
|
require_relative '../../../lib/rley/parser/gfg_earley_parser'
|
4
|
-
|
5
4
|
require_relative '../../../lib/rley/syntax/grammar_builder'
|
6
5
|
require_relative '../support/grammar_helper'
|
6
|
+
require_relative '../support/grammar_abc_helper'
|
7
7
|
require_relative '../support/expectation_helper'
|
8
8
|
|
9
9
|
# Load the class under test
|
10
|
-
require_relative '../../../lib/rley/parser/
|
10
|
+
require_relative '../../../lib/rley/parser/parse_tree_factory'
|
11
11
|
|
12
12
|
module Rley # Open this namespace to avoid module qualifier prefixes
|
13
13
|
module Parser
|
14
|
-
describe
|
14
|
+
describe ParseTreeFactory do
|
15
15
|
include GrammarHelper # Mix-in with token factory method
|
16
16
|
include ExpectationHelper # Mix-in with expectation on parse entry sets
|
17
|
+
include GrammarABCHelper # Mix-in for a sample grammar
|
17
18
|
|
18
19
|
let(:sample_grammar) do
|
19
|
-
|
20
|
-
|
21
|
-
# Notes in Theoretical Computer Science 203, (2008), pp. 53-67
|
22
|
-
# contains a hidden left recursion and a cycle
|
23
|
-
builder = Syntax::GrammarBuilder.new do
|
24
|
-
add_terminals('a', 'b')
|
25
|
-
rule 'Phi' => 'S'
|
26
|
-
rule 'S' => %w[A T]
|
27
|
-
rule 'S' => %w[a T]
|
28
|
-
rule 'A' => 'a'
|
29
|
-
rule 'A' => %w[B A]
|
30
|
-
rule 'B' => []
|
31
|
-
rule 'T' => %w[b b b]
|
32
|
-
end
|
33
|
-
builder.grammar
|
20
|
+
builder = grammar_abc_builder
|
21
|
+
builder.grammar
|
34
22
|
end
|
35
23
|
|
36
24
|
let(:sample_tokens) do
|
37
|
-
build_token_sequence(%w[a b
|
25
|
+
build_token_sequence(%w[a b c], sample_grammar)
|
38
26
|
end
|
39
27
|
|
40
28
|
let(:sample_result) do
|
@@ -44,7 +32,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
44
32
|
|
45
33
|
|
46
34
|
subject do
|
47
|
-
|
35
|
+
ParseTreeFactory.new(sample_result)
|
48
36
|
end
|
49
37
|
|
50
38
|
# Emit a text representation of the current path.
|
@@ -58,7 +46,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
58
46
|
|
59
47
|
context 'Initialization:' do
|
60
48
|
it 'should be created with a GFGParsing' do
|
61
|
-
expect {
|
49
|
+
expect { ParseTreeFactory.new(sample_result) }.not_to raise_error
|
62
50
|
end
|
63
51
|
|
64
52
|
it 'should know the parse result' do
|
@@ -66,17 +54,10 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
66
54
|
end
|
67
55
|
end
|
68
56
|
|
69
|
-
context 'Parse
|
70
|
-
it 'should build a parse forest' do
|
71
|
-
forest = subject.
|
72
|
-
expect(forest).to be_kind_of(SPPF::ParseForest)
|
73
|
-
=begin
|
74
|
-
require 'yaml'
|
75
|
-
require_relative '../../../exp/lab/forest_representation'
|
76
|
-
File.open("forest.yml", "w") { |f| YAML.dump(forest, f) }
|
77
|
-
pen = ForestRepresentation.new
|
78
|
-
pen.generate_graph(forest, File.open("forest.dot", "w"))
|
79
|
-
=end
|
57
|
+
context 'Parse tree construction' do
|
58
|
+
it 'should build a parse tree' do
|
59
|
+
forest = subject.create
|
60
|
+
expect(forest).to be_kind_of(PTree::ParseTree)
|
80
61
|
end
|
81
62
|
end # context
|
82
63
|
end # describe
|
@@ -159,6 +159,12 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
159
159
|
expect(subject.name2symbol['b']).to eq(b_)
|
160
160
|
expect(subject.name2symbol['c']).to eq(c_)
|
161
161
|
end
|
162
|
+
|
163
|
+
it 'should ensure that each production has a name' do
|
164
|
+
subject.rules.each do |prod|
|
165
|
+
expect(prod.name).to match(Regexp.new("#{prod.lhs.name}\\[\\d\\]"))
|
166
|
+
end
|
167
|
+
end
|
162
168
|
end # context
|
163
169
|
|
164
170
|
context 'Grammar diagnosis:' do
|
@@ -40,6 +40,10 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
40
40
|
expect(instance).to be_empty
|
41
41
|
end
|
42
42
|
|
43
|
+
it 'should be anonymous at creation' do
|
44
|
+
expect(subject.name).to be_nil
|
45
|
+
end
|
46
|
+
|
43
47
|
it 'should complain if its lhs is not a non-terminal' do
|
44
48
|
err = StandardError
|
45
49
|
msg_prefix = 'Left side of production must be a non-terminal symbol'
|
@@ -48,6 +52,14 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
48
52
|
expect { Production.new('wrong', sequence) }.to raise_error(err, msg)
|
49
53
|
end
|
50
54
|
end # context
|
55
|
+
|
56
|
+
context 'Provided services:' do
|
57
|
+
it 'should accept a name' do
|
58
|
+
a_name = 'nominem'
|
59
|
+
subject.name = a_name
|
60
|
+
expect(subject.name).to eq(a_name)
|
61
|
+
end
|
62
|
+
end # context
|
51
63
|
end # describe
|
52
64
|
end # module
|
53
65
|
end # module
|
@@ -58,7 +58,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
58
58
|
|
59
59
|
it 'should complain when unable to compare' do
|
60
60
|
err = StandardError
|
61
|
-
msg =
|
61
|
+
msg = 'Cannot compare a SymbolSeq with a String'
|
62
62
|
expect { subject == 'dummy-text' }.to raise_error(err, msg)
|
63
63
|
end
|
64
64
|
end # context
|
data/spec/spec_helper.rb
CHANGED
@@ -6,14 +6,14 @@ require 'coveralls'
|
|
6
6
|
|
7
7
|
Coveralls.wear!
|
8
8
|
|
9
|
-
SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
|
9
|
+
SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter.new([
|
10
10
|
SimpleCov::Formatter::HTMLFormatter,
|
11
11
|
Coveralls::SimpleCov::Formatter
|
12
|
-
]
|
12
|
+
])
|
13
13
|
|
14
|
-
|
15
|
-
require 'rspec' # Use the RSpec framework
|
16
14
|
require 'pp' # Use pretty-print for debugging purposes
|
15
|
+
require 'rspec' # Use the RSpec framework
|
16
|
+
|
17
17
|
|
18
18
|
RSpec.configure do |config|
|
19
19
|
config.expect_with :rspec do |c|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.07
|
4
|
+
version: 0.4.08
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-08-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: coveralls
|
@@ -135,6 +135,7 @@ files:
|
|
135
135
|
- examples/data_formats/JSON/json_demo.rb
|
136
136
|
- examples/data_formats/JSON/json_grammar.rb
|
137
137
|
- examples/data_formats/JSON/json_lexer.rb
|
138
|
+
- examples/data_formats/JSON/json_minifier.rb
|
138
139
|
- examples/data_formats/JSON/json_parser.rb
|
139
140
|
- examples/general/calc/calc_demo.rb
|
140
141
|
- examples/general/calc/calc_grammar.rb
|
@@ -173,6 +174,7 @@ files:
|
|
173
174
|
- lib/rley/parser/parse_entry_tracker.rb
|
174
175
|
- lib/rley/parser/parse_forest_builder.rb
|
175
176
|
- lib/rley/parser/parse_forest_factory.rb
|
177
|
+
- lib/rley/parser/parse_rep_creator.rb
|
176
178
|
- lib/rley/parser/parse_state.rb
|
177
179
|
- lib/rley/parser/parse_state_tracker.rb
|
178
180
|
- lib/rley/parser/parse_tracer.rb
|