rley 0.4.07 → 0.4.08
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +0 -2
- data/CHANGELOG.md +14 -9
- data/README.md +8 -9
- data/examples/data_formats/JSON/cli_options.rb +4 -3
- data/examples/data_formats/JSON/json_demo.rb +4 -1
- data/examples/data_formats/JSON/json_lexer.rb +1 -1
- data/examples/data_formats/JSON/json_minifier.rb +45 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/formatter/asciitree.rb +1 -1
- data/lib/rley/gfg/grm_flow_graph.rb +2 -3
- data/lib/rley/parser/gfg_parsing.rb +2 -2
- data/lib/rley/parser/parse_forest_builder.rb +7 -7
- data/lib/rley/parser/parse_forest_factory.rb +3 -39
- data/lib/rley/parser/parse_rep_creator.rb +53 -0
- data/lib/rley/parser/parse_tree_builder.rb +9 -7
- data/lib/rley/parser/parse_tree_factory.rb +3 -39
- data/lib/rley/parser/parse_walker_factory.rb +19 -11
- data/lib/rley/ptree/parse_tree.rb +1 -1
- data/lib/rley/ptree/terminal_node.rb +1 -1
- data/lib/rley/syntax/grammar.rb +20 -1
- data/lib/rley/syntax/grammar_builder.rb +3 -1
- data/lib/rley/syntax/production.rb +7 -1
- data/spec/rley/parser/groucho_spec.rb +1 -1
- data/spec/rley/parser/parse_forest_builder_spec.rb +1 -1
- data/spec/rley/parser/parse_forest_factory_spec.rb +1 -1
- data/spec/rley/parser/parse_tree_builder_spec.rb +7 -6
- data/spec/rley/parser/parse_tree_factory_spec.rb +13 -32
- data/spec/rley/syntax/grammar_spec.rb +6 -0
- data/spec/rley/syntax/production_spec.rb +12 -0
- data/spec/rley/syntax/symbol_seq_spec.rb +1 -1
- data/spec/spec_helper.rb +4 -4
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ce91c79f9088da09449654c9b6e1b6e0581cf1c
|
4
|
+
data.tar.gz: 3b1e3cc1ce21d3b031b7b09cdbb7606e2eb5a747
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7bd55bc558c857bab67f080169c1f9d72705fb66fda177c63032fdb0e8e6674efd1119f9fafba3681593dbc9b027beb05081bfe3f38fb99f46068f94b2563282
|
7
|
+
data.tar.gz: 54186ced4b193fa133b48ecdb2cddad7df0960cd2499b2ae48bc586a657cc7a2e49466b42570d78e8be4d0d123b6c821f5c9ab75b98d2e3c3d04d4876fd61f0e
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,16 +1,24 @@
|
|
1
|
+
### 0.4.08 / 2017-08-06
|
2
|
+
* [FIX] File `/spec/spec_helper.rb` replaced deprecated syntax for `SimpleCov::Formatter::MultiFormatter` construction
|
3
|
+
* [NEW] File `examples/data_formats/JSON/json_minifier.rb` Added a working JSON minifier to the demo app.
|
4
|
+
* [NEW] Class `Syntax::Production` Each production can have a name.
|
5
|
+
* [CHANGE] File `.travis.yml`: removal of Rubinius (rbx) in the list of Ruby versions.
|
6
|
+
* [CHANGE] DRYing the classes `ParseTreeFactory` and `ParseForestFactory` by inheriting from `ParseRepCreator`
|
7
|
+
* [CHANGE] Minor documentation updates.
|
8
|
+
|
1
9
|
### 0.4.07 / 2017-05-25
|
2
|
-
* [FIX] To avoid Fixnum deprecation error in Ruby 2.4
|
10
|
+
* [FIX] To avoid Fixnum deprecation error in Ruby 2.4 and higher, all explicit references to Fixnum have been removed.
|
3
11
|
* [FIX] File `.rubocop.yml`: folder `examples` was excluded from Rubocop control. Now it is in code analysis scope.
|
4
|
-
* [NEW] File `spec
|
12
|
+
* [NEW] File `spec/.rubocop.yml` to tune the code analysis for Rspec files
|
5
13
|
* [NEW] File `examples\.rubocop.yml` to tune the code analysis for example files
|
6
14
|
* [CHANGE] Code re-styling to please Rubocop 0.49.0: less than 10 offences remain (from above 200 count!)
|
7
15
|
* [CHANGE] Added support for Ruby 2.4.x. Files `.travis.yml` and `README.md` updated.
|
8
16
|
|
9
17
|
|
10
18
|
### 0.4.06 / 2017-05-25
|
11
|
-
* [FIX] File `formatter
|
12
|
-
* [FIX] File `formatter
|
13
|
-
* [FIX] File `parser
|
19
|
+
* [FIX] File `formatter/asciitree.rb` fixed inconsistency in comments that caused Yard warnings.
|
20
|
+
* [FIX] File `formatter/bracket_notation.rb` fixed inconsistency in comments that caused Yard warnings.
|
21
|
+
* [FIX] File `parser/parse_entry_set.rb` fixed inconsistency in comments that caused Yard warnings.
|
14
22
|
* [NEW] Method `Grammar#diagnose` performs a number of checks on the grammar. It detects whether:
|
15
23
|
there are undefined non-terminals (i.e. non-terminals without a rule that define them)
|
16
24
|
there are non-productive non-terminals (i.e. non-terminals that don't derive a string of terminals)
|
@@ -20,14 +28,11 @@
|
|
20
28
|
* [NEW] Method `GrmSymbol#generative?` indicates whether the grammar symbol can produce a sequence of terminals.
|
21
29
|
* [CHANGE] Class `GrammarBuilder` Improved the API documentation.
|
22
30
|
|
23
|
-
|
24
31
|
### 0.4.05 / 2017-05-06
|
25
32
|
* [CHANGE] File `README.md` Added documentation on how to build parse trees and manipulate them.
|
26
|
-
* [CHANGE] File `examples
|
33
|
+
* [CHANGE] File `examples/NLP/mini_en_demo.rb` now emits different parse tree representations.
|
27
34
|
* [NEW] Directory `www`. Contains a diagram output produced from Rley and fed to online RSyntaxTree tool.
|
28
35
|
|
29
|
-
|
30
|
-
|
31
36
|
### 0.4.04 / 2017-05-01
|
32
37
|
* [NEW] `Asciitree` formatter class. Allows parse tree output in simple printable text.
|
33
38
|
* [CHANGE] Major enhancements in directory `examples\data_formats\JSON`. The demo command-line tool parses JSON and outputs the parse tree in one of the supported formats.
|
data/README.md
CHANGED
@@ -110,7 +110,7 @@ The subset of English grammar is based on an example from the NLTK book.
|
|
110
110
|
grammar = builder.grammar
|
111
111
|
```
|
112
112
|
|
113
|
-
|
113
|
+
### Creating a lexicon
|
114
114
|
|
115
115
|
```ruby
|
116
116
|
# To simplify things, lexicon is implemented as a Hash with pairs of the form:
|
@@ -139,7 +139,7 @@ The subset of English grammar is based on an example from the NLTK book.
|
|
139
139
|
```
|
140
140
|
|
141
141
|
|
142
|
-
|
142
|
+
### Creating a tokenizer
|
143
143
|
```ruby
|
144
144
|
# A tokenizer reads the input string and converts it into a sequence of tokens
|
145
145
|
# Highly simplified tokenizer implementation.
|
@@ -162,14 +162,14 @@ creating a lexicon and tokenizer from scratch. Here are a few Ruby Part-of-Speec
|
|
162
162
|
|
163
163
|
|
164
164
|
|
165
|
-
|
165
|
+
### Building the parser
|
166
166
|
```ruby
|
167
167
|
# Easy with Rley...
|
168
168
|
parser = Rley::Parser::GFGEarleyParser.new(grammar)
|
169
169
|
```
|
170
170
|
|
171
171
|
|
172
|
-
|
172
|
+
### Parsing some input
|
173
173
|
```ruby
|
174
174
|
input_to_parse = 'John saw Mary with a telescope'
|
175
175
|
# Convert input text into a sequence of token objects...
|
@@ -191,7 +191,7 @@ representing the parse outcome:
|
|
191
191
|
|
192
192
|
For our whirlwind tour, we will opt for parse trees.
|
193
193
|
|
194
|
-
|
194
|
+
### Generating the parse tree
|
195
195
|
|
196
196
|
```ruby
|
197
197
|
ptree = result.parse_tree
|
@@ -210,7 +210,7 @@ Let's do it by:
|
|
210
210
|
- Using one of the built-in visit subscribers specifically created to render the
|
211
211
|
parse tree in a given output format.
|
212
212
|
|
213
|
-
|
213
|
+
#### Creating a parse tree visitor
|
214
214
|
Good news: creating a parse tree visitor for the parse tree `ptree` is just
|
215
215
|
a one-liner:
|
216
216
|
|
@@ -219,7 +219,7 @@ an one-liner:
|
|
219
219
|
visitor = Rley::ParseTreeVisitor.new(ptree)
|
220
220
|
```
|
221
221
|
|
222
|
-
|
222
|
+
#### Visiting the parse tree
|
223
223
|
|
224
224
|
Unsurprisingly, to start the parse tree visit, one calls the `#start` method:
|
225
225
|
|
@@ -293,7 +293,7 @@ the tree node being visited.
|
|
293
293
|
|
294
294
|
Not really impressive? So let's use another formatter...
|
295
295
|
|
296
|
-
|
296
|
+
#### Visualizing the parse tree structure
|
297
297
|
If one replaces the previous formatter by an instance of
|
298
298
|
`Rley::Formatter::Asciitree` the output now shows the parse tree structure.
|
299
299
|
|
@@ -345,7 +345,6 @@ By the way, this tool is also a Ruby gem, [rsyntaxtree](https://rubygems.org/gem
|
|
345
345
|

|
346
346
|
|
347
347
|
|
348
|
-
|
349
348
|
## Error reporting
|
350
349
|
__Rley__ is a non-violent parser, that is, it won't throw an exception when it
|
351
350
|
detects a syntax error. Instead, the parse result will be marked as
|
@@ -26,10 +26,10 @@ class CLIOptions < Hash
|
|
26
26
|
and renders its parse tree to the standard output
|
27
27
|
in the format specified in the command-line.
|
28
28
|
|
29
|
-
Usage:
|
29
|
+
Usage: json_demo.rb [options] FILE
|
30
30
|
|
31
31
|
Examples:
|
32
|
-
|
32
|
+
json_demo --format ascii_tree sample01.json
|
33
33
|
END_BANNER
|
34
34
|
|
35
35
|
opts.separator ''
|
@@ -37,11 +37,12 @@ END_BANNER
|
|
37
37
|
format_help = <<-END_TEXT
|
38
38
|
Select the output format (default: ascii_tree). Available formats:
|
39
39
|
ascii_tree Simple text representation of parse trees
|
40
|
+
minify Strip all unnecessary whitespace in the input json file
|
40
41
|
labelled Labelled square notation (LBN)
|
41
42
|
Use online tools (e.g. http://yohasebe.com/rsyntaxtree/)
|
42
43
|
to visualize parse trees from LBN output.
|
43
44
|
END_TEXT
|
44
|
-
formats = %i[ascii_tree labelled]
|
45
|
+
formats = %i[ascii_tree labelled minify]
|
45
46
|
opts.on('-f', '--format FORMAT', formats, format_help) do |frm|
|
46
47
|
self[:format] = frm
|
47
48
|
end
|
@@ -1,8 +1,9 @@
|
|
1
1
|
require_relative 'cli_options'
|
2
2
|
require_relative 'json_parser'
|
3
|
+
require_relative 'json_minifier'
|
3
4
|
|
4
5
|
prog_name = 'json_demo'
|
5
|
-
prog_version = '0.
|
6
|
+
prog_version = '0.2.0'
|
6
7
|
|
7
8
|
cli_options = CLIOptions.new(prog_name, prog_version, ARGV)
|
8
9
|
if ARGV.empty?
|
@@ -32,6 +33,8 @@ case cli_options[:format]
|
|
32
33
|
renderer = Rley::Formatter::Asciitree.new($stdout)
|
33
34
|
when :labelled
|
34
35
|
renderer = Rley::Formatter::BracketNotation.new($stdout)
|
36
|
+
when :minify
|
37
|
+
renderer = JSONMinifier.new($stdout)
|
35
38
|
end
|
36
39
|
|
37
40
|
# Let's create a parse tree visitor
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# File: json_minifier.rb
|
2
|
+
|
3
|
+
|
4
|
+
# A JSON minifier: it removes unnecessary whitespace from a JSON expression.
|
5
|
+
# It typically reduces size by half.
|
6
|
+
class JSONMinifier
|
7
|
+
# The IO output stream in which the formatter's result will be sent.
|
8
|
+
attr_reader(:output)
|
9
|
+
|
10
|
+
# Constructor.
|
11
|
+
# @param anIO [IO] an output IO where the formatter's result will
|
12
|
+
# be placed.
|
13
|
+
def initialize(anIO)
|
14
|
+
@output = anIO
|
15
|
+
end
|
16
|
+
|
17
|
+
# Given a parse tree visitor, perform the visit
|
18
|
+
# and render the visit events in the output stream.
|
19
|
+
# @param aVisitor [ParseTreeVisitor]
|
20
|
+
def render(aVisitor)
|
21
|
+
aVisitor.subscribe(self)
|
22
|
+
aVisitor.start
|
23
|
+
aVisitor.unsubscribe(self)
|
24
|
+
end
|
25
|
+
|
26
|
+
# Method called by a ParseTreeVisitor to which the formatter subscribed.
|
27
|
+
# Notification of a visit event: the visitor is about to visit
|
28
|
+
# a terminal node. The only thing the JSON minifier has to do is
|
29
|
+
# to render the input tokens almost as they appear initially.
|
30
|
+
# @param aTerm [TerminalNode]
|
31
|
+
def before_terminal(aTerm)
|
32
|
+
# Lexeme is the original text representation of the token
|
33
|
+
lexeme = aTerm.token.lexeme
|
34
|
+
literal = if aTerm.symbol.name == 'string'
|
35
|
+
# String values are delimited by double quotes
|
36
|
+
'"' + lexeme + '"'
|
37
|
+
else
|
38
|
+
lexeme
|
39
|
+
end
|
40
|
+
|
41
|
+
output << literal
|
42
|
+
end
|
43
|
+
end # class
|
44
|
+
|
45
|
+
# End of file
|
data/lib/rley/constants.rb
CHANGED
@@ -41,7 +41,7 @@ module Rley # This module is used as a namespace
|
|
41
41
|
# Notification of a visit event: the visitor is about to visit
|
42
42
|
# the children of a non-terminal node
|
43
43
|
# @param parent [NonTerminalNode]
|
44
|
-
# @param
|
44
|
+
# @param _children [Array<ParseTreeNode>] array of children nodes
|
45
45
|
def before_subnodes(parent, _children)
|
46
46
|
rank_of(parent)
|
47
47
|
curr_path << parent
|
@@ -67,10 +67,9 @@ module Rley # This module is used as a namespace
|
|
67
67
|
|
68
68
|
# Walk over all the vertices of the graph that are reachable from a given
|
69
69
|
# start vertex. This is a depth-first graph traversal.
|
70
|
-
# @param aStartVertex [StartVertex] the depth-first traversal begins
|
70
|
+
# @param aStartVertex [StartVertex] the depth-first traversal begins
|
71
71
|
# from here
|
72
|
-
# @param
|
73
|
-
# is found
|
72
|
+
# @param _visitAction [Proc] block called when a new graph vertex is found
|
74
73
|
def traverse_df(aStartVertex, &_visitAction)
|
75
74
|
visited = Set.new
|
76
75
|
stack = []
|
@@ -146,7 +146,7 @@ module Rley # This module is used as a namespace
|
|
146
146
|
def parse_forest()
|
147
147
|
factory = ParseForestFactory.new(self)
|
148
148
|
|
149
|
-
return factory.
|
149
|
+
return factory.create
|
150
150
|
end
|
151
151
|
|
152
152
|
# Factory method. Builds a ParseTree from the parse result.
|
@@ -154,7 +154,7 @@ module Rley # This module is used as a namespace
|
|
154
154
|
def parse_tree()
|
155
155
|
factory = ParseTreeFactory.new(self)
|
156
156
|
|
157
|
-
return factory.
|
157
|
+
return factory.create
|
158
158
|
end
|
159
159
|
|
160
160
|
# Retrieve the very first parse entry added to the chart.
|
@@ -17,8 +17,8 @@ module Rley # This module is used as a namespace
|
|
17
17
|
# The sequence of input tokens
|
18
18
|
attr_reader(:tokens)
|
19
19
|
|
20
|
-
# Link to forest object
|
21
|
-
attr_reader(:
|
20
|
+
# Link to forest object (being) built
|
21
|
+
attr_reader(:result)
|
22
22
|
|
23
23
|
# Link to current path
|
24
24
|
attr_reader(:curr_path)
|
@@ -77,7 +77,7 @@ module Rley # This module is used as a namespace
|
|
77
77
|
range = { low: anEntry.origin, high: anIndex }
|
78
78
|
non_terminal = anEntry.vertex.non_terminal
|
79
79
|
create_non_terminal_node(anEntry, range, non_terminal)
|
80
|
-
@
|
80
|
+
@result = create_forest(curr_parent) unless @last_visitee
|
81
81
|
|
82
82
|
when :backtrack
|
83
83
|
# Restore path
|
@@ -187,7 +187,7 @@ module Rley # This module is used as a namespace
|
|
187
187
|
range = curr_parent.range
|
188
188
|
alternative = Rley::SPPF::AlternativeNode.new(vertex, range)
|
189
189
|
add_subnode(alternative)
|
190
|
-
|
190
|
+
result.is_ambiguous = true
|
191
191
|
# puts "FOREST ADD #{alternative.key}"
|
192
192
|
|
193
193
|
return alternative
|
@@ -218,11 +218,11 @@ module Rley # This module is used as a namespace
|
|
218
218
|
# Add the given node if not yet present in parse forest
|
219
219
|
def add_node_to_forest(aNode)
|
220
220
|
key_node = aNode.key
|
221
|
-
if
|
222
|
-
new_node =
|
221
|
+
if result.include?(key_node)
|
222
|
+
new_node = result.key2node[key_node]
|
223
223
|
else
|
224
224
|
new_node = aNode
|
225
|
-
|
225
|
+
result.key2node[key_node] = new_node
|
226
226
|
# puts "FOREST ADD #{key_node}"
|
227
227
|
end
|
228
228
|
add_subnode(new_node, false)
|
@@ -1,49 +1,13 @@
|
|
1
|
-
require_relative '
|
1
|
+
require_relative 'parse_rep_creator'
|
2
2
|
require_relative 'parse_forest_builder'
|
3
3
|
|
4
4
|
module Rley # This module is used as a namespace
|
5
5
|
module Parser # This module is used as a namespace
|
6
6
|
# Utility class that helps to create a ParseForest from
|
7
7
|
# a given Parsing object.
|
8
|
-
class ParseForestFactory
|
9
|
-
# Link to Parsing object (= results of recognizer)
|
10
|
-
attr_reader(:parsing)
|
8
|
+
class ParseForestFactory < ParseRepCreator
|
11
9
|
|
12
|
-
|
13
|
-
def initialize(aParsingResult)
|
14
|
-
@parsing = aParsingResult
|
15
|
-
end
|
16
|
-
|
17
|
-
# Factory that produces the parse forest
|
18
|
-
def build_parse_forest()
|
19
|
-
a_walker = walker(parsing)
|
20
|
-
a_builder = builder(parsing)
|
21
|
-
|
22
|
-
begin
|
23
|
-
loop do
|
24
|
-
event = a_walker.next
|
25
|
-
# puts "EVENT #{event[0]} #{event[1]}"
|
26
|
-
a_builder.receive_event(*event)
|
27
|
-
end
|
28
|
-
rescue StopIteration
|
29
|
-
# Do nothing
|
30
|
-
end
|
31
|
-
|
32
|
-
return a_builder.forest
|
33
|
-
end
|
34
|
-
|
35
|
-
|
36
|
-
private
|
37
|
-
|
38
|
-
# Create a Parsing walker, that is, an object
|
39
|
-
# that will iterate over the relevant nodes (= parsing entries)
|
40
|
-
# of a GFGParsing
|
41
|
-
def walker(aParseResult)
|
42
|
-
walker_factory = ParseWalkerFactory.new
|
43
|
-
accept_entry = aParseResult.accepting_entry
|
44
|
-
accept_index = aParseResult.chart.last_index
|
45
|
-
walker_factory.build_walker(accept_entry, accept_index)
|
46
|
-
end
|
10
|
+
protected
|
47
11
|
|
48
12
|
# Create a Builder, that is, an object
|
49
13
|
# that will create piece by piece the forest
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require_relative 'parse_walker_factory'
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
|
4
|
+
module Parser # This module is used as a namespace
|
5
|
+
# Utility class that helps to create a representation of a parse from
|
6
|
+
# a given Parsing object.
|
7
|
+
class ParseRepCreator
|
8
|
+
# @return [GFGParsing] Link to Parsing object (= results of recognizer)
|
9
|
+
attr_reader(:parsing)
|
10
|
+
|
11
|
+
# Constructor. Creates and initializes a ParseRepCreator instance.
|
12
|
+
# @return [ParseRepCreator]
|
13
|
+
def initialize(aParsingResult)
|
14
|
+
@parsing = aParsingResult
|
15
|
+
end
|
16
|
+
|
17
|
+
# Factory method that produces the representation of the parse.
|
18
|
+
# @return [ParseTree] The parse representation.
|
19
|
+
def create()
|
20
|
+
a_walker = walker(parsing)
|
21
|
+
a_builder = builder(parsing)
|
22
|
+
|
23
|
+
begin
|
24
|
+
loop do
|
25
|
+
event = a_walker.next
|
26
|
+
# puts "EVENT #{event[0]} #{event[1]}"
|
27
|
+
a_builder.receive_event(*event)
|
28
|
+
end
|
29
|
+
rescue StopIteration
|
30
|
+
# Do nothing
|
31
|
+
end
|
32
|
+
|
33
|
+
return a_builder.result
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
# Create a Parsing walker, that is, an object
|
39
|
+
# that will iterate over the relevant nodes (= parsing entries)
|
40
|
+
# of a GFGParsing
|
41
|
+
def walker(aParseResult)
|
42
|
+
walker_factory = ParseWalkerFactory.new
|
43
|
+
accept_entry = aParseResult.accepting_entry
|
44
|
+
accept_index = aParseResult.chart.last_index
|
45
|
+
walker_factory.build_walker(accept_entry, accept_index)
|
46
|
+
end
|
47
|
+
|
48
|
+
end # class
|
49
|
+
end # module
|
50
|
+
end # module
|
51
|
+
|
52
|
+
# End of file
|
53
|
+
|
@@ -9,15 +9,16 @@ require_relative '../ptree/parse_tree'
|
|
9
9
|
|
10
10
|
module Rley # This module is used as a namespace
|
11
11
|
module Parser # This module is used as a namespace
|
12
|
-
# Builder GoF pattern.
|
12
|
+
# Builder GoF pattern.
|
13
|
+
# The Builder pattern creates a complex object
|
13
14
|
# (say, a parse tree) from simpler objects (terminal and non-terminal
|
14
15
|
# nodes) and using a step by step approach.
|
15
16
|
class ParseTreeBuilder
|
16
|
-
# The sequence of input tokens
|
17
|
+
# @return [Array<Token>] The sequence of input tokens
|
17
18
|
attr_reader(:tokens)
|
18
19
|
|
19
|
-
# Link to tree object
|
20
|
-
attr_reader(:
|
20
|
+
# Link to tree object (being) built
|
21
|
+
attr_reader(:result)
|
21
22
|
|
22
23
|
# Link to current path
|
23
24
|
attr_reader(:curr_path)
|
@@ -28,7 +29,8 @@ module Rley # This module is used as a namespace
|
|
28
29
|
# A hash with pairs of the form: visited parse entry => tree node
|
29
30
|
attr_reader(:entry2node)
|
30
31
|
|
31
|
-
|
32
|
+
# Create a new builder instance.
|
33
|
+
# @param theTokens [Array<Token>] The sequence of input tokens.
|
32
34
|
def initialize(theTokens)
|
33
35
|
@tokens = theTokens
|
34
36
|
@curr_path = []
|
@@ -71,7 +73,7 @@ module Rley # This module is used as a namespace
|
|
71
73
|
range = { low: anEntry.origin, high: anIndex }
|
72
74
|
non_terminal = anEntry.vertex.non_terminal
|
73
75
|
create_non_terminal_node(anEntry, range, non_terminal)
|
74
|
-
@
|
76
|
+
@result = create_tree(curr_parent) unless @last_visitee
|
75
77
|
else
|
76
78
|
raise NotImplementedError
|
77
79
|
end
|
@@ -143,7 +145,7 @@ module Rley # This module is used as a namespace
|
|
143
145
|
range = curr_parent.range
|
144
146
|
alternative = Rley::PTree::AlternativeNode.new(vertex, range)
|
145
147
|
add_subnode(alternative)
|
146
|
-
|
148
|
+
result.is_ambiguous = true
|
147
149
|
# puts "FOREST ADD #{alternative.key}"
|
148
150
|
|
149
151
|
return alternative
|
@@ -1,49 +1,13 @@
|
|
1
|
-
require_relative '
|
1
|
+
require_relative 'parse_rep_creator'
|
2
2
|
require_relative 'parse_tree_builder'
|
3
3
|
|
4
4
|
module Rley # This module is used as a namespace
|
5
5
|
module Parser # This module is used as a namespace
|
6
6
|
# Utility class that helps to create a ParseTree from
|
7
7
|
# a given Parsing object.
|
8
|
-
class ParseTreeFactory
|
9
|
-
# Link to Parsing object (= results of recognizer)
|
10
|
-
attr_reader(:parsing)
|
8
|
+
class ParseTreeFactory < ParseRepCreator
|
11
9
|
|
12
|
-
|
13
|
-
def initialize(aParsingResult)
|
14
|
-
@parsing = aParsingResult
|
15
|
-
end
|
16
|
-
|
17
|
-
# Factory that produces the parse tree
|
18
|
-
def build_parse_tree()
|
19
|
-
a_walker = walker(parsing)
|
20
|
-
a_builder = builder(parsing)
|
21
|
-
|
22
|
-
begin
|
23
|
-
loop do
|
24
|
-
event = a_walker.next
|
25
|
-
# puts "EVENT #{event[0]} #{event[1]}"
|
26
|
-
a_builder.receive_event(*event)
|
27
|
-
end
|
28
|
-
rescue StopIteration
|
29
|
-
# Do nothing
|
30
|
-
end
|
31
|
-
|
32
|
-
return a_builder.tree
|
33
|
-
end
|
34
|
-
|
35
|
-
|
36
|
-
private
|
37
|
-
|
38
|
-
# Create a Parsing walker, that is, an object
|
39
|
-
# that will iterate over the relevant nodes (= parsing entries)
|
40
|
-
# of a GFGParsing
|
41
|
-
def walker(aParseResult)
|
42
|
-
walker_factory = ParseWalkerFactory.new
|
43
|
-
accept_entry = aParseResult.accepting_entry
|
44
|
-
accept_index = aParseResult.chart.last_index
|
45
|
-
walker_factory.build_walker(accept_entry, accept_index)
|
46
|
-
end
|
10
|
+
protected
|
47
11
|
|
48
12
|
# Create a Builder, that is, an object
|
49
13
|
# that will create piece by piece the forest
|
@@ -8,16 +8,20 @@ require_relative '../gfg/start_vertex'
|
|
8
8
|
|
9
9
|
module Rley # This module is used as a namespace
|
10
10
|
module Parser # This module is used as a namespace
|
11
|
+
# Utility class used internally by the Enumerator created
|
12
|
+
# with a ParseWalkerContext object. It holds the state of
|
13
|
+
# the walk over a GFGParsing object.
|
11
14
|
ParseWalkerContext = Struct.new(
|
12
|
-
:curr_entry, #
|
13
|
-
:entry_set_index, #
|
14
|
-
:visitees, # The set of already visited
|
15
|
+
:curr_entry, # @return [ParseEntry] entry being visited
|
16
|
+
:entry_set_index, # @return [Integer] Set index of current parse entry
|
17
|
+
:visitees, # @return [Set<ParseEntry>] The set of already visited entries
|
15
18
|
:nterm2start, # Nested hashes. Pairs of first level are of the form:
|
16
19
|
# non-terminal symbol => { index(=origin) => start entry }
|
17
|
-
:return_stack, # A stack of parse entries
|
20
|
+
:return_stack, # @return [Array<ParseEntry>] A stack of parse entries
|
18
21
|
:backtrack_points
|
19
22
|
)
|
20
23
|
|
24
|
+
|
21
25
|
WalkerBacktrackpoint = Struct.new(
|
22
26
|
:entry_set_index, # Sigma set index of current parse entry
|
23
27
|
:return_stack, # A stack of parse entries
|
@@ -25,12 +29,11 @@ module Rley # This module is used as a namespace
|
|
25
29
|
:antecedent_index
|
26
30
|
)
|
27
31
|
|
28
|
-
# A factory that creates an
|
29
|
-
#
|
30
|
-
# The walker yields visit events.
|
31
|
-
#
|
32
|
-
# for a given GFGParsing object.
|
33
|
-
# distinct for the GFGParsing.
|
32
|
+
# A factory that creates an Enumerator object
|
33
|
+
# that itself walks through a GFGParsing object.
|
34
|
+
# The walker (= Enumerator) yields visit events.
|
35
|
+
# This class implements an external iterator
|
36
|
+
# for a given GFGParsing object.
|
34
37
|
# This is different from the internal iterators, usually implemented
|
35
38
|
# in Ruby with an :each method.
|
36
39
|
# Allows to perform a backwards traversal over the relevant parse entries.
|
@@ -40,7 +43,12 @@ module Rley # This module is used as a namespace
|
|
40
43
|
# (i.e. they belong to a path that leads to the accepting parse entry)
|
41
44
|
class ParseWalkerFactory
|
42
45
|
# Build an Enumerator that will yield the parse entries as it
|
43
|
-
# walks backwards on the parse graph
|
46
|
+
# walks backwards on the parse graph.
|
47
|
+
# @param acceptingEntry [ParseEntry] the final ParseEntry of a
|
48
|
+
# successful parse.
|
49
|
+
# @param maxIndex [Integer] the index of the last input token.
|
50
|
+
# @return [Enumerator] yields visit events when walking over the
|
51
|
+
# parse result
|
44
52
|
def build_walker(acceptingEntry, maxIndex)
|
45
53
|
# Local context for the enumerator
|
46
54
|
ctx = init_context(acceptingEntry, maxIndex)
|
@@ -12,7 +12,7 @@ module Rley # This module is used as a namespace
|
|
12
12
|
# during the parse.
|
13
13
|
# The root node corresponds to the main/start symbol of the grammar.
|
14
14
|
class ParseTree
|
15
|
-
# The root node of the tree
|
15
|
+
# @return [ParseTreeNode] The root node of the tree.
|
16
16
|
attr_reader(:root)
|
17
17
|
|
18
18
|
# @param theRootNode [ParseTreeNode] The root node of the parse tree.
|
@@ -8,7 +8,7 @@ module Rley # This module is used as a namespace
|
|
8
8
|
|
9
9
|
# aPosition is the position of the token in the input stream.
|
10
10
|
def initialize(aToken, aPos)
|
11
|
-
(major, minor) =
|
11
|
+
# (major, minor) =
|
12
12
|
|
13
13
|
# Use '1.class' trick to support both Integer and Fixnum classes
|
14
14
|
range = aPos.kind_of?(1.class) ? { low: aPos, high: aPos + 1 } : aPos
|
data/lib/rley/syntax/grammar.rb
CHANGED
@@ -31,7 +31,10 @@ module Rley # This module is used as a namespace
|
|
31
31
|
@symbols = []
|
32
32
|
@name2symbol = {}
|
33
33
|
valid_productions = validate_productions(theProductions)
|
34
|
-
valid_productions.each
|
34
|
+
valid_productions.each do |prod|
|
35
|
+
add_production(prod)
|
36
|
+
name_production(prod)
|
37
|
+
end
|
35
38
|
diagnose
|
36
39
|
|
37
40
|
# TODO: use topological sorting
|
@@ -65,6 +68,22 @@ module Rley # This module is used as a namespace
|
|
65
68
|
|
66
69
|
aProduction.rhs.each { |symb| add_symbol(symb) }
|
67
70
|
end
|
71
|
+
|
72
|
+
def name_production(aProduction)
|
73
|
+
if aProduction.name.nil?
|
74
|
+
index = rules.find_index(aProduction)
|
75
|
+
prefix = aProduction.lhs.name.dup
|
76
|
+
previous = index.zero? ? nil : rules[index - 1]
|
77
|
+
if previous.nil? || previous.lhs != aProduction.lhs
|
78
|
+
suffix = '[0]'
|
79
|
+
else
|
80
|
+
prev_serial = previous.name.match(/\[(\d+)\]$/)
|
81
|
+
suffix = "[#{prev_serial[1].to_i + 1}]"
|
82
|
+
end
|
83
|
+
|
84
|
+
aProduction.name = prefix + suffix
|
85
|
+
end
|
86
|
+
end
|
68
87
|
|
69
88
|
# Perform some check of the grammar.
|
70
89
|
def diagnose()
|
@@ -67,7 +67,7 @@ module Rley # This module is used as a namespace
|
|
67
67
|
# builder.rule 'A' => %w[a A c] # Call parentheses are optional
|
68
68
|
# @param aProductionRepr [Hash{String, Array<String>}]
|
69
69
|
# A Hash-based representation of a production.
|
70
|
-
# @return [
|
70
|
+
# @return [Production] The created Production instance
|
71
71
|
def add_production(aProductionRepr)
|
72
72
|
aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
|
73
73
|
lhs = get_nonterminal(lhs_name)
|
@@ -82,6 +82,8 @@ module Rley # This module is used as a namespace
|
|
82
82
|
new_prod = Production.new(lhs, rhs_constituents)
|
83
83
|
productions << new_prod
|
84
84
|
end
|
85
|
+
|
86
|
+
return productions.last
|
85
87
|
end
|
86
88
|
|
87
89
|
# Given the grammar symbols and productions added to the builder,
|
@@ -16,6 +16,9 @@ module Rley # This module is used as a namespace
|
|
16
16
|
# @return [NonTerminal] The left-hand side of the rule.
|
17
17
|
attr_reader(:lhs)
|
18
18
|
|
19
|
+
# @return [String] The unique name of the production rule.
|
20
|
+
attr_accessor(:name)
|
21
|
+
|
19
22
|
# @return [Boolean] A production is generative when all of its
|
20
23
|
# rhs members are generative (that is, they can each generate/derive
|
21
24
|
# a non-empty string of terminals).
|
@@ -26,9 +29,12 @@ module Rley # This module is used as a namespace
|
|
26
29
|
alias body rhs
|
27
30
|
alias head lhs
|
28
31
|
|
32
|
+
# Create a Production instance.
|
33
|
+
# @param aNonTerminal [NonTerminal] The left-hand side of the rule.
|
34
|
+
# @param theSymbols [list<Terminal | NonTerminal>] symbols of rhs.
|
29
35
|
def initialize(aNonTerminal, theSymbols)
|
30
36
|
@lhs = valid_lhs(aNonTerminal)
|
31
|
-
@rhs = SymbolSeq.new(theSymbols)
|
37
|
+
@rhs = SymbolSeq.new(theSymbols)
|
32
38
|
end
|
33
39
|
|
34
40
|
# Is the rhs empty?
|
@@ -97,7 +97,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
97
97
|
|
98
98
|
it 'should initialize the root node' do
|
99
99
|
next_event(:visit, 'Phi. | 0')
|
100
|
-
forest = subject.
|
100
|
+
forest = subject.result
|
101
101
|
|
102
102
|
expect(forest.root.to_string(0)).to eq('Phi[0, 4]')
|
103
103
|
expected_curr_path('Phi[0, 4]')
|
@@ -68,7 +68,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
68
68
|
|
69
69
|
context 'Parse forest construction' do
|
70
70
|
it 'should build a parse forest' do
|
71
|
-
forest = subject.
|
71
|
+
forest = subject.create
|
72
72
|
expect(forest).to be_kind_of(SPPF::ParseForest)
|
73
73
|
=begin
|
74
74
|
require 'yaml'
|
@@ -81,7 +81,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
81
81
|
|
82
82
|
it 'should initialize the root node' do
|
83
83
|
next_event(:visit, 'P. | 0')
|
84
|
-
tree = subject.
|
84
|
+
tree = subject.result
|
85
85
|
|
86
86
|
expect(tree.root.to_string(0)).to eq('P[0, 5]')
|
87
87
|
expected_curr_path('P[0, 5]')
|
@@ -227,12 +227,13 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
227
227
|
end
|
228
228
|
|
229
229
|
# Lightweight sanity check
|
230
|
-
expect(subject.
|
231
|
-
expect(subject.
|
232
|
-
expect(subject.
|
233
|
-
expect(subject.
|
234
|
-
child_node = subject.
|
230
|
+
expect(subject.result).not_to be_nil
|
231
|
+
expect(subject.result).to be_kind_of(PTree::ParseTree)
|
232
|
+
expect(subject.result.root.to_s).to eq('P[0, 5]')
|
233
|
+
expect(subject.result.root.subnodes.size).to eq(1)
|
234
|
+
child_node = subject.result.root.subnodes[0]
|
235
235
|
expect(child_node.to_s).to eq('S[0, 5]')
|
236
|
+
|
236
237
|
expect(child_node.subnodes.size).to eq(3)
|
237
238
|
first_grandchild = child_node.subnodes[0]
|
238
239
|
expect(first_grandchild.to_s).to eq('S[0, 1]')
|
@@ -1,40 +1,28 @@
|
|
1
1
|
require_relative '../../spec_helper'
|
2
2
|
|
3
3
|
require_relative '../../../lib/rley/parser/gfg_earley_parser'
|
4
|
-
|
5
4
|
require_relative '../../../lib/rley/syntax/grammar_builder'
|
6
5
|
require_relative '../support/grammar_helper'
|
6
|
+
require_relative '../support/grammar_abc_helper'
|
7
7
|
require_relative '../support/expectation_helper'
|
8
8
|
|
9
9
|
# Load the class under test
|
10
|
-
require_relative '../../../lib/rley/parser/
|
10
|
+
require_relative '../../../lib/rley/parser/parse_tree_factory'
|
11
11
|
|
12
12
|
module Rley # Open this namespace to avoid module qualifier prefixes
|
13
13
|
module Parser
|
14
|
-
describe
|
14
|
+
describe ParseTreeFactory do
|
15
15
|
include GrammarHelper # Mix-in with token factory method
|
16
16
|
include ExpectationHelper # Mix-in with expectation on parse entry sets
|
17
|
+
include GrammarABCHelper # Mix-in for a sample grammar
|
17
18
|
|
18
19
|
let(:sample_grammar) do
|
19
|
-
|
20
|
-
|
21
|
-
# Notes in Theoretical Computer Science 203, (2008), pp. 53-67
|
22
|
-
# contains a hidden left recursion and a cycle
|
23
|
-
builder = Syntax::GrammarBuilder.new do
|
24
|
-
add_terminals('a', 'b')
|
25
|
-
rule 'Phi' => 'S'
|
26
|
-
rule 'S' => %w[A T]
|
27
|
-
rule 'S' => %w[a T]
|
28
|
-
rule 'A' => 'a'
|
29
|
-
rule 'A' => %w[B A]
|
30
|
-
rule 'B' => []
|
31
|
-
rule 'T' => %w[b b b]
|
32
|
-
end
|
33
|
-
builder.grammar
|
20
|
+
builder = grammar_abc_builder
|
21
|
+
builder.grammar
|
34
22
|
end
|
35
23
|
|
36
24
|
let(:sample_tokens) do
|
37
|
-
build_token_sequence(%w[a b
|
25
|
+
build_token_sequence(%w[a b c], sample_grammar)
|
38
26
|
end
|
39
27
|
|
40
28
|
let(:sample_result) do
|
@@ -44,7 +32,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
44
32
|
|
45
33
|
|
46
34
|
subject do
|
47
|
-
|
35
|
+
ParseTreeFactory.new(sample_result)
|
48
36
|
end
|
49
37
|
|
50
38
|
# Emit a text representation of the current path.
|
@@ -58,7 +46,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
58
46
|
|
59
47
|
context 'Initialization:' do
|
60
48
|
it 'should be created with a GFGParsing' do
|
61
|
-
expect {
|
49
|
+
expect { ParseTreeFactory.new(sample_result) }.not_to raise_error
|
62
50
|
end
|
63
51
|
|
64
52
|
it 'should know the parse result' do
|
@@ -66,17 +54,10 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
66
54
|
end
|
67
55
|
end
|
68
56
|
|
69
|
-
context 'Parse
|
70
|
-
it 'should build a parse
|
71
|
-
forest = subject.
|
72
|
-
expect(forest).to be_kind_of(
|
73
|
-
=begin
|
74
|
-
require 'yaml'
|
75
|
-
require_relative '../../../exp/lab/forest_representation'
|
76
|
-
File.open("forest.yml", "w") { |f| YAML.dump(forest, f) }
|
77
|
-
pen = ForestRepresentation.new
|
78
|
-
pen.generate_graph(forest, File.open("forest.dot", "w"))
|
79
|
-
=end
|
57
|
+
context 'Parse tree construction' do
|
58
|
+
it 'should build a parse tree' do
|
59
|
+
forest = subject.create
|
60
|
+
expect(forest).to be_kind_of(PTree::ParseTree)
|
80
61
|
end
|
81
62
|
end # context
|
82
63
|
end # describe
|
@@ -159,6 +159,12 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
159
159
|
expect(subject.name2symbol['b']).to eq(b_)
|
160
160
|
expect(subject.name2symbol['c']).to eq(c_)
|
161
161
|
end
|
162
|
+
|
163
|
+
it 'should ensure that each production has a name' do
|
164
|
+
subject.rules.each do |prod|
|
165
|
+
expect(prod.name).to match(Regexp.new("#{prod.lhs.name}\\[\\d\\]"))
|
166
|
+
end
|
167
|
+
end
|
162
168
|
end # context
|
163
169
|
|
164
170
|
context 'Grammar diagnosis:' do
|
@@ -40,6 +40,10 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
40
40
|
expect(instance).to be_empty
|
41
41
|
end
|
42
42
|
|
43
|
+
it 'should be anonymous at creation' do
|
44
|
+
expect(subject.name).to be_nil
|
45
|
+
end
|
46
|
+
|
43
47
|
it 'should complain if its lhs is not a non-terminal' do
|
44
48
|
err = StandardError
|
45
49
|
msg_prefix = 'Left side of production must be a non-terminal symbol'
|
@@ -48,6 +52,14 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
48
52
|
expect { Production.new('wrong', sequence) }.to raise_error(err, msg)
|
49
53
|
end
|
50
54
|
end # context
|
55
|
+
|
56
|
+
context 'Provided services:' do
|
57
|
+
it 'should accept a name' do
|
58
|
+
a_name = 'nominem'
|
59
|
+
subject.name = a_name
|
60
|
+
expect(subject.name).to eq(a_name)
|
61
|
+
end
|
62
|
+
end # context
|
51
63
|
end # describe
|
52
64
|
end # module
|
53
65
|
end # module
|
@@ -58,7 +58,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
58
58
|
|
59
59
|
it 'should complain when unable to compare' do
|
60
60
|
err = StandardError
|
61
|
-
msg =
|
61
|
+
msg = 'Cannot compare a SymbolSeq with a String'
|
62
62
|
expect { subject == 'dummy-text' }.to raise_error(err, msg)
|
63
63
|
end
|
64
64
|
end # context
|
data/spec/spec_helper.rb
CHANGED
@@ -6,14 +6,14 @@ require 'coveralls'
|
|
6
6
|
|
7
7
|
Coveralls.wear!
|
8
8
|
|
9
|
-
SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
|
9
|
+
SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter.new([
|
10
10
|
SimpleCov::Formatter::HTMLFormatter,
|
11
11
|
Coveralls::SimpleCov::Formatter
|
12
|
-
]
|
12
|
+
])
|
13
13
|
|
14
|
-
|
15
|
-
require 'rspec' # Use the RSpec framework
|
16
14
|
require 'pp' # Use pretty-print for debugging purposes
|
15
|
+
require 'rspec' # Use the RSpec framework
|
16
|
+
|
17
17
|
|
18
18
|
RSpec.configure do |config|
|
19
19
|
config.expect_with :rspec do |c|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.08
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-08-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: coveralls
|
@@ -135,6 +135,7 @@ files:
|
|
135
135
|
- examples/data_formats/JSON/json_demo.rb
|
136
136
|
- examples/data_formats/JSON/json_grammar.rb
|
137
137
|
- examples/data_formats/JSON/json_lexer.rb
|
138
|
+
- examples/data_formats/JSON/json_minifier.rb
|
138
139
|
- examples/data_formats/JSON/json_parser.rb
|
139
140
|
- examples/general/calc/calc_demo.rb
|
140
141
|
- examples/general/calc/calc_grammar.rb
|
@@ -173,6 +174,7 @@ files:
|
|
173
174
|
- lib/rley/parser/parse_entry_tracker.rb
|
174
175
|
- lib/rley/parser/parse_forest_builder.rb
|
175
176
|
- lib/rley/parser/parse_forest_factory.rb
|
177
|
+
- lib/rley/parser/parse_rep_creator.rb
|
176
178
|
- lib/rley/parser/parse_state.rb
|
177
179
|
- lib/rley/parser/parse_state_tracker.rb
|
178
180
|
- lib/rley/parser/parse_tracer.rb
|