dendroid 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/lib/dendroid/grm_dsl/base_grm_builder.rb +169 -0
- data/lib/dendroid/syntax/grm_symbol.rb +1 -0
- data/spec/dendroid/grm_dsl/base_grm_builder_spec.rb +106 -0
- data/spec/dendroid/syntax/grammar_spec.rb +0 -2
- data/version.txt +1 -1
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '06667077966c902b083c8e73debebc0808bbc0952243712d19a34f26f9d0c1ba'
|
4
|
+
data.tar.gz: 56e29eb75f509f37212d9bc384c139b6e903b730308fa320e84dc4f352c93f2e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e135fc4c2ce34cf54d226d6ed82e188174d99220645abb127877b8d7a97ea77bbce77f8b8a731c390b362e592f961f758078c1af8618c339e41b6564052f1bf3
|
7
|
+
data.tar.gz: 8b30951295517fbf46fa2a1560b4e27dbae13b101cff1841142b641711b4be6696cb50e16333e03dbcb472fa4803380a8e706c85ddab69837db148b69f8ffae4
|
data/CHANGELOG.md
CHANGED
@@ -0,0 +1,169 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# NOTE: forward slashes are required here. A backslash is only a path
# separator on Windows; elsewhere it is an ordinary filename character and
# the files would not be found.
require_relative '../syntax/terminal'
require_relative '../syntax/non_terminal'
require_relative '../syntax/symbol_seq'
require_relative '../syntax/production'
require_relative '../syntax/choice'
require_relative '../syntax/grammar'
|
9
|
+
|
10
|
+
module Dendroid
|
11
|
+
# This module contains classes that define Domain-Specific Language specialized
|
12
|
+
# in grammar definition.
|
13
|
+
module GrmDSL
|
14
|
+
# Builder GoF pattern: Builder builds a complex object.
|
15
|
+
# here the builder creates a grammar from simpler objects
|
16
|
+
# (symbols and production rules)
|
17
|
+
# and using a step by step approach.
|
18
|
+
class BaseGrmBuilder
  # @return [Symbol] one of: :declaring, :building, :complete
  attr_reader :state

  # @return [Hash{String, Symbol => Dendroid::Syntax::GrmSymbol}] The mapping of
  #   grammar symbol names to the matching grammar symbol object.
  #   A symbol is registered under the key it was declared with and under
  #   the value of its `name` attribute, so one symbol may appear under two keys.
  attr_reader :symbols

  # @return [Array<Dendroid::Syntax::Rule>] The list of rules of the grammar
  attr_reader :rules

  # Creates a new grammar builder object.
  # When a block is given, it is evaluated in the context of the builder and
  # the builder is then put in state :complete.
  # @param aBlock [Proc] code block used to build the grammar.
  # @example Building a tiny English grammar
  #   builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
  #     declare_terminals('n', 'v', 'adj', 'det')
  #     rule 'S' => 'NP VP'
  #     rule 'VP' => 'v NP'
  #     rule 'NP' => ['det n', 'adj NP']
  #   end
  #   # Now with `builder`, let's create the grammar
  #   tiny_eng = builder.grammar
  def initialize(&aBlock)
    @symbols = {}
    @rules = []
    @state = :declaring

    return unless block_given?

    instance_exec(&aBlock)
    grammar_complete!
  end

  # Add the given terminal symbols to the grammar of the language
  # @param terminalSymbols [String, Terminal] 1..* terminal symbols.
  # @return [Hash] the updated symbol table (callers should not rely on it)
  # @raise [StandardError] when the builder is not in state :declaring
  def declare_terminals(*terminalSymbols)
    err_msg = "Terminal symbols may only be declared in state :declaring, current state is: #{state}"
    raise StandardError, err_msg unless state == :declaring

    symbols.merge!(build_symbols(Dendroid::Syntax::Terminal, terminalSymbols))
  end

  # Add a production rule in the grammar given one
  # key-value pair of the form: String => String.
  # Where the key is the name of the non-terminal appearing in the
  # left side of the rule.
  # When the value is a String, it is a sequence of grammar symbol names separated by space.
  # When the value is an array of String, the elements represent an alternative rhs
  # The rule is created and inserted in the grammar.
  # Only the first key-value pair of the Hash is taken into account.
  # @example
  #   builder.rule('sentence' => 'noun_phrase verb_phrase')
  #   builder.rule('noun_phrase' => ['noun', 'adj noun'])
  # @param productionRuleRepr [Hash{String => String, Array<String>}]
  #   A Hash-based representation of a production.
  # @return [Dendroid::Syntax::Rule, nil] The created Production or Choice instance;
  #   nil when the argument is not a Hash.
  # @raise [StandardError] when the builder is in state :complete or when
  #   the left-hand side names a terminal symbol.
  def rule(productionRuleRepr)
    # StandardError (not Exception) so that a plain `rescue` can handle it,
    # consistently with the other errors raised by this class.
    raise StandardError, 'Cannot add a production rule in state :complete' if state == :complete

    @state = :building
    return unless productionRuleRepr.is_a?(Hash)

    head_name, raw_rhs = productionRuleRepr.first
    if symbols.key?(head_name)
      err_msg = "Terminal symbol '#{head_name}' may not be on left-side of a rule."
      raise StandardError, err_msg if symbols[head_name].is_a?(Dendroid::Syntax::Terminal)
    else
      # An undeclared left-hand side is implicitly a new non-terminal.
      symbols.merge!(build_symbols(Dendroid::Syntax::NonTerminal, [head_name]))
    end
    lhs = symbols[head_name]

    new_prod = if raw_rhs.is_a?(String)
                 Dendroid::Syntax::Production.new(lhs, build_symbol_seq(raw_rhs))
               else
                 alternatives = raw_rhs.map { |raw| build_symbol_seq(raw) }
                 Dendroid::Syntax::Choice.new(lhs, alternatives)
               end
    rules << new_prod
    new_prod
  end

  # A method used to notify the builder that the grammar is complete
  # (i.e. all rules were entered).
  # @return [Symbol] the new state, :complete
  def grammar_complete!
    @state = :complete
  end

  # Generate the grammar according to the specifications.
  # @return [Dendroid::Syntax::Grammar]
  def grammar
    # A symbol can sit under two keys of `symbols` (see #build_symbols),
    # hence the `uniq`: every terminal must be handed over only once.
    terminals = symbols.values.select(&:terminal?).uniq
    grm = Dendroid::Syntax::Grammar.new(terminals)
    rules.each { |prod| grm.add_rule(prod) }
    grm.complete!
    grm
  end

  private

  # Convert a text with symbol names separated by spaces/tabs into a
  # sequence of grammar symbols. Names that are not yet known are
  # registered as new non-terminals.
  # @param raw_symbols [String] symbol names; may be empty or blank
  # @return [Dendroid::Syntax::SymbolSeq]
  def build_symbol_seq(raw_symbols)
    # Splitting a blank text yields no name at all, i.e. an empty sequence.
    symbol_names = raw_symbols.strip.split(/(?: |\t)+/)
    members = symbol_names.map do |symb_name|
      unless symbols.key?(symb_name)
        symbols.merge!(build_symbols(Dendroid::Syntax::NonTerminal, [symb_name]))
      end
      symbols[symb_name]
    end

    Dendroid::Syntax::SymbolSeq.new(members)
  end

  # Build the grammar symbols for the given elements.
  # @param aClass [Class] The class of grammar symbols to instantiate.
  # @param theSymbols [Array] array of elements are treated as follows:
  #   if the element is already a grammar symbol, then it added as is,
  #   otherwise it is considered as the name of a grammar symbol
  #   of the specified class to build.
  # @return [Hash] the symbols, each one being reachable with two keys:
  #   the value of its `name` attribute and the element it was built from.
  def build_symbols(aClass, theSymbols)
    theSymbols.each_with_object({}) do |s, symbs|
      new_symbol = build_symbol(aClass, s)
      symbs[new_symbol.name] = new_symbol
      symbs[s] = new_symbol
    end
  end

  # If the argument is already a grammar symbol object then it is
  # returned as is. Otherwise, the argument is treated as a name
  # for a new instance of the given class.
  # @param aClass [Class] The class of grammar symbols to instantiate
  # @param aSymbolArg [GrmSymbol-like or String]
  # @return [Dendroid::Syntax::GrmSymbol] the given or the newly built grammar symbol
  def build_symbol(aClass, aSymbolArg)
    return aSymbolArg if aSymbolArg.is_a?(Dendroid::Syntax::GrmSymbol)

    aClass.new(aSymbolArg)
  end
end # class
|
168
|
+
end # module
|
169
|
+
end # module
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# NOTE: forward slashes are required here. A backslash is only a path
# separator on Windows; elsewhere the files would not be found.
require_relative '../../spec_helper'
require_relative '../../../lib/dendroid/grm_dsl/base_grm_builder'

describe Dendroid::GrmDSL::BaseGrmBuilder do
  # Builds ingredients for a grammar inspired from https://en.wikipedia.org/wiki/Earley_parser
  subject do
    instance = described_class.new
    instance.declare_terminals('PLUS', 'STAR', 'INTEGER')
    instance
  end

  context 'Initialization:' do
    it 'is initialized with an optional code block' do
      expect { described_class.new }.not_to raise_error
    end

    it 'is in "declaring" state by default' do
      expect(described_class.new.state).to eq(:declaring)
    end

    it 'has no grammar symbol by default' do
      expect(described_class.new.symbols).to be_empty
    end

    it 'has no production rule by default' do
      expect(described_class.new.rules).to be_empty
    end
  end # context

  context 'Provided services:' do
    it 'builds declared terminal symbols' do
      instance = described_class.new
      terminals = %w[PLUS STAR INTEGER]
      instance.declare_terminals(*terminals)

      # Each terminal is reachable with two keys: its String and its Symbol name
      expect(instance.symbols.size).to eq(2 * terminals.size)
      expect(instance.symbols[:PLUS]).to be_kind_of(Dendroid::Syntax::Terminal)
      expect(instance.symbols['PLUS']).to eq(instance.symbols[:PLUS])
      expect(instance.symbols[:PLUS].name).to eq(:PLUS)
      expect(instance.symbols[:STAR]).to be_kind_of(Dendroid::Syntax::Terminal)
      expect(instance.symbols['STAR']).to eq(instance.symbols[:STAR])
      expect(instance.symbols[:STAR].name).to eq(:STAR)
      expect(instance.symbols[:INTEGER]).to be_kind_of(Dendroid::Syntax::Terminal)
      expect(instance.symbols['INTEGER']).to eq(instance.symbols[:INTEGER])
      expect(instance.symbols[:INTEGER].name).to eq(:INTEGER)

      # Declaring terminals doesn't change the builder's state
      expect(instance.state).to eq(:declaring)
    end

    it 'builds production rules' do
      subject.rule('p' => 's')
      expect(subject.state).to eq(:building)

      # Undeclared symbols in production represent non-terminals
      expect(subject.symbols['p']).to be_kind_of(Dendroid::Syntax::NonTerminal)
      expect(subject.symbols['s']).to be_kind_of(Dendroid::Syntax::NonTerminal)

      expect(subject.rules.size).to eq(1)
      expect(subject.rules.first.to_s).to eq('p => s')
    end

    it 'builds a grammar' do
      # Same grammar as in the DSL example below, but built step by step
      subject.rule('p' => 's')
      subject.rule('s' => ['s PLUS m', 'm'])
      subject.rule('m' => ['m STAR t', 't'])
      subject.rule('t' => 'INTEGER')
      subject.grammar_complete!

      grm = subject.grammar
      expect(grm).to be_kind_of(Dendroid::Syntax::Grammar)
      (terms, nonterms) = grm.symbols.partition(&:terminal?)
      expect(terms.map(&:name)).to eq(%i[PLUS STAR INTEGER])
      expect(nonterms.map(&:name)).to eq(%i[p s m t])
      grammar_rules = [
        'p => s',
        's => s PLUS m | m',
        'm => m STAR t | t',
        't => INTEGER'
      ]
      expect(subject.rules.map(&:to_s)).to eq(grammar_rules)
    end

    it 'provides a simple DSL' do
      # The block is evaluated in the context of the new builder
      instance = described_class.new do
        declare_terminals('PLUS', 'STAR', 'INTEGER')
        rule('p' => 's')
        rule('s' => ['s PLUS m', 'm'])
        rule('m' => ['m STAR t', 't'])
        rule('t' => 'INTEGER')
      end

      grm = instance.grammar
      expect(grm).to be_kind_of(Dendroid::Syntax::Grammar)
      (terms, nonterms) = grm.symbols.partition(&:terminal?)
      expect(terms.map(&:name)).to eq(%i[PLUS STAR INTEGER])
      expect(nonterms.map(&:name)).to eq(%i[p s m t])
      grammar_rules = [
        'p => s',
        's => s PLUS m | m',
        'm => m STAR t | t',
        't => INTEGER'
      ]
      expect(instance.rules.map(&:to_s)).to eq(grammar_rules)
    end
  end # context
end # describe
|
@@ -40,7 +40,6 @@ describe Dendroid::Syntax::Grammar do
|
|
40
40
|
Dendroid::Syntax::Choice.new(lhs, sequences.map { |arr| build_symbol_seq(arr) })
|
41
41
|
end
|
42
42
|
|
43
|
-
# rubocop: disable Metrics/AbcSize
|
44
43
|
def build_all_rules
|
45
44
|
rule1 = build_production(p_symb, [s_symb]) # p => s
|
46
45
|
rule2 = build_choice(s_symb, [[s_symb, plus_symb, m_symb], [m_symb]]) # s => s + m | m
|
@@ -48,7 +47,6 @@ describe Dendroid::Syntax::Grammar do
|
|
48
47
|
rule4 = build_production(t_symb, [int_symb]) # t => INTEGER
|
49
48
|
[rule1, rule2, rule3, rule4]
|
50
49
|
end
|
51
|
-
# rubocop: enable Metrics/AbcSize
|
52
50
|
|
53
51
|
context 'Initialization:' do
|
54
52
|
it 'is initialized with an array of terminal symbols' do
|
data/version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.6
|
1
|
+
0.0.7
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dendroid
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
@@ -24,6 +24,7 @@ files:
|
|
24
24
|
- bin/dendroid
|
25
25
|
- dendroid.gemspec
|
26
26
|
- lib/dendroid.rb
|
27
|
+
- lib/dendroid/grm_dsl/base_grm_builder.rb
|
27
28
|
- lib/dendroid/syntax/choice.rb
|
28
29
|
- lib/dendroid/syntax/grammar.rb
|
29
30
|
- lib/dendroid/syntax/grm_symbol.rb
|
@@ -32,6 +33,7 @@ files:
|
|
32
33
|
- lib/dendroid/syntax/rule.rb
|
33
34
|
- lib/dendroid/syntax/symbol_seq.rb
|
34
35
|
- lib/dendroid/syntax/terminal.rb
|
36
|
+
- spec/dendroid/grm_dsl/base_grm_builder_spec.rb
|
35
37
|
- spec/dendroid/syntax/choice_spec.rb
|
36
38
|
- spec/dendroid/syntax/grammar_spec.rb
|
37
39
|
- spec/dendroid/syntax/grm_symbol_spec.rb
|