cofgratx 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +14 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +64 -0
- data/Rakefile +2 -0
- data/cofgratx.gemspec +24 -0
- data/lib/cofgratx.rb +10 -0
- data/lib/cofgratx/cfg/grammar.rb +46 -0
- data/lib/cofgratx/cfg/grammar_error.rb +2 -0
- data/lib/cofgratx/cfg/non_terminal.rb +54 -0
- data/lib/cofgratx/cfg/repetition.rb +9 -0
- data/lib/cofgratx/cfg/rule.rb +160 -0
- data/lib/cofgratx/cfg/rule_error.rb +2 -0
- data/lib/cofgratx/cfg/terminal.rb +25 -0
- data/lib/cofgratx/cfg/translation_repetition_set.rb +33 -0
- data/lib/cofgratx/cfg/translation_repetition_set_error.rb +2 -0
- data/lib/cofgratx/version.rb +3 -0
- data/spec/grammar_spec.rb +423 -0
- data/spec/non_terminal_spec.rb +126 -0
- data/spec/repetition_spec.rb +48 -0
- data/spec/rule_spec.rb +247 -0
- data/spec/spec_helper.rb +91 -0
- data/spec/terminal_spec.rb +60 -0
- data/spec/translation_repetition_set_spec.rb +68 -0
- metadata +119 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
MTZkNzBmZjkzZDllZjY3YjQxYWY5NTFkYmNmYjcyYWQ2ZDEyMDJjMQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MDBmMWVlNDRiOGJlMGU3ZDdhNmJmZjlmMzM0NGY4MzRhZjkzYTM3OQ==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
YjhmOTdjMzk5ZDlhZjViMzQzNzVlMzdhZTcyYjk5ZGNkMjdiMzg3NmI2M2Vi
|
10
|
+
MDRjOTQwOTUzNTNiOThiZDQ4ODY5NzRhNWUxNjljMDE5NTIwZmQzZmMwM2U1
|
11
|
+
MWU4ZTM4NDQwODc4YmRlNmM0ODJkNzY0ZmYxZjBiZTE4ODAzYzM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ZTQyNDJkMTBmM2VjNjcwMzRiNWYwMTllNGJiYzVkOGJkZmQ1NzI2MDg4NWI4
|
14
|
+
MmVlNDI5YzZiODA5OWIxYmVmYmY2ZDljODNkOWVjNTlmYjI0YTY2MTIwODY3
|
15
|
+
YWUyYzYwY2Q0Mzg3OGNmZmJkYjlmNGY0NzJhMGFkYTE0MzkzYTk=
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 callahat
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# Cofgratx
|
2
|
+
|
3
|
+
This is a context free grammar translation gem. Define a grammar (with or without some translations),
|
4
|
+
feed it a string with a starting rule, and it returns an array of possible translations.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
Add this line to your application's Gemfile:
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
gem 'cofgratx'
|
12
|
+
```
|
13
|
+
|
14
|
+
And then execute:
|
15
|
+
|
16
|
+
$ bundle
|
17
|
+
|
18
|
+
Or install it yourself as:
|
19
|
+
|
20
|
+
$ gem install cofgratx
|
21
|
+
|
22
|
+
## Usage
|
23
|
+
|
24
|
+
The Grammar class is the main entry point. This class is where rules/productions are defined.
|
25
|
+
|
26
|
+
### Adding rules and translations
|
27
|
+
|
28
|
+
Rules are made up of Terminals, Repetitions (a special kind of terminal), and other rules.
|
29
|
+
|
30
|
+
Terminals can be defined as a string, or regular expression.
|
31
|
+
|
32
|
+
Terminal.new "a"
|
33
|
+
Terminal.new /a/
|
34
|
+
|
35
|
+
Repetitions can only be defined as a string. These will be the last part of a rule and indicate that the rule may repeat when this is found.
|
36
|
+
|
37
|
+
Repetition.new ","
|
38
|
+
|
39
|
+
Translations are made up of Translation Repetition Sets (if the rule has a repetition), integers and strings. The Translation Repetition Set (TRS) is also made up of an offset (which repeated set to begin with), followed by integers and strings. The strings are straight up substitution, and the integers represent a smaller part of the rule.
|
40
|
+
|
41
|
+
For example, given:
|
42
|
+
|
43
|
+
abc = Terminal.new /a|b|c/
|
44
|
+
comma = Repetition.new ","
|
45
|
+
|
46
|
+
trs = TranslationRepetitionSet.new(2, " third:", 3)
|
47
|
+
|
48
|
+
g = Grammar.new
|
49
|
+
g.add_rules :S, [ [ abc, abc, abc, comma ], [4,3,2,1,":",trs] ]
|
50
|
+
|
51
|
+
g.translate "abc,cba,bac,acb", :S
|
52
|
+
|
53
|
+
=> [[",cba: third:a third:c third:b", ""]]
|
54
|
+
|
55
|
+
The TRS adds to the translated string by starting with the second repeated match ("cba,") and prints the string "third:" followed by the third character from the match set.
|
56
|
+
|
57
|
+
|
58
|
+
## Contributing
|
59
|
+
|
60
|
+
1. Fork it ( https://github.com/callahat/cofgratx/fork )
|
61
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
62
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
63
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
64
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/cofgratx.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for cofgratx.
# Puts ./lib on the load path so the version constant can be read without
# the gem being installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'cofgratx/version'

Gem::Specification.new do |spec|
  spec.name          = "cofgratx"
  spec.version       = Cofgratx::VERSION
  spec.authors       = ["callahat"]
  spec.email         = ["tim.callahan25@yahoo.com"]
  spec.summary       = "A context free grammar validator and translator"
  spec.description   = "The CFG class can be used to create a specification for a context free grammar and define translations for it"
  # Previously an empty string; points at the repository named in the README's
  # "Contributing" section so the published gem has a usable homepage.
  spec.homepage      = "https://github.com/callahat/cofgratx"
  spec.license       = "MIT"

  # Package every file tracked by git (NUL-separated to survive odd file names).
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
end
|
data/lib/cofgratx.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require "cofgratx/version"
|
2
|
+
require "cofgratx/cfg/terminal"
|
3
|
+
require "cofgratx/cfg/repetition"
|
4
|
+
require "cofgratx/cfg/translation_repetition_set_error"
|
5
|
+
require "cofgratx/cfg/translation_repetition_set"
|
6
|
+
require "cofgratx/cfg/rule_error"
|
7
|
+
require "cofgratx/cfg/grammar_error"
|
8
|
+
require "cofgratx/cfg/rule"
|
9
|
+
require "cofgratx/cfg/non_terminal"
|
10
|
+
require "cofgratx/cfg/grammar"
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# Main entry point: a set of named non terminals, each holding the rules
# (productions) that were registered for it.
class Grammar
  # Hash of Symbol => NonTerminal.
  attr_reader :rules

  def initialize
    @rules = {}
  end

  # Registers one or more rules under +non_terminal_symbol+.
  #
  # Each entry of +rules+ is a two element array: [rule_parts, translation].
  # Symbols found in rule_parts are replaced by the NonTerminal registered
  # under that name (created empty when not yet known), so rules may refer to
  # non terminals that are defined later.
  #
  # The caller's rule_parts array is no longer modified; the substitution is
  # done on a copy. Each Rule is built once and reused after validation.
  #
  # Raises GrammarError (with the rule's translation_error_message) when a
  # rule reports an invalid translation.
  # Returns whatever NonTerminal#add_rules returns.
  def add_rules(non_terminal_symbol, *rules)
    target = (@rules[non_terminal_symbol.to_sym] ||= NonTerminal.new)

    good_rules = rules.map do |rule, translation|
      resolved_parts = rule.map do |part|
        part.is_a?(Symbol) ? (@rules[part] ||= NonTerminal.new) : part
      end

      new_rule = Rule.new(resolved_parts, translation)
      unless new_rule.valid_translation?
        raise GrammarError.new(new_rule.translation_error_message)
      end
      new_rule
    end

    target.add_rules(*good_rules)
  end

  # Replaces the non terminal stored under +non_terminal_symbol+ with a fresh,
  # empty NonTerminal.
  # NOTE(review): rules that already hold a reference to the previous
  # NonTerminal object keep pointing at it - confirm this is intended.
  def clear_rule(non_terminal_symbol)
    @rules[non_terminal_symbol.to_sym] = NonTerminal.new
  end

  # True when +string+ is fully consumed (no remainder) by at least one match
  # starting from +starting_non_terminal+.
  # Raises RuntimeError when the starting non terminal is unknown.
  def match?(string, starting_non_terminal)
    start = @rules[starting_non_terminal.to_sym]
    raise "Unknown initial non terminal: '#{starting_non_terminal}'" unless start

    start.extract(string).any? { |m| m[1] == "" && !m[0].nil? }
  end

  # Returns the candidate translations of +string+ that consumed the whole
  # input, as produced by the starting non terminal's #translate.
  # Raises RuntimeError when the starting non terminal is unknown.
  def translate(string, starting_non_terminal)
    start = @rules[starting_non_terminal.to_sym]
    raise "Unknown initial non terminal: '#{starting_non_terminal}'" unless start

    start.translate(string).select { |m| m[1] == "" && !m[0].nil? }
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# A named grammar symbol: holds the alternative Rules that can produce it.
class NonTerminal
  # Array of Rule objects, unique by their [rule, translation] pair.
  attr_reader :rules

  # Raises ArgumentError when any argument is not a Rule.
  def initialize(*rules)
    validate_list_of_rules(*rules)
    @rules = rules.uniq { |r| [r.rule, r.translation] }
  end

  # Appends +rules+, dropping any whose [rule, translation] pair is already
  # present. Always returns the updated rule list (uniq! alone returns nil
  # when nothing was removed, which previously leaked out as the result).
  # Raises ArgumentError when any argument is not a Rule.
  def add_rules(*rules)
    validate_list_of_rules(*rules)
    @rules.push(*rules)
    @rules.uniq! { |r| [r.rule, r.translation] }
    @rules
  end

  # True when at least one rule matches +string+.
  def match?(string)
    @rules.any? { |rule| rule.match? string }
  end

  # Collects the matches of every rule whose first extracted match is not nil.
  # Returns [[nil, string, [[]]]] when no rule matched.
  def extract(string)
    found = @rules.each_with_object([]) do |rule, acc|
      rule_matches = rule.extract(string)
      acc.concat(rule_matches) if rule_matches.first[0]
    end

    return [[nil, string, [[]]]] if found.empty?
    found
  end

  # Collects [translation, remainder] pairs from every rule, skipping pairs
  # where either element is nil/false.
  # Returns [[nil, string]] when nothing was produced.
  def translate(string)
    found = @rules.each_with_object([]) do |rule, acc|
      rule.translate(string).each do |translation, remainder|
        acc << [translation, remainder] if translation && remainder
      end
    end

    return [[nil, string]] if found.empty?
    found
  end

  protected

  # Raises ArgumentError listing every argument whose class is not exactly Rule.
  def validate_list_of_rules(*params)
    bad_args = params.reject { |param| param.class == Rule }
    return if bad_args.empty?

    raise ArgumentError.new("expected a list of Rules; found bad items: " +
      bad_args.map { |bad_arg| "#{bad_arg.class.name} #{bad_arg}" }.join("\n"))
  end
end
|
@@ -0,0 +1,160 @@
|
|
1
|
+
# One production of the grammar: an ordered list of Terminal / NonTerminal
# parts, optionally ended by a Repetition, plus an optional translation made of
# integers (1-based part numbers), literal strings and
# TranslationRepetitionSets.
class Rule
  attr_reader :rule, :translation, :translation_error_message

  # Raises ArgumentError / RuleError as described on #set_rule and
  # #set_translation.
  def initialize(subrules = [], translations = [])
    @translation_error_message = nil
    @rule = set_rule subrules
    @translation = set_translation translations
  end

  # Validates and stores the rule parts (arguments are flattened two levels).
  # Raises ArgumentError for a part that is not exactly a Terminal,
  # NonTerminal or Repetition, and RuleError when a Repetition is the first
  # part or is followed by anything. Returns the stored parts.
  def set_rule(*rule)
    good_parts = []
    rule.flatten(2).each do |part|
      unless [Repetition, Terminal, NonTerminal].include?(part.class)
        raise ArgumentError.new("expected Terminal, NonTerminal or Repetition; got #{part.class.name}")
      end
      if part.class == Repetition && good_parts.empty?
        raise RuleError.new("cannot have repetition as the first part of the rule")
      end
      if good_parts.last.class == Repetition
        raise RuleError.new("nothing can follow the repetition")
      end
      good_parts << part
    end
    @rule = good_parts
  end

  # Validates and stores the translation parts (arguments are fully flattened).
  # Raises ArgumentError for anything that is not an integer, a String or a
  # TranslationRepetitionSet. Returns the stored parts.
  def set_translation(*translation)
    good_parts = []
    translation.flatten.each do |part|
      # Integer is used instead of Fixnum: Fixnum was removed in Ruby 3.2.
      # The message text still says "Fixnum" so existing callers matching on
      # it keep working.
      unless part.is_a?(Integer) || [String, TranslationRepetitionSet].include?(part.class)
        raise ArgumentError.new("expected Fixnum, String or TranslationRepetitionSet; got #{part.class.name}")
      end
      good_parts << part
    end
    @translation = good_parts
  end

  # Checks the translation against the rule:
  # * a TranslationRepetitionSet requires the rule to end with a Repetition;
  # * no integer (top level or inside a TranslationRepetitionSet) may exceed
  #   the number of rule parts.
  # Returns true/false. On failure translation_error_message describes the
  # problem; on success it is nil (it used to be left holding a stale
  # "fewer parts" message whenever the translation contained an integer).
  def valid_translation?
    @translation_error_message = nil
    @translation.each do |part|
      if part.class == TranslationRepetitionSet
        if @rule.last.class != Repetition
          @translation_error_message = "rule does not contain repetition"
          return false
        elsif part.translations.any? { |tx| tx.is_a?(Integer) && tx > @rule.size }
          @translation_error_message = "rule contains fewer parts than the TranslationRepetitionSet has for a translation: #{part.translations.inspect}"
          return false
        end
      elsif part.is_a?(Integer) && part > @rule.size
        @translation_error_message = "rule contains fewer parts than translation number: #{part.inspect}"
        return false
      end
    end
    true
  end

  # True when the first result of #extract has a non-nil match.
  def match?(candidate)
    extract(candidate).first[0] != nil
  end

  # Walks the rule parts over +candidate+, keeping every surviving alternative.
  #
  # Returns an array of [matched_text, remainder, sets] triples, where sets is
  # an array of arrays of matched pieces (one inner array per repetition).
  # Returns [[nil, candidate, [[]]]] when nothing survives.
  #
  # When +translate_non_terminals+ is true, NonTerminal parts contribute their
  # translations instead of their raw matches.
  def extract(candidate, translate_non_terminals = false)
    working_matches = [["", candidate.dup, [[]]]]

    @rule.each do |subrule|
      surviving_matches = []
      working_matches.each do |current_match, working_candidate, current_set|
        if subrule.class == Repetition
          surviving_matches.concat extract_repetition_character(subrule,
                                                                current_match.dup,
                                                                deep_clone_a_set(current_set),
                                                                working_candidate.dup,
                                                                translate_non_terminals)
        elsif subrule.class == Terminal
          # Terminal#extract is used as returning [match_or_nil, remainder].
          match, working_candidate = subrule.extract(working_candidate)
          if match
            current_set.first << match
            surviving_matches << [current_match + match, working_candidate.dup, deep_clone_a_set(current_set)]
          end
        elsif subrule.class == NonTerminal
          matches = if translate_non_terminals
            translations = subrule.translate(working_candidate).select { |tx| tx[0] }
            # Attach a copy of the pieces collected so far, extended with this
            # translation, as the third element of each result.
            translations.each do |tx|
              tx[2] = deep_clone_a_set(current_set)
              tx[2].first << tx[0].dup
            end
            translations
          else
            subrule.extract(working_candidate)
          end

          surviving_matches.concat(matches) if matches.size > 0 && matches.first[0]
        else
          raise "Rule is corrupt, found a bad subrule:#{subrule} with class:#{subrule.class}"
        end
      end
      working_matches = surviving_matches.dup
    end

    return [[nil, candidate, [[]]]] if working_matches.size == 0
    working_matches
  end

  # Extracts +candidate+ (translating nested non terminals) and renders each
  # match through the translation list.
  #
  # Returns an array of [translation, remainder] pairs; the translation is nil
  # for a failed match. With an empty translation list the matched pieces are
  # simply joined.
  def translate(candidate)
    translations = extract(candidate, true).map do |current_match, working_candidate, current_set|
      next [nil, candidate] unless current_match
      next [current_set.join(""), working_candidate.dup] unless @translation.size > 0

      current_translation = ""
      @translation.each do |sub_translation|
        if sub_translation.class == TranslationRepetitionSet
          # offset is 1-based: start at that repetition and run to the last one.
          current_set[(sub_translation.offset - 1)..-1].to_a.each do |current|
            sub_translation.translations.each do |translation|
              current_translation += translation_helper(current, translation)
            end
          end
        else
          current_translation += translation_helper(current_set.first, sub_translation)
        end
      end
      [current_translation, working_candidate]
    end

    return [[nil, candidate]] unless translations.size > 0
    translations
  end

  protected

  # An integer selects the (1-based) piece of +current_set+ as a string;
  # anything else is returned unchanged as literal text.
  def translation_helper(current_set, translation)
    if translation.is_a?(Integer)
      current_set[translation - 1].to_s
    else
      translation
    end
  end

  # Handles the trailing Repetition part: when the repetition text is found,
  # the whole rule is applied again to what follows it, and one production is
  # returned per successful continuation. When the repetition text is absent,
  # or no continuation matches, the match built so far is returned unchanged
  # (with the original, unconsumed +working_candidate+).
  def extract_repetition_character(subrule, current_match, current_set, working_candidate, translate_non_terminals)
    match, temp_working_candidate = subrule.extract(working_candidate)
    additional_productions = []

    if match
      extract(temp_working_candidate, translate_non_terminals).each do |more_match, repetition_working_candidate, repetition_current_set|
        next unless more_match

        # The pieces of this repetition (plus the repetition text itself) go
        # in front of the sets produced by the continuation.
        first_current_set = current_set.first.dup
        first_current_set << match
        repetition_current_set.unshift first_current_set

        additional_productions << [current_match + match + more_match,
                                   repetition_working_candidate.dup,
                                   deep_clone_a_set(repetition_current_set)]
      end
    end

    if additional_productions.size == 0
      return [[current_match, working_candidate, deep_clone_a_set(current_set)]]
    end
    additional_productions
  end

  # Copies an array of arrays of strings so alternatives never share state.
  def deep_clone_a_set(set)
    set.map { |subset| subset.map { |string| string.dup } }
  end
end
|