sequitur 0.0.04

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MzhjZGJlNTkxNTYxNGE4ZWU2NGUzNzI1NTI3NWMxNTNjMTdhZjRhZA==
5
+ data.tar.gz: !binary |-
6
+ YzI2ZjFiYTk2NmRlYzgyZDI1MmRmNzZmZjQ3NTY3NmY2NjA1MzU2OA==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ NGE3ODVkMzA0MDVlOGY3MmM4NTU1MDU0NGRhYWU1ZjI1NWRlYjU4MjZkZTEw
10
+ YjkxMzQ0MTU2ZGRlNTg3NGE3YjkxMjMzNzYwOWVkMWNlZjE1MzI5OTIwMWEw
11
+ ODE4Njk0MGQ0ZTFmOTRkOTJlY2QwZDVlZTA2YTc5NWYwMzM3NGU=
12
+ data.tar.gz: !binary |-
13
+ ZDEyNDU3NjVhZTdkNDM0ZWVhN2IxYWIyZGRlZjE3YzBjZDdjOTRlYWVkZmY0
14
+ YjliZDJhZWJjMTZjYjUyMzg5MDZmOThjN2ZlMzBlMGI4Y2E4N2ZlMWY2ODA0
15
+ ODQ2NDQ0YzkzODRiNjdlOTc4NGFiYzk1ZGZlM2I5MTkyZTUwYzA=
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --backtrace
data/.rubocop.yml ADDED
@@ -0,0 +1,74 @@
1
+ AllCops:
2
+ Exclude:
3
+ - 'examples/**/*'
4
+ - 'features/**/*'
5
+ - 'gems/**/*'
6
+
7
+ # This is disabled because some demos use UTF-8
8
+ AsciiComments:
9
+ Enabled: false
10
+
11
+ CaseIndentation:
12
+ IndentWhenRelativeTo: end
13
+ IndentOneStep: true
14
+
15
+ # Rubocop enforces the use of is_a? instead of kind_of?
16
+ # Which is contrary to modelling practice.
17
+ ClassCheck:
18
+ Enabled: false
19
+
20
+ ClassLength:
21
+ Max: 250
22
+ CountComments: false
23
+
24
+ ConstantName:
25
+ Enabled: false
26
+
27
+ CyclomaticComplexity:
28
+ Enabled: false
29
+
30
+ DefWithParentheses:
31
+ Enabled: false
32
+
33
+ Documentation:
34
+ Enabled: false
35
+
36
+ EmptyLines:
37
+ Enabled: false
38
+
39
+ EmptyLinesAroundBody:
40
+ Enabled: false
41
+
42
+ Encoding:
43
+ Enabled: false
44
+
45
+ FileName:
46
+ Enabled: false
47
+
48
+ IndentationWidth :
49
+ Enabled: false
50
+
51
+
52
+ # Avoid methods longer than 50 lines of code
53
+ MethodLength:
54
+ Max: 50
55
+ CountComments: false
56
+
57
+ NonNilCheck:
58
+ Enabled: false
59
+
60
+ NumericLiterals:
61
+ Enabled: false
62
+
63
+ RaiseArgs:
64
+ Enabled: false
65
+
66
+ RedundantReturn:
67
+ Enabled: false
68
+
69
+ SpaceInsideBrackets:
70
+ Enabled: false
71
+
72
+ TrailingWhitespace:
73
+ Enabled: false
74
+
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ sequitur
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 1.9.3
data/.simplecov ADDED
@@ -0,0 +1,7 @@
1
+ # .simplecov
2
+ # Configuration
3
+
4
+ SimpleCov.start do
5
+ # Remove all files that match /spec/ in their path
6
+ add_filter "/spec/"
7
+ end
data/.travis.yml ADDED
@@ -0,0 +1,15 @@
1
+ language: ruby
2
+ # TODO: add JRuby (after enabling ObjectSpace)
3
+ rvm:
4
+ - 2.1.0
5
+ - 2.0.0
6
+ - 1.9.3
7
+ - 1.9.2
8
+
9
+ gemfile:
10
+ - Gemfile
11
+
12
+ # whitelist
13
+ branches:
14
+ only:
15
+ - master
data/.yardopts ADDED
@@ -0,0 +1,6 @@
1
+ --exclude examples --exclude features --exclude spec
2
+ --no-private
3
+ --markup markdown
4
+ -
5
+ CHANGELOG.md
6
+ LICENSE.txt
data/CHANGELOG.md ADDED
@@ -0,0 +1,12 @@
1
+ ### 0.0.04 / 2014-08-24
2
+ * [FIX] `.travis.yml`: removed JRuby from Travis CI. Rationale: ObjectSpace class is disabled!
3
+
4
+ ### 0.0.03 / 2014-08-24
5
+ * [FIX] `Rakefile`: removed unused Cucumber-based task
6
+
7
+ ### 0.0.02 / 2014-08-24
8
+ * [CHANGE] `README.md`: added badge from Travis CI
9
+
10
+ ### 0.0.01 / 2014-08-24
11
+
12
+ * [FEATURE] Initial public working version
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source 'https://rubygems.org'
2
+ # Prevent Bundler from loading the dependencies from our .gemspec file
3
+
4
+
5
+ group :development do
6
+ gem 'rake', '>= 0.8.0'
7
+ gem 'rspec', '>= 3.0.0'
8
+ gem 'simplecov', '>= 0.5.0'
9
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2014 Dimitri Geshef
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,10 @@
1
+ Sequitur
2
+ ===========
3
+ _Ruby gem implementing the Sequitur algorithm_
4
+ [Homepage](https://github.com/famished-tiger/Sequitur)
5
+
6
+ [![Build Status](https://travis-ci.org/famished-tiger/Sequitur.svg?branch=master)](https://travis-ci.org/famished-tiger/Sequitur)
7
+
8
+ Copyright
9
+ ---------
10
+ Copyright (c) 2014, Dimitri Geshef. Sequitur is released under the MIT License; see [LICENSE.txt](https://github.com/famished-tiger/Sequitur/blob/master/LICENSE.txt) for details.
data/Rakefile ADDED
@@ -0,0 +1,31 @@
1
+ require 'rubygems'
2
+ require_relative './lib/sequitur/constants'
3
+
4
+ namespace :gem do
5
+
6
+ desc 'Push the gem to rubygems.org'
7
+ task :push do
8
+ system("gem push sequitur-#{Sequitur::Version}.gem")
9
+ end
10
+
11
+ end # namespace
12
+
13
+ # Testing-specific tasks
14
+
15
+ # RSpec as testing tool
16
+ require 'rspec/core/rake_task'
17
+ desc 'Run RSpec'
18
+ RSpec::Core::RakeTask.new do |spec|
19
+ spec.pattern = 'spec/**/*_spec.rb'
20
+ end
21
+
22
+
23
+ # Run RSpec tests
24
+ desc 'Run tests, with RSpec'
25
+ task test: [:spec]
26
+
27
+
28
+ # Default rake task
29
+ task default: :test
30
+
31
+ # End of file
data/lib/sequitur.rb ADDED
@@ -0,0 +1,8 @@
1
+ # File: sequitur.rb
2
+ # This file acts as a jumping-off point for loading dependencies expected
3
+ # for a Sequitur client.
4
+
5
+ require_relative './sequitur/constants'
6
+ require_relative './sequitur/sequitur-grammar'
7
+
8
+ # End of file
@@ -0,0 +1,26 @@
1
+ # File: constants.rb
2
+ # Purpose: definition of Sequitur constants.
3
+
4
module Sequitur # Module used as a namespace
  # The version number of the gem.
  # Frozen so that no client code can alter it in place.
  Version = '0.0.04'.freeze

  # Brief description of the gem.
  Description = 'Ruby implementation of the Sequitur algorithm'.freeze

  # Constant Sequitur::RootDir contains the absolute path of Sequitur's
  # root directory. Note: it also ends with a slash character.
  unless defined?(RootDir)
    # The initialisation of constant RootDir is guarded in order
    # to avoid multiple initialisation (not allowed for constants)

    # The root folder of Sequitur: two directory levels above the folder
    # that contains this file.
    RootDir = (File.expand_path('../..', File.dirname(__FILE__)) + '/').freeze
  end
end # module
25
+
26
+ # End of file
@@ -0,0 +1,29 @@
1
+ module Sequitur # Module for classes implementing the Sequitur algorithm
2
+
3
+ # A digram is a sequence of two grammar symbols (terminal or not).
4
class Digram
  # The sequence of two consecutive grammar symbols.
  attr_reader(:symbols)

  # A Hash key for the digram. It is derived from the hash values of
  # both symbols.
  attr_reader(:key)

  # The production that contains this digram in its rhs.
  # The digram keeps a direct reference to it: no lookup through
  # ObjectSpace is needed (that facility is disabled by default on JRuby
  # and gives no guarantee once the object has been garbage-collected).
  attr_reader(:production)

  # Constructor.
  # @param symbol1 [Object] the first symbol of the pair.
  # @param symbol2 [Object] the second symbol of the pair.
  # @param aProduction [Object] the production that has both symbols
  #   side by side in its rhs.
  def initialize(symbol1, symbol2, aProduction)
    @symbols = [symbol1, symbol2]
    @key = "#{symbol1.hash.to_s(16)}:#{symbol2.hash.to_s(16)}"
    @production = aProduction
  end

  # The object id of the production that contains this digram in its rhs.
  # @return [Integer]
  def production_id
    production.object_id
  end
end # class
26
+
27
+ end # module
28
+
29
+ # End of file
@@ -0,0 +1,126 @@
1
+ require_relative 'production'
2
+
3
+ module Sequitur # Module for classes implementing the Sequitur algorithm
4
+
5
# A grammar whose set of production rules can be extended and reduced
# after its creation. It always starts with one (empty) root production.
class DynamicGrammar
  # Link to the root - start - production.
  attr_reader(:root)

  # The set of production rules of the grammar
  attr_reader(:productions)

  # nodoc Trace the execution of the algorithm.
  attr_accessor(:trace)

  # Constructor.
  # Build a grammar with one empty rule as start/root rule
  def initialize
    @root = Production.new
    @productions = [root]
    @trace = false
  end

  # Emit a text representation of the grammar.
  # Each production rule is emitted per line.
  # @return [String]
  def to_string
    productions.map(&:to_string).join("\n")
  end

  # Add a production to the grammar.
  # @param aProduction [Production] the rule to append to the grammar.
  # @raise [StandardError] when the rhs of the production refers to a
  #   production that isn't part of the grammar.
  def add_production(aProduction)
    # TODO: remove output
    if trace
      puts "Adding #{aProduction.object_id}"
      puts aProduction.to_string
    end
    check_rhs_of(aProduction) # TODO: configurable check
    productions << aProduction
  end

  # Remove a production from the grammar
  # @param anIndex [Integer] position of the production to remove.
  # @return [Production] the removed production (its rhs is cleared).
  # @raise [StandardError] when a back reference invariant is violated
  #   after the removal.
  def delete_production(anIndex)
    if trace
      puts "Before production removal #{productions[anIndex].object_id}"
      puts to_string
    end
    prod = productions.delete_at(anIndex)
    # TODO: remove output
    puts prod.to_string if trace
    prod.clear_rhs

    check_backrefs # TODO: configurable check

    prod
  end

  # Add the given token to the grammar.
  # The token is appended to the rhs of the root production.
  def add_token(aToken)
    append_symbol_to(root, aToken)
  end

  protected

  # Append the given symbol to the rhs of the given production.
  def append_symbol_to(aProduction, aSymbol)
    aProduction.append_symbol(aSymbol)
  end

  # Check that any production reference in rhs is
  # pointing to a production of the grammar
  # @raise [StandardError] when a referenced production is unknown.
  def check_rhs_of(aProduction)
    aProduction.references.each do |symb|
      next if productions.include?(symb)

      msg = "Production #{aProduction.object_id} refers to " \
            "production #{symb.object_id} that is not part of the grammar."
      fail StandardError, msg
    end
  end

  # Check the invariants:
  # Every back reference must point to a production of the grammar
  # Every back reference count must be equal to the number
  # of occurrences in the referencing production.
  # The root production is not checked.
  # @raise [StandardError] when one of the invariants is violated.
  def check_backrefs
    return if productions.size < 2

    productions.drop(1).each do |a_prod|
      a_prod.backrefs.each do |other_prod_id, count|
        # The referencing production is looked up by id in the grammar
        # itself. This works on any Ruby implementation and cannot hit
        # an unrelated object that happens to reuse the id.
        found = productions.find { |elem| elem.object_id == other_prod_id }
        unless found
          msg = "Production #{a_prod.object_id} is referenced by the " \
                "unknown production (#{other_prod_id}).\n#{to_string}"
          fail StandardError, msg
        end

        actual = found.rhs.count { |symb| symb == a_prod }
        next if count == actual

        msg = "Production #{a_prod.object_id} has a count mismatch" \
              "\nIt expects #{count} references in rhs of #{other_prod_id} " \
              "but actual count is #{actual}.\n#{to_string}"
        fail StandardError, msg
      end
    end
  end
end # class
123
+
124
+ end # module
125
+
126
+ # End of file
@@ -0,0 +1,202 @@
1
+ require_relative 'digram'
2
+
3
+ module Sequitur # Module for classes implementing the Sequitur algorithm
4
+
5
+
6
+ # In a context-free grammar, a production is a rule in which
7
+ # its left-hand side (LHS) consists solely of a non-terminal symbol
8
+ # and the right-hand side (RHS) consists of a sequence of symbols.
9
+ # The symbols in RHS can be either terminal or non-terminal symbols.
10
+ # The rule stipulates that the LHS is equivalent to the RHS,
11
+ # in other words, every occurrence of the LHS can be replaced by the
12
+ # corresponding RHS.
13
+ # The object id of the production is taken as its LHS.
14
class Production
  # The right-hand side (rhs) consists of a sequence of grammar symbols
  attr_reader(:rhs)

  # A Hash with pairs of the form:
  # production id => reference count
  # Where the reference count is the number of times this production
  # appears in the rhs of the production with given id.
  attr_reader(:backrefs)

  # Constructor. Build a production with an empty RHS.
  def initialize
    clear_rhs
    @backrefs = {}
  end

  # Is the rhs empty?
  def empty?
    rhs.empty?
  end

  # Return the productions appearing in the rhs.
  # A production occurring several times in the rhs is listed as many times.
  # @return [Array<Production>]
  def references
    rhs.select { |symb| symb.kind_of?(Production) }
  end

  # Does the rhs have exactly one digram only (= 2 symbols)?
  def single_digram?
    rhs.size == 2
  end

  # Detect whether the last digram occurs twice
  # Assumption: when a digram occurs twice in a production then it must occur
  # at the end of the rhs
  def repeated_digram?
    return false if rhs.size < 3

    *earlier_keys, last_key = digrams.map(&:key)
    earlier_keys.include?(last_key)
  end

  # Return the last digram appearing in the RHS.
  # @return [Digram, nil] nil when the rhs has fewer than two symbols.
  def last_digram
    return nil if rhs.size < 2

    Digram.new(rhs[-2], rhs[-1], self)
  end

  # The back reference count is the number of times this production
  # appears in the rhs of all the productions of the grammar
  # @return [Integer]
  def refcount
    backrefs.values.reduce(0, :+)
  end

  # Add a back reference to the given production.
  # @param aProduction [Production] a production that has self in its rhs.
  # @return [Integer] the reference count before the increment.
  def add_backref(aProduction)
    prod_id = aProduction.object_id

    count = backrefs.fetch(prod_id, 0)
    backrefs[prod_id] = count + 1
    count
  end

  # Decrement the reference count for the given production.
  # If result is zero, then the entry is removed from the Hash.
  # @param aProduction [Production] a production that had self in its rhs.
  # @return [Integer] the reference count before the decrement.
  # @raise [KeyError] when there is no back reference to the production.
  # @raise [StandardError] when the recorded count is lower than one.
  def remove_backref(aProduction)
    prod_id = aProduction.object_id

    count = backrefs.fetch(prod_id)
    if count < 1
      msg = "Production #{object_id} has an invalid back reference " \
            "count (#{count}) for production #{prod_id}."
      fail StandardError, msg
    end

    if count > 1
      backrefs[prod_id] = count - 1
    else
      backrefs.delete(prod_id)
    end

    count
  end

  # Emit a text representation of the production rule.
  # Text is of the form:
  # object id of production : rhs as space-separated sequence of symbols.
  # @return [String]
  def to_string
    rhs_text = rhs.map do |elem|
      case elem
        when String then "'#{elem}'"
        when Production then elem.object_id.to_s
        else elem.to_s
      end
    end

    "#{object_id} : #{rhs_text.join(' ')}."
  end

  # Return the digrams for this production as if
  # the given symbol is appended at the end of the rhs
  # @return [Array<Digram>] empty when the rhs is empty.
  def calc_append_symbol(aSymbol)
    return [] if empty?

    digrams + [Digram.new(rhs.last, aSymbol, self)]
  end

  # Append the given symbol at the end of the rhs.
  # When the symbol is a production, its back reference count is updated.
  def append_symbol(aSymbol)
    aSymbol.add_backref(self) if aSymbol.kind_of?(Production)
    rhs << aSymbol
  end

  # Clear the right-hand side.
  # Any referenced production has its back reference counter decremented
  def clear_rhs
    # rhs is still nil when this method is called by the constructor
    references.each { |a_ref| a_ref.remove_backref(self) } if rhs
    @rhs = []
  end

  # Return the list of digrams found in rhs of this production.
  # @return [Array<Digram>] empty when the rhs has fewer than two symbols.
  def digrams
    rhs.each_cons(2).map { |first, second| Digram.new(first, second, self) }
  end

  # Substitute in self all occurrences of the digram that
  # appears in the rhs of the other production
  # Pre-condition:
  # another has a rhs with exactly one digram (= a two-symbol sequence).
  def replace_digram(another)
    (symb1, symb2) = another.rhs

    # Find the positions where the digram occurs in rhs.
    # The scan stops at the last index that can start a digram.
    positions = []
    (0...rhs.size - 1).each do |i|
      # Occurrences may not overlap: skip the second symbol of a match
      next if positions.last == i - 1
      positions << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
    end

    # Replace the two-symbol sequences by the production.
    # Going from right to left keeps the remaining positions valid.
    positions.reverse_each do |index|
      rhs[index, 2].each do |symb|
        symb.remove_backref(self) if symb.kind_of?(Production)
      end
      rhs[index, 2] = another
      another.add_backref(self)
    end
  end

  # Replace every occurrence of 'another' production in rhs by
  # the rhs of 'another'.
  def replace_production(another)
    # Going from right to left keeps the remaining indices valid.
    (0...rhs.size).reverse_each do |index|
      next unless rhs[index] == another

      another.rhs.each do |new_symb|
        new_symb.add_backref(self) if new_symb.kind_of?(Production)
      end
      another.remove_backref(self)
      rhs[index, 1] = another.rhs # Splice the symbols in place
    end
  end
end # class
199
+
200
+ end # module
201
+
202
+ # End of file