sequitur 0.0.04

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MzhjZGJlNTkxNTYxNGE4ZWU2NGUzNzI1NTI3NWMxNTNjMTdhZjRhZA==
5
+ data.tar.gz: !binary |-
6
+ YzI2ZjFiYTk2NmRlYzgyZDI1MmRmNzZmZjQ3NTY3NmY2NjA1MzU2OA==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ NGE3ODVkMzA0MDVlOGY3MmM4NTU1MDU0NGRhYWU1ZjI1NWRlYjU4MjZkZTEw
10
+ YjkxMzQ0MTU2ZGRlNTg3NGE3YjkxMjMzNzYwOWVkMWNlZjE1MzI5OTIwMWEw
11
+ ODE4Njk0MGQ0ZTFmOTRkOTJlY2QwZDVlZTA2YTc5NWYwMzM3NGU=
12
+ data.tar.gz: !binary |-
13
+ ZDEyNDU3NjVhZTdkNDM0ZWVhN2IxYWIyZGRlZjE3YzBjZDdjOTRlYWVkZmY0
14
+ YjliZDJhZWJjMTZjYjUyMzg5MDZmOThjN2ZlMzBlMGI4Y2E4N2ZlMWY2ODA0
15
+ ODQ2NDQ0YzkzODRiNjdlOTc4NGFiYzk1ZGZlM2I5MTkyZTUwYzA=
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --backtrace
data/.rubocop.yml ADDED
@@ -0,0 +1,74 @@
1
+ AllCops:
2
+ Exclude:
3
+ - 'examples/**/*'
4
+ - 'features/**/*'
5
+ - 'gems/**/*'
6
+
7
+ # This is disabled because some demos use UTF-8
8
+ AsciiComments:
9
+ Enabled: false
10
+
11
+ CaseIndentation:
12
+ IndentWhenRelativeTo: end
13
+ IndentOneStep: true
14
+
15
+ # Rubocop enforces the use of is_a? instead of kind_of?
16
+ # Which is contrary to modelling practice.
17
+ ClassCheck:
18
+ Enabled: false
19
+
20
+ ClassLength:
21
+ Max: 250
22
+ CountComments: false
23
+
24
+ ConstantName:
25
+ Enabled: false
26
+
27
+ CyclomaticComplexity:
28
+ Enabled: false
29
+
30
+ DefWithParentheses:
31
+ Enabled: false
32
+
33
+ Documentation:
34
+ Enabled: false
35
+
36
+ EmptyLines:
37
+ Enabled: false
38
+
39
+ EmptyLinesAroundBody:
40
+ Enabled: false
41
+
42
+ Encoding:
43
+ Enabled: false
44
+
45
+ FileName:
46
+ Enabled: false
47
+
48
+ IndentationWidth :
49
+ Enabled: false
50
+
51
+
52
+ # Avoid methods longer than 50 lines of code
53
+ MethodLength:
54
+ Max: 50
55
+ CountComments: false
56
+
57
+ NonNilCheck:
58
+ Enabled: false
59
+
60
+ NumericLiterals:
61
+ Enabled: false
62
+
63
+ RaiseArgs:
64
+ Enabled: false
65
+
66
+ RedundantReturn:
67
+ Enabled: false
68
+
69
+ SpaceInsideBrackets:
70
+ Enabled: false
71
+
72
+ TrailingWhitespace:
73
+ Enabled: false
74
+
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ sequitur
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 1.9.3
data/.simplecov ADDED
@@ -0,0 +1,7 @@
1
+ # .simplecov
2
+ # Configuration
3
+
4
+ SimpleCov.start do
5
+ # Remove all files that match /spec/ in their path
6
+ add_filter "/spec/"
7
+ end
data/.travis.yml ADDED
@@ -0,0 +1,15 @@
1
+ language: ruby
2
+ # TODO: add JRuby (after enabling ObjectSpace)
3
+ rvm:
4
+ - 2.1.0
5
+ - 2.0.0
6
+ - 1.9.3
7
+ - 1.9.2
8
+
9
+ gemfile:
10
+ - Gemfile
11
+
12
+ # whitelist
13
+ branches:
14
+ only:
15
+ - master
data/.yardopts ADDED
@@ -0,0 +1,6 @@
1
+ --exclude examples --exclude features --exclude spec
2
+ --no-private
3
+ --markup markdown
4
+ -
5
+ CHANGELOG.md
6
+ LICENSE.txt
data/CHANGELOG.md ADDED
@@ -0,0 +1,12 @@
1
+ ### 0.0.04 / 2014-08-24
2
+ * [FIX] `.travis.yml`: removed JRuby from Travis CI. Rationale: ObjectSpace class is disabled!
3
+
4
+ ### 0.0.03 / 2014-08-24
5
+ * [FIX] `Rakefile`: removed unused Cucumber-based task
6
+
7
+ ### 0.0.02 / 2014-08-24
8
+ * [CHANGE] `README.md`: added badge from Travis CI
9
+
10
+ ### 0.0.01 / 2014-08-24
11
+
12
+ * [FEATURE] Initial public working version
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source 'https://rubygems.org'
2
+ # Prevent Bundler from loading the dependencies from our .gemspec file
3
+
4
+
5
+ group :development do
6
+ gem 'rake', '>= 0.8.0'
7
+ gem 'rspec', '>= 3.0.0'
8
+ gem 'simplecov', '>= 0.5.0'
9
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2014 Dimitri Geshef
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,10 @@
1
+ Sequitur
2
+ ===========
3
+ _Ruby gem implementing the Sequitur algorithm_
4
+ [Homepage](https://github.com/famished-tiger/Sequitur)
5
+
6
+ [![Build Status](https://travis-ci.org/famished-tiger/Sequitur.svg?branch=master)](https://travis-ci.org/famished-tiger/Sequitur)
7
+
8
+ Copyright
9
+ ---------
10
+ Copyright (c) 2014, Dimitri Geshef. Sequitur is released under the MIT License; see [LICENSE.txt](https://github.com/famished-tiger/Sequitur/blob/master/LICENSE.txt) for details.
data/Rakefile ADDED
@@ -0,0 +1,31 @@
1
+ require 'rubygems'
2
+ require_relative './lib/sequitur/constants'
3
+
4
+ namespace :gem do
5
+
6
+ desc 'Push the gem to rubygems.org'
7
+ task :push do
8
+ system("gem push sequitur-#{Sequitur::Version}.gem")
9
+ end
10
+
11
+ end # namespace
12
+
13
+ # Testing-specific tasks
14
+
15
+ # RSpec as testing tool
16
+ require 'rspec/core/rake_task'
17
+ desc 'Run RSpec'
18
+ RSpec::Core::RakeTask.new do |spec|
19
+ spec.pattern = 'spec/**/*_spec.rb'
20
+ end
21
+
22
+
23
+ # Run RSpec tests
24
+ desc 'Run tests, with RSpec'
25
+ task test: [:spec]
26
+
27
+
28
+ # Default rake task
29
+ task default: :test
30
+
31
+ # End of file
data/lib/sequitur.rb ADDED
@@ -0,0 +1,8 @@
1
+ # File: sequitur.rb
2
+ # This file acts as a jumping-off point for loading dependencies expected
3
+ # for a Sequitur client.
4
+
5
+ require_relative './sequitur/constants'
6
+ require_relative './sequitur/sequitur-grammar'
7
+
8
+ # End of file
@@ -0,0 +1,26 @@
1
+ # File: constants.rb
2
+ # Purpose: definition of Sequitur constants.
3
+
4
module Sequitur # Module used as a namespace
  # The version number of the gem.
  # Frozen so that the shared constant cannot be mutated in place.
  Version = '0.0.04'.freeze

  # Brief description of the gem.
  Description = 'Ruby implementation of the Sequitur algorithm'.freeze

  # Constant Sequitur::RootDir contains the absolute path of Sequitur's
  # root directory. Note: it also ends with a slash character.
  unless defined?(RootDir)
    # The initialisation of constant RootDir is guarded in order
    # to avoid multiple initialisation (not allowed for constants)

    # The root folder of Sequitur: two directory levels above the
    # folder that contains this file.
    RootDir = begin
      require 'pathname' # Load Pathname class from standard library
      rootdir = Pathname(__FILE__).dirname.parent.parent.expand_path
      "#{rootdir}/".freeze # Append trailing slash character to it
    end
  end
end # module
25
+
26
+ # End of file
@@ -0,0 +1,29 @@
1
+ module Sequitur # Module for classes implementing the Sequitur algorithm
2
+
3
# A digram is a sequence of two grammar symbols (terminal or not).
class Digram
  # The sequence of two consecutive grammar symbols.
  attr_reader(:symbols)

  # The object id of the production that contains this digram in its rhs.
  attr_reader(:production_id)

  # The Hash key of the digram: the hexadecimal hash values of both
  # symbols, separated by a colon. Digrams made of equal symbols
  # (in the same order) therefore share the same key.
  attr_reader(:key)

  # The production object that contains this digram in its rhs.
  # A direct reference is kept instead of resolving production_id through
  # ObjectSpace._id2ref: the lookup by id raises a RangeError once the
  # object has been garbage-collected and is not available on every
  # Ruby implementation.
  attr_reader(:production)

  # Constructor.
  # symbol1:: the first grammar symbol of the pair.
  # symbol2:: the second grammar symbol of the pair.
  # aProduction:: the production that has both symbols in its rhs.
  def initialize(symbol1, symbol2, aProduction)
    @symbols = [symbol1, symbol2]
    @key = "#{symbol1.hash.to_s(16)}:#{symbol2.hash.to_s(16)}"
    @production = aProduction
    @production_id = aProduction.object_id
  end
end # class
26
+
27
+ end # module
28
+
29
+ # End of file
@@ -0,0 +1,126 @@
1
+ require_relative 'production'
2
+
3
+ module Sequitur # Module for classes implementing the Sequitur algorithm
4
+
5
# A grammar that starts with a single, empty root production and
# whose set of productions can be extended or reduced afterwards.
class DynamicGrammar
  # Link to the root - start - production.
  attr_reader(:root)

  # The set of production rules of the grammar
  attr_reader(:productions)

  # nodoc Trace the execution of the algorithm.
  attr_accessor(:trace)


  # Constructor.
  # Build a grammar with one empty rule as start/root rule
  def initialize()
    @root = Production.new
    @productions = [ root ]
    @trace = false
  end

  public

  # Emit a text representation of the grammar.
  # Each production rule is emitted per line.
  def to_string()
    rule_text = productions.map(&:to_string).join("\n")
    return rule_text
  end


  # Add a production to the grammar.
  # Fails with a StandardError when the rhs of the given production
  # refers to a production that isn't part of the grammar.
  def add_production(aProduction)
    # TODO: remove output
    puts "Adding #{aProduction.object_id}" if trace
    puts aProduction.to_string if trace
    check_rhs_of(aProduction) # TODO: configurable check
    productions << aProduction
  end


  # Remove the production at the given position from the grammar.
  # The rhs of the removed production is cleared.
  # Returns the removed production.
  def delete_production(anIndex)
    puts "Before production removal #{productions[anIndex].object_id}" if trace
    puts to_string if trace
    prod = productions.delete_at(anIndex)
    # TODO: remove output
    puts prod.to_string if trace
    prod.clear_rhs

    check_backrefs # TODO: configurable check

    return prod
  end


  # Add the given token to the grammar.
  # The token is appended to the rhs of the root production.
  def add_token(aToken)
    append_symbol_to(root, aToken)
  end

  protected

  # Append the given symbol to the rhs of the given production.
  def append_symbol_to(aProduction, aSymbol)
    aProduction.append_symbol(aSymbol)
  end


  # Check that any production reference in rhs is
  # pointing to a production of the grammar
  def check_rhs_of(aProduction)
    aProduction.references.each do |symb|
      next if productions.include?(symb)

      msg = "Production #{aProduction.object_id} refers to "
      msg << "production #{symb.object_id}"
      msg << ' that is not part of the grammar.'
      fail StandardError, msg
    end
  end

  # Check the invariants:
  #  Every back reference must point to a production of the grammar
  #  Every back reference count must be equal to the number
  #  of occurrences in the referencing production.
  # The first production (the root) is not checked.
  def check_backrefs()
    return if productions.size < 2

    all_but_root = productions.drop(1)
    all_but_root.each do |a_prod|
      a_prod.backrefs.each do |other_prod_id, count|
        begin
          other_prod = ObjectSpace._id2ref(other_prod_id)
        rescue RangeError
          msg = "Production #{a_prod.object_id} has a backref to "
          msg << "recycled production #{other_prod_id}."
          msg << "\n#{to_string}"
          $stderr.puts msg
          raise # Re-raise the RangeError after reporting it
        end
        found = productions.find { |elem| elem == other_prod }
        unless found
          msg = "Production #{a_prod.object_id} is referenced by the "
          msg << "unknown production (#{other_prod_id})."
          msg << "\n#{to_string}"
          fail StandardError, msg
        end

        # Number of times a_prod really occurs in the referencing rhs
        actual_count = found.rhs.count { |symb| symb == a_prod }
        next if count == actual_count

        msg = "Production #{a_prod.object_id} has a count mismatch"
        msg << "\nIt expects #{count} references in rhs of #{other_prod_id} "
        msg << "but actual count is #{actual_count}."
        msg << "\n#{to_string}"
        fail StandardError, msg
      end
    end
  end

end # class
123
+
124
+ end # module
125
+
126
+ # End of file
@@ -0,0 +1,202 @@
1
+ require_relative 'digram'
2
+
3
+ module Sequitur # Module for classes implementing the Sequitur algorithm
4
+
5
+
6
# In a context-free grammar, a production is a rule in which
# its left-hand side (LHS) consists solely of a non-terminal symbol
# and the right-hand side (RHS) consists of a sequence of symbols.
# The symbols in RHS can be either terminal or non-terminal symbols.
# The rule stipulates that the LHS is equivalent to the RHS,
# in other words every occurrence of the LHS can be substituted by
# the corresponding RHS.
# The object id of the production is taken as its LHS.
class Production
  # The right-hand side (rhs) consists of a sequence of grammar symbols
  attr_reader(:rhs)

  # A Hash with pairs of the form:
  # production id => reference count
  # Where the reference count is the number of times this production
  # appears in the rhs of the production with given id.
  attr_reader(:backrefs)

  # Constructor. Build a production with an empty RHS.
  def initialize()
    @backrefs = {}
    @rhs = []
  end

  public

  # Is the rhs empty?
  def empty?
    return rhs.empty?
  end


  # Return the productions appearing in the rhs.
  # A production occurring several times in the rhs is returned
  # as many times.
  def references()
    return rhs.select { |symb| symb.kind_of?(Production) }
  end


  # Does the rhs have exactly one digram only (= 2 symbols)?
  def single_digram?
    return rhs.size == 2
  end


  # Detect whether the last digram occurs twice
  # Assumption: when a digram occurs twice in a production then it must occur
  # at the end of the rhs
  def repeated_digram?
    return false if rhs.size < 3

    all_keys = digrams.map(&:key)
    last_key = all_keys.pop
    return all_keys.include?(last_key)
  end

  # Return the last digram appearing in the RHS.
  # Return nil when the rhs has less than two symbols.
  def last_digram()
    return nil if rhs.size < 2

    return Digram.new(rhs[-2], rhs[-1], self)
  end


  # The back reference count is the number of times this production
  # appears in the rhs of all the productions of the grammar
  def refcount()
    return backrefs.values.reduce(0, :+)
  end

  # Add a back reference to the given production.
  # Return the reference count as it was before the increment.
  def add_backref(aProduction)
    prod_id = aProduction.object_id

    count = backrefs.fetch(prod_id, 0)
    backrefs[prod_id] = count + 1
    return count
  end

  # Decrement the reference count for the given production.
  # If result is zero, then the entry is removed from the Hash.
  # Return the reference count as it was before the decrement.
  # An error is raised by Hash#fetch when there is no back reference
  # to the given production.
  def remove_backref(aProduction)
    prod_id = aProduction.object_id

    count = backrefs.fetch(prod_id)
    if count < 1
      msg = "Invalid back reference count (#{count}) "
      msg << "for production #{prod_id}."
      fail StandardError, msg
    end

    if count > 1
      backrefs[prod_id] = count - 1
    else
      backrefs.delete(prod_id)
    end

    return count
  end

  # Emit a text representation of the production rule.
  # Text is of the form:
  # object id of production : rhs as space-separated sequence of symbols.
  # Strings are surrounded by single quotes, productions are
  # represented by their object id.
  def to_string()
    rhs_text = rhs.map do |elem|
      case elem
        when String then "'#{elem}'"
        when Production then elem.object_id.to_s
        else elem.to_s
      end
    end

    return "#{object_id} : #{rhs_text.join(' ')}."
  end

  # Return the digrams for this production as if
  # the given symbol is appended at the end of the rhs.
  # Return an empty Array when the rhs is empty.
  def calc_append_symbol(aSymbol)
    return [] if empty?

    return digrams + [ Digram.new(rhs.last, aSymbol, self) ]
  end

  # Append the given symbol at the end of the rhs.
  # When the symbol is a production, a back reference to self
  # is registered in that production.
  def append_symbol(aSymbol)
    aSymbol.add_backref(self) if aSymbol.kind_of?(Production)
    rhs << aSymbol
  end

  # Clear the right-hand side.
  # Any referenced production has its back reference counter decremented
  def clear_rhs()
    references.each { |a_ref| a_ref.remove_backref(self) }
    @rhs = []
  end

  # Return the list of digrams found in rhs of this production.
  def digrams()
    return [] if rhs.size < 2

    return rhs.each_cons(2).map { |couple| Digram.new(*couple, self) }
  end

  # Substitute in self all occurrences of the digram that
  # appears in the rhs of the other production
  # Pre-condition:
  # another has a rhs with exactly one digram (= a two-symbol sequence).
  def replace_digram(another)
    # Find the positions where the digram occur in rhs
    (symb1, symb2) = another.rhs
    indices = [ -2 ] # Dummy index!

    (0...rhs.size).each do |i|
      # Skip the position right after a match: occurrences don't overlap
      next if i == indices.last + 1
      indices << i if (rhs[i] == symb1) && (rhs[i + 1] == symb2)
    end
    indices.shift # Get rid of the dummy index

    # Process the rightmost occurrence first, so that the positions
    # still to be handled aren't shifted by the removals.
    pos = indices.reverse

    # Replace the two symbol sequence by the production
    pos.each do |index|
      rhs[index].remove_backref(self) if rhs[index].kind_of?(Production)
      rhs[index] = another
      index1 = index + 1
      rhs[index1].remove_backref(self) if rhs[index1].kind_of?(Production)
      rhs.delete_at(index1)
      another.add_backref(self)
    end
  end

  # Replace every occurrence of 'another' production in rhs by
  # the rhs of 'another'.
  def replace_production(another)
    # Walk from right to left: insertions only move symbols
    # that were already visited.
    (0...rhs.size).to_a.reverse.each do |index|
      next unless rhs[index] == another
      rhs.insert(index + 1, *another.rhs)
      another.rhs.each do |new_symb|
        new_symb.add_backref(self) if new_symb.kind_of?(Production)
      end
      another.remove_backref(self)
      rhs.delete_at(index)
    end
  end

end # class
199
+
200
+ end # module
201
+
202
+ # End of file