sequitur 0.1.18 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rubocop.yml +430 -56
- data/.travis.yml +19 -13
- data/CHANGELOG.md +33 -0
- data/Gemfile +4 -3
- data/LICENSE.txt +1 -1
- data/README.md +1 -2
- data/Rakefile +2 -0
- data/appveyor.yml +20 -12
- data/examples/integer_sample.rb +8 -7
- data/examples/porridge.rb +6 -6
- data/examples/simple_case.rb +7 -6
- data/examples/symbol_sample.rb +8 -9
- data/examples/word_sample.rb +4 -3
- data/lib/sequitur/constants.rb +5 -3
- data/lib/sequitur/digram.rb +45 -43
- data/lib/sequitur/dynamic_grammar.rb +93 -95
- data/lib/sequitur/formatter/base_formatter.rb +3 -1
- data/lib/sequitur/formatter/base_text.rb +3 -1
- data/lib/sequitur/formatter/debug.rb +5 -3
- data/lib/sequitur/grammar_visitor.rb +99 -98
- data/lib/sequitur/production.rb +14 -19
- data/lib/sequitur/production_ref.rb +107 -106
- data/lib/sequitur/sequitur_grammar.rb +9 -6
- data/lib/sequitur/symbol_sequence.rb +10 -11
- data/lib/sequitur.rb +2 -0
- data/spec/sequitur/digram_spec.rb +10 -8
- data/spec/sequitur/dynamic_grammar_spec.rb +2 -0
- data/spec/sequitur/formatter/base_text_spec.rb +4 -2
- data/spec/sequitur/formatter/debug_spec.rb +4 -2
- data/spec/sequitur/grammar_visitor_spec.rb +2 -0
- data/spec/sequitur/production_ref_spec.rb +2 -0
- data/spec/sequitur/production_spec.rb +10 -8
- data/spec/sequitur/sequitur_grammar_spec.rb +13 -13
- data/spec/sequitur/symbol_sequence_spec.rb +6 -4
- data/spec/spec_helper.rb +2 -12
- metadata +18 -46
- data/.ruby-version +0 -1
- data/.simplecov +0 -7
data/lib/sequitur.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../spec_helper'
|
2
4
|
|
3
5
|
# Load the class under test
|
@@ -5,28 +7,28 @@ require_relative '../../lib/sequitur/digram'
|
|
5
7
|
|
6
8
|
module Sequitur # Re-open the module to get rid of qualified names
|
7
9
|
describe Digram do
|
8
|
-
let(:two_symbols) { [
|
10
|
+
let(:two_symbols) { %i[b c] }
|
9
11
|
let(:production) { double('sample-production') }
|
10
12
|
|
11
13
|
context 'Standard creation & initialization:' do
|
12
14
|
it 'should be created with 3 arguments' do
|
13
15
|
instance = Digram.new(:b, :c, production)
|
14
|
-
|
16
|
+
|
15
17
|
expect(instance.symbols).to eq(two_symbols)
|
16
18
|
expect(instance.production).to eq(production)
|
17
19
|
end
|
18
|
-
|
20
|
+
|
19
21
|
it 'should return the production that it refers to' do
|
20
22
|
instance = Digram.new(:b, :c, production)
|
21
23
|
expect(instance.production).to eq(production)
|
22
24
|
end
|
23
|
-
|
25
|
+
|
24
26
|
it 'should whether its symbols are the same' do
|
25
27
|
instance1 = Digram.new(:a, :a, production)
|
26
28
|
expect(instance1).to be_repeating
|
27
|
-
|
29
|
+
|
28
30
|
instance1 = Digram.new(:a, :b, production)
|
29
|
-
expect(instance1).not_to be_repeating
|
31
|
+
expect(instance1).not_to be_repeating
|
30
32
|
end
|
31
33
|
end # context
|
32
34
|
|
@@ -35,11 +37,11 @@ describe Digram do
|
|
35
37
|
instance1 = Digram.new(:a, :b, production)
|
36
38
|
same = Digram.new(:a, :b, production)
|
37
39
|
different = Digram.new(:b, :c, production)
|
38
|
-
|
40
|
+
|
39
41
|
expect(instance1).to eq(instance1)
|
40
42
|
expect(instance1).to eq(same)
|
41
43
|
expect(instance1).not_to eq(different)
|
42
|
-
expect(same).not_to eq(different)
|
44
|
+
expect(same).not_to eq(different)
|
43
45
|
end
|
44
46
|
end # context
|
45
47
|
end # describe
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../spec_helper'
|
2
4
|
require 'stringio'
|
3
5
|
|
@@ -32,11 +34,11 @@ describe BaseText do
|
|
32
34
|
grm
|
33
35
|
end
|
34
36
|
|
35
|
-
let(:destination) { StringIO.new('', 'w') }
|
37
|
+
let(:destination) { StringIO.new(+'', 'w') }
|
36
38
|
|
37
39
|
context 'Standard creation & initialization:' do
|
38
40
|
it 'should be initialized with an IO argument' do
|
39
|
-
expect { BaseText.new(StringIO.new('', 'w')) }.not_to raise_error
|
41
|
+
expect { BaseText.new(StringIO.new(+'', 'w')) }.not_to raise_error
|
40
42
|
end
|
41
43
|
|
42
44
|
it 'should know its output destination' do
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../spec_helper'
|
2
4
|
require 'stringio'
|
3
5
|
|
@@ -32,11 +34,11 @@ describe Debug do
|
|
32
34
|
grm
|
33
35
|
end
|
34
36
|
|
35
|
-
let(:destination) { StringIO.new('', 'w') }
|
37
|
+
let(:destination) { StringIO.new(+'', 'w') }
|
36
38
|
|
37
39
|
context 'Standard creation & initialization:' do
|
38
40
|
it 'should be initialized with an IO argument' do
|
39
|
-
expect { Debug.new(StringIO.new('', 'w')) }.not_to raise_error
|
41
|
+
expect { Debug.new(StringIO.new(+'', 'w')) }.not_to raise_error
|
40
42
|
end
|
41
43
|
|
42
44
|
it 'should know its output destination' do
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../spec_helper'
|
2
4
|
|
3
5
|
# Load the class under test
|
@@ -66,7 +68,7 @@ describe Production do
|
|
66
68
|
expect(subject.references).to be_empty
|
67
69
|
|
68
70
|
# Case 2: production without references
|
69
|
-
symbols = [
|
71
|
+
symbols = %i[a b c]
|
70
72
|
symbols.each { |symb| subject.append_symbol(symb) }
|
71
73
|
expect(subject.references).to be_empty
|
72
74
|
expect(subject.references_of(p_a)).to be_empty
|
@@ -90,14 +92,14 @@ describe Production do
|
|
90
92
|
end
|
91
93
|
|
92
94
|
it 'should know the position(s) of a given digram' do
|
93
|
-
sequence1 = [
|
95
|
+
sequence1 = %i[a b c a b a b d]
|
94
96
|
sequence1.each { |symb| subject.append_symbol(symb) }
|
95
97
|
positions = [0, 3, 5]
|
96
98
|
expect(subject.positions_of(:a, :b)).to eq(positions)
|
97
99
|
|
98
100
|
subject.clear_rhs
|
99
101
|
# Case of overlapping digrams
|
100
|
-
sequence2 = [
|
102
|
+
sequence2 = %i[a a b a a a c d]
|
101
103
|
sequence2.each { |symb| subject.append_symbol(symb) }
|
102
104
|
positions = [0, 3]
|
103
105
|
expect(subject.positions_of(:a, :a)).to eq(positions)
|
@@ -114,15 +116,15 @@ describe Production do
|
|
114
116
|
it 'should append a symbol when has one symbol' do
|
115
117
|
subject.append_symbol(:a)
|
116
118
|
subject.append_symbol(:b)
|
117
|
-
expect(subject.rhs).to eq([
|
118
|
-
expect(subject.last_digram.symbols).to eq([
|
119
|
+
expect(subject.rhs).to eq(%i[a b])
|
120
|
+
expect(subject.last_digram.symbols).to eq(%i[a b])
|
119
121
|
end
|
120
122
|
|
121
123
|
it 'should append a symbol when rhs has several symbols' do
|
122
|
-
symbols = [
|
124
|
+
symbols = %i[a b c d e f]
|
123
125
|
symbols.each { |symb| subject.append_symbol(symb) }
|
124
126
|
expect(subject.rhs).to eq(symbols)
|
125
|
-
expect(subject.last_digram.symbols).to eq([
|
127
|
+
expect(subject.last_digram.symbols).to eq(%i[e f])
|
126
128
|
end
|
127
129
|
|
128
130
|
it 'should append a production in its rhs' do
|
@@ -176,7 +178,7 @@ describe Production do
|
|
176
178
|
instance = Production.new
|
177
179
|
symbols = [:a, :b, 'c', :d, :e, 1000, instance]
|
178
180
|
symbols.each { |symb| subject.append_symbol(symb) }
|
179
|
-
expectation = "#{subject.object_id} : "
|
181
|
+
expectation = +"#{subject.object_id} : "
|
180
182
|
expectation << "a b 'c' d e 1000 #{instance.object_id}."
|
181
183
|
expect(subject.to_string).to eq(expectation)
|
182
184
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../spec_helper'
|
2
4
|
|
3
5
|
# Load the class under test
|
@@ -7,7 +9,7 @@ module Sequitur # Re-open the module to get rid of qualified names
|
|
7
9
|
describe SequiturGrammar do
|
8
10
|
# Factory method. Returns an empty enumerator (
|
9
11
|
# i.e. without elements to iterate)
|
10
|
-
def empty_enum
|
12
|
+
def empty_enum
|
11
13
|
return [].to_enum
|
12
14
|
end
|
13
15
|
|
@@ -36,28 +38,28 @@ describe SequiturGrammar do
|
|
36
38
|
|
37
39
|
it 'could be created with multiple unique tokens' do
|
38
40
|
# Creation
|
39
|
-
instance = SequiturGrammar.new([
|
41
|
+
instance = SequiturGrammar.new(%i[a b c d].to_enum)
|
40
42
|
|
41
43
|
# Initialization
|
42
44
|
expect(instance.productions.size).to eq(1)
|
43
45
|
expect(instance.start).to eq(instance.productions.first)
|
44
|
-
expect(instance.start.rhs).to eq([
|
46
|
+
expect(instance.start.rhs).to eq(%i[a b c d])
|
45
47
|
end
|
46
48
|
|
47
49
|
it 'could be created with a repeating digram' do
|
48
|
-
instance = SequiturGrammar.new([
|
50
|
+
instance = SequiturGrammar.new(%i[a b a b].to_enum)
|
49
51
|
|
50
52
|
# Expectations:
|
51
53
|
# S : A A.
|
52
54
|
# A : a b.
|
53
55
|
expect(instance.productions.size).to eq(2)
|
54
56
|
p_a = instance.productions[1]
|
55
|
-
expect(p_a.rhs).to eq([
|
57
|
+
expect(p_a.rhs).to eq(%i[a b])
|
56
58
|
expect(instance.start.rhs).to eq([p_a, p_a])
|
57
59
|
end
|
58
60
|
|
59
61
|
it 'should enforce the utility rule' do
|
60
|
-
instance = SequiturGrammar.new([
|
62
|
+
instance = SequiturGrammar.new(%i[a b c a b c].to_enum)
|
61
63
|
|
62
64
|
# Expectations without utility rule:
|
63
65
|
# S : B B.
|
@@ -69,7 +71,7 @@ describe SequiturGrammar do
|
|
69
71
|
# A : a b c.
|
70
72
|
expect(instance.productions.size).to eq(2)
|
71
73
|
p_a = instance.productions.last
|
72
|
-
expect(p_a.rhs).to eq([
|
74
|
+
expect(p_a.rhs).to eq(%i[a b c])
|
73
75
|
expect(instance.start.rhs).to eq([p_a, p_a])
|
74
76
|
end
|
75
77
|
|
@@ -105,9 +107,7 @@ describe SequiturGrammar do
|
|
105
107
|
raw_input = 'bbebeebebebbebee'
|
106
108
|
|
107
109
|
# Convert them into multichar strings
|
108
|
-
input = raw_input.chars.map
|
109
|
-
'letter_' + ch
|
110
|
-
end
|
110
|
+
input = raw_input.chars.map { |ch| "letter_#{ch}" }
|
111
111
|
|
112
112
|
# Creation
|
113
113
|
instance = SequiturGrammar.new(input.to_enum)
|
@@ -124,7 +124,7 @@ describe SequiturGrammar do
|
|
124
124
|
expect(p2.rhs).to eq([p1, p1])
|
125
125
|
expect(p3.rhs).to eq(['letter_b', p2, 'letter_e'])
|
126
126
|
end
|
127
|
-
|
127
|
+
|
128
128
|
it 'should work with Symbol instead of single char input tokens' do
|
129
129
|
# Raw input is sequence of single characters
|
130
130
|
raw_input = 'bbebeebebebbebee'
|
@@ -143,7 +143,7 @@ describe SequiturGrammar do
|
|
143
143
|
expect(instance.productions.size).to eq(4)
|
144
144
|
(p1, p2, p3) = instance.productions[1..3]
|
145
145
|
expect(instance.start.rhs).to eq([p3, p2, p3])
|
146
|
-
expect(p1.rhs).to eq([
|
146
|
+
expect(p1.rhs).to eq(%i[b e])
|
147
147
|
expect(p2.rhs).to eq([p1, p1])
|
148
148
|
expect(p3.rhs).to eq([:b, p2, :e])
|
149
149
|
end
|
@@ -279,7 +279,7 @@ SNIPPET
|
|
279
279
|
expect(instance.productions.size).to eq(3)
|
280
280
|
(p1, p2) = instance.productions[1..2]
|
281
281
|
expect(instance.start.rhs).to eq([p2, p2])
|
282
|
-
expect(p1.rhs).to eq([
|
282
|
+
expect(p1.rhs).to eq(%i[a b])
|
283
283
|
expect(p2.rhs).to eq([p1, :c, p1, :d])
|
284
284
|
end
|
285
285
|
end # context
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../spec_helper'
|
2
4
|
|
3
5
|
# Load the class under test
|
@@ -24,7 +26,7 @@ describe SymbolSequence do
|
|
24
26
|
|
25
27
|
subject do
|
26
28
|
an_instance = SymbolSequence.new
|
27
|
-
[
|
29
|
+
%i[a b c].each { |a_sym| an_instance << a_sym }
|
28
30
|
an_instance
|
29
31
|
end
|
30
32
|
|
@@ -63,7 +65,7 @@ describe SymbolSequence do
|
|
63
65
|
expect(instance).to eq(instance)
|
64
66
|
|
65
67
|
expect(subject).not_to eq(instance)
|
66
|
-
[
|
68
|
+
%i[a b c].each { |a_sym| instance << a_sym }
|
67
69
|
expect(subject).to eq(instance)
|
68
70
|
|
69
71
|
# Check that element order is relevant
|
@@ -72,10 +74,10 @@ describe SymbolSequence do
|
|
72
74
|
end
|
73
75
|
|
74
76
|
it 'should know whether it is equal to an array' do
|
75
|
-
expect(subject).to eq([
|
77
|
+
expect(subject).to eq(%i[a b c])
|
76
78
|
|
77
79
|
# Check that element order is relevant
|
78
|
-
expect(subject).not_to eq([
|
80
|
+
expect(subject).not_to eq(%i[c b a])
|
79
81
|
end
|
80
82
|
|
81
83
|
it 'should know that is not equal to something else' do
|
data/spec/spec_helper.rb
CHANGED
@@ -1,17 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# File: spec_helper.rb
|
2
4
|
# Purpose: utility file that is loaded by all our RSpec files
|
3
5
|
|
4
|
-
require 'simplecov'
|
5
|
-
require 'coveralls'
|
6
|
-
|
7
|
-
Coveralls.wear!
|
8
|
-
|
9
|
-
SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
|
10
|
-
SimpleCov::Formatter::HTMLFormatter,
|
11
|
-
Coveralls::SimpleCov::Formatter
|
12
|
-
]
|
13
|
-
|
14
|
-
|
15
6
|
require 'rspec' # Use the RSpec framework
|
16
7
|
require 'pp' # Use pretty-print for debugging purposes
|
17
8
|
|
@@ -25,5 +16,4 @@ RSpec.configure do |config|
|
|
25
16
|
config.full_backtrace = true
|
26
17
|
end
|
27
18
|
|
28
|
-
|
29
19
|
# End of file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequitur
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.23
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -36,48 +36,20 @@ dependencies:
|
|
36
36
|
requirements:
|
37
37
|
- - "~>"
|
38
38
|
- !ruby/object:Gem::Version
|
39
|
-
version:
|
39
|
+
version: 3.0.0
|
40
40
|
- - ">="
|
41
41
|
- !ruby/object:Gem::Version
|
42
|
-
version:
|
42
|
+
version: 3.0.0
|
43
43
|
type: :development
|
44
44
|
prerelease: false
|
45
45
|
version_requirements: !ruby/object:Gem::Requirement
|
46
46
|
requirements:
|
47
47
|
- - "~>"
|
48
48
|
- !ruby/object:Gem::Version
|
49
|
-
version:
|
49
|
+
version: 3.0.0
|
50
50
|
- - ">="
|
51
51
|
- !ruby/object:Gem::Version
|
52
|
-
version:
|
53
|
-
- !ruby/object:Gem::Dependency
|
54
|
-
name: simplecov
|
55
|
-
requirement: !ruby/object:Gem::Requirement
|
56
|
-
requirements:
|
57
|
-
- - ">="
|
58
|
-
- !ruby/object:Gem::Version
|
59
|
-
version: 0.8.0
|
60
|
-
type: :development
|
61
|
-
prerelease: false
|
62
|
-
version_requirements: !ruby/object:Gem::Requirement
|
63
|
-
requirements:
|
64
|
-
- - ">="
|
65
|
-
- !ruby/object:Gem::Version
|
66
|
-
version: 0.8.0
|
67
|
-
- !ruby/object:Gem::Dependency
|
68
|
-
name: coveralls
|
69
|
-
requirement: !ruby/object:Gem::Requirement
|
70
|
-
requirements:
|
71
|
-
- - ">="
|
72
|
-
- !ruby/object:Gem::Version
|
73
|
-
version: 0.7.0
|
74
|
-
type: :development
|
75
|
-
prerelease: false
|
76
|
-
version_requirements: !ruby/object:Gem::Requirement
|
77
|
-
requirements:
|
78
|
-
- - ">="
|
79
|
-
- !ruby/object:Gem::Version
|
80
|
-
version: 0.7.0
|
52
|
+
version: 3.0.0
|
81
53
|
- !ruby/object:Gem::Dependency
|
82
54
|
name: rubygems
|
83
55
|
requirement: !ruby/object:Gem::Requirement
|
@@ -98,11 +70,14 @@ dependencies:
|
|
98
70
|
- - ">="
|
99
71
|
- !ruby/object:Gem::Version
|
100
72
|
version: 2.0.0
|
101
|
-
description:
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
73
|
+
description: |
|
74
|
+
Ruby implementation of the Sequitur algorithm. This algorithm automatically
|
75
|
+
finds repetitions and hierarchical structures in a given sequence of input
|
76
|
+
tokens. It encodes the input into a context-free grammar.
|
77
|
+
The Sequitur algorithm can be used to
|
78
|
+
a) compress a sequence of items,
|
79
|
+
b) discover patterns in an sequence,
|
80
|
+
c) generate grammar rules that can represent a given input.
|
106
81
|
email: famished.tiger@yahoo.com
|
107
82
|
executables: []
|
108
83
|
extensions: []
|
@@ -112,8 +87,6 @@ files:
|
|
112
87
|
- ".rspec"
|
113
88
|
- ".rubocop.yml"
|
114
89
|
- ".ruby-gemset"
|
115
|
-
- ".ruby-version"
|
116
|
-
- ".simplecov"
|
117
90
|
- ".travis.yml"
|
118
91
|
- ".yardopts"
|
119
92
|
- CHANGELOG.md
|
@@ -165,16 +138,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
165
138
|
requirements:
|
166
139
|
- - ">="
|
167
140
|
- !ruby/object:Gem::Version
|
168
|
-
version: 2.
|
141
|
+
version: 2.5.0
|
169
142
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
170
143
|
requirements:
|
171
144
|
- - ">="
|
172
145
|
- !ruby/object:Gem::Version
|
173
146
|
version: '0'
|
174
147
|
requirements: []
|
175
|
-
|
176
|
-
|
177
|
-
signing_key:
|
148
|
+
rubygems_version: 3.1.4
|
149
|
+
signing_key:
|
178
150
|
specification_version: 4
|
179
151
|
summary: Ruby implementation of the Sequitur algorithm
|
180
152
|
test_files:
|