sequitur 0.1.18 → 0.1.23
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.rubocop.yml +430 -56
- data/.travis.yml +19 -13
- data/CHANGELOG.md +33 -0
- data/Gemfile +4 -3
- data/LICENSE.txt +1 -1
- data/README.md +1 -2
- data/Rakefile +2 -0
- data/appveyor.yml +20 -12
- data/examples/integer_sample.rb +8 -7
- data/examples/porridge.rb +6 -6
- data/examples/simple_case.rb +7 -6
- data/examples/symbol_sample.rb +8 -9
- data/examples/word_sample.rb +4 -3
- data/lib/sequitur/constants.rb +5 -3
- data/lib/sequitur/digram.rb +45 -43
- data/lib/sequitur/dynamic_grammar.rb +93 -95
- data/lib/sequitur/formatter/base_formatter.rb +3 -1
- data/lib/sequitur/formatter/base_text.rb +3 -1
- data/lib/sequitur/formatter/debug.rb +5 -3
- data/lib/sequitur/grammar_visitor.rb +99 -98
- data/lib/sequitur/production.rb +14 -19
- data/lib/sequitur/production_ref.rb +107 -106
- data/lib/sequitur/sequitur_grammar.rb +9 -6
- data/lib/sequitur/symbol_sequence.rb +10 -11
- data/lib/sequitur.rb +2 -0
- data/spec/sequitur/digram_spec.rb +10 -8
- data/spec/sequitur/dynamic_grammar_spec.rb +2 -0
- data/spec/sequitur/formatter/base_text_spec.rb +4 -2
- data/spec/sequitur/formatter/debug_spec.rb +4 -2
- data/spec/sequitur/grammar_visitor_spec.rb +2 -0
- data/spec/sequitur/production_ref_spec.rb +2 -0
- data/spec/sequitur/production_spec.rb +10 -8
- data/spec/sequitur/sequitur_grammar_spec.rb +13 -13
- data/spec/sequitur/symbol_sequence_spec.rb +6 -4
- data/spec/spec_helper.rb +2 -12
- metadata +18 -46
- data/.ruby-version +0 -1
- data/.simplecov +0 -7
data/lib/sequitur.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../spec_helper'
|
2
4
|
|
3
5
|
# Load the class under test
|
@@ -5,28 +7,28 @@ require_relative '../../lib/sequitur/digram'
|
|
5
7
|
|
6
8
|
module Sequitur # Re-open the module to get rid of qualified names
|
7
9
|
describe Digram do
|
8
|
-
let(:two_symbols) { [
|
10
|
+
let(:two_symbols) { %i[b c] }
|
9
11
|
let(:production) { double('sample-production') }
|
10
12
|
|
11
13
|
context 'Standard creation & initialization:' do
|
12
14
|
it 'should be created with 3 arguments' do
|
13
15
|
instance = Digram.new(:b, :c, production)
|
14
|
-
|
16
|
+
|
15
17
|
expect(instance.symbols).to eq(two_symbols)
|
16
18
|
expect(instance.production).to eq(production)
|
17
19
|
end
|
18
|
-
|
20
|
+
|
19
21
|
it 'should return the production that it refers to' do
|
20
22
|
instance = Digram.new(:b, :c, production)
|
21
23
|
expect(instance.production).to eq(production)
|
22
24
|
end
|
23
|
-
|
25
|
+
|
24
26
|
it 'should whether its symbols are the same' do
|
25
27
|
instance1 = Digram.new(:a, :a, production)
|
26
28
|
expect(instance1).to be_repeating
|
27
|
-
|
29
|
+
|
28
30
|
instance1 = Digram.new(:a, :b, production)
|
29
|
-
expect(instance1).not_to be_repeating
|
31
|
+
expect(instance1).not_to be_repeating
|
30
32
|
end
|
31
33
|
end # context
|
32
34
|
|
@@ -35,11 +37,11 @@ describe Digram do
|
|
35
37
|
instance1 = Digram.new(:a, :b, production)
|
36
38
|
same = Digram.new(:a, :b, production)
|
37
39
|
different = Digram.new(:b, :c, production)
|
38
|
-
|
40
|
+
|
39
41
|
expect(instance1).to eq(instance1)
|
40
42
|
expect(instance1).to eq(same)
|
41
43
|
expect(instance1).not_to eq(different)
|
42
|
-
expect(same).not_to eq(different)
|
44
|
+
expect(same).not_to eq(different)
|
43
45
|
end
|
44
46
|
end # context
|
45
47
|
end # describe
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../spec_helper'
|
2
4
|
require 'stringio'
|
3
5
|
|
@@ -32,11 +34,11 @@ describe BaseText do
|
|
32
34
|
grm
|
33
35
|
end
|
34
36
|
|
35
|
-
let(:destination) { StringIO.new('', 'w') }
|
37
|
+
let(:destination) { StringIO.new(+'', 'w') }
|
36
38
|
|
37
39
|
context 'Standard creation & initialization:' do
|
38
40
|
it 'should be initialized with an IO argument' do
|
39
|
-
expect { BaseText.new(StringIO.new('', 'w')) }.not_to raise_error
|
41
|
+
expect { BaseText.new(StringIO.new(+'', 'w')) }.not_to raise_error
|
40
42
|
end
|
41
43
|
|
42
44
|
it 'should know its output destination' do
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../../spec_helper'
|
2
4
|
require 'stringio'
|
3
5
|
|
@@ -32,11 +34,11 @@ describe Debug do
|
|
32
34
|
grm
|
33
35
|
end
|
34
36
|
|
35
|
-
let(:destination) { StringIO.new('', 'w') }
|
37
|
+
let(:destination) { StringIO.new(+'', 'w') }
|
36
38
|
|
37
39
|
context 'Standard creation & initialization:' do
|
38
40
|
it 'should be initialized with an IO argument' do
|
39
|
-
expect { Debug.new(StringIO.new('', 'w')) }.not_to raise_error
|
41
|
+
expect { Debug.new(StringIO.new(+'', 'w')) }.not_to raise_error
|
40
42
|
end
|
41
43
|
|
42
44
|
it 'should know its output destination' do
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../spec_helper'
|
2
4
|
|
3
5
|
# Load the class under test
|
@@ -66,7 +68,7 @@ describe Production do
|
|
66
68
|
expect(subject.references).to be_empty
|
67
69
|
|
68
70
|
# Case 2: production without references
|
69
|
-
symbols = [
|
71
|
+
symbols = %i[a b c]
|
70
72
|
symbols.each { |symb| subject.append_symbol(symb) }
|
71
73
|
expect(subject.references).to be_empty
|
72
74
|
expect(subject.references_of(p_a)).to be_empty
|
@@ -90,14 +92,14 @@ describe Production do
|
|
90
92
|
end
|
91
93
|
|
92
94
|
it 'should know the position(s) of a given digram' do
|
93
|
-
sequence1 = [
|
95
|
+
sequence1 = %i[a b c a b a b d]
|
94
96
|
sequence1.each { |symb| subject.append_symbol(symb) }
|
95
97
|
positions = [0, 3, 5]
|
96
98
|
expect(subject.positions_of(:a, :b)).to eq(positions)
|
97
99
|
|
98
100
|
subject.clear_rhs
|
99
101
|
# Case of overlapping digrams
|
100
|
-
sequence2 = [
|
102
|
+
sequence2 = %i[a a b a a a c d]
|
101
103
|
sequence2.each { |symb| subject.append_symbol(symb) }
|
102
104
|
positions = [0, 3]
|
103
105
|
expect(subject.positions_of(:a, :a)).to eq(positions)
|
@@ -114,15 +116,15 @@ describe Production do
|
|
114
116
|
it 'should append a symbol when has one symbol' do
|
115
117
|
subject.append_symbol(:a)
|
116
118
|
subject.append_symbol(:b)
|
117
|
-
expect(subject.rhs).to eq([
|
118
|
-
expect(subject.last_digram.symbols).to eq([
|
119
|
+
expect(subject.rhs).to eq(%i[a b])
|
120
|
+
expect(subject.last_digram.symbols).to eq(%i[a b])
|
119
121
|
end
|
120
122
|
|
121
123
|
it 'should append a symbol when rhs has several symbols' do
|
122
|
-
symbols = [
|
124
|
+
symbols = %i[a b c d e f]
|
123
125
|
symbols.each { |symb| subject.append_symbol(symb) }
|
124
126
|
expect(subject.rhs).to eq(symbols)
|
125
|
-
expect(subject.last_digram.symbols).to eq([
|
127
|
+
expect(subject.last_digram.symbols).to eq(%i[e f])
|
126
128
|
end
|
127
129
|
|
128
130
|
it 'should append a production in its rhs' do
|
@@ -176,7 +178,7 @@ describe Production do
|
|
176
178
|
instance = Production.new
|
177
179
|
symbols = [:a, :b, 'c', :d, :e, 1000, instance]
|
178
180
|
symbols.each { |symb| subject.append_symbol(symb) }
|
179
|
-
expectation = "#{subject.object_id} : "
|
181
|
+
expectation = +"#{subject.object_id} : "
|
180
182
|
expectation << "a b 'c' d e 1000 #{instance.object_id}."
|
181
183
|
expect(subject.to_string).to eq(expectation)
|
182
184
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../spec_helper'
|
2
4
|
|
3
5
|
# Load the class under test
|
@@ -7,7 +9,7 @@ module Sequitur # Re-open the module to get rid of qualified names
|
|
7
9
|
describe SequiturGrammar do
|
8
10
|
# Factory method. Returns an empty enumerator (
|
9
11
|
# i.e. without elements to iterate)
|
10
|
-
def empty_enum
|
12
|
+
def empty_enum
|
11
13
|
return [].to_enum
|
12
14
|
end
|
13
15
|
|
@@ -36,28 +38,28 @@ describe SequiturGrammar do
|
|
36
38
|
|
37
39
|
it 'could be created with multiple unique tokens' do
|
38
40
|
# Creation
|
39
|
-
instance = SequiturGrammar.new([
|
41
|
+
instance = SequiturGrammar.new(%i[a b c d].to_enum)
|
40
42
|
|
41
43
|
# Initialization
|
42
44
|
expect(instance.productions.size).to eq(1)
|
43
45
|
expect(instance.start).to eq(instance.productions.first)
|
44
|
-
expect(instance.start.rhs).to eq([
|
46
|
+
expect(instance.start.rhs).to eq(%i[a b c d])
|
45
47
|
end
|
46
48
|
|
47
49
|
it 'could be created with a repeating digram' do
|
48
|
-
instance = SequiturGrammar.new([
|
50
|
+
instance = SequiturGrammar.new(%i[a b a b].to_enum)
|
49
51
|
|
50
52
|
# Expectations:
|
51
53
|
# S : A A.
|
52
54
|
# A : a b.
|
53
55
|
expect(instance.productions.size).to eq(2)
|
54
56
|
p_a = instance.productions[1]
|
55
|
-
expect(p_a.rhs).to eq([
|
57
|
+
expect(p_a.rhs).to eq(%i[a b])
|
56
58
|
expect(instance.start.rhs).to eq([p_a, p_a])
|
57
59
|
end
|
58
60
|
|
59
61
|
it 'should enforce the utility rule' do
|
60
|
-
instance = SequiturGrammar.new([
|
62
|
+
instance = SequiturGrammar.new(%i[a b c a b c].to_enum)
|
61
63
|
|
62
64
|
# Expectations without utility rule:
|
63
65
|
# S : B B.
|
@@ -69,7 +71,7 @@ describe SequiturGrammar do
|
|
69
71
|
# A : a b c.
|
70
72
|
expect(instance.productions.size).to eq(2)
|
71
73
|
p_a = instance.productions.last
|
72
|
-
expect(p_a.rhs).to eq([
|
74
|
+
expect(p_a.rhs).to eq(%i[a b c])
|
73
75
|
expect(instance.start.rhs).to eq([p_a, p_a])
|
74
76
|
end
|
75
77
|
|
@@ -105,9 +107,7 @@ describe SequiturGrammar do
|
|
105
107
|
raw_input = 'bbebeebebebbebee'
|
106
108
|
|
107
109
|
# Convert them into multichar strings
|
108
|
-
input = raw_input.chars.map
|
109
|
-
'letter_' + ch
|
110
|
-
end
|
110
|
+
input = raw_input.chars.map { |ch| "letter_#{ch}" }
|
111
111
|
|
112
112
|
# Creation
|
113
113
|
instance = SequiturGrammar.new(input.to_enum)
|
@@ -124,7 +124,7 @@ describe SequiturGrammar do
|
|
124
124
|
expect(p2.rhs).to eq([p1, p1])
|
125
125
|
expect(p3.rhs).to eq(['letter_b', p2, 'letter_e'])
|
126
126
|
end
|
127
|
-
|
127
|
+
|
128
128
|
it 'should work with Symbol instead of single char input tokens' do
|
129
129
|
# Raw input is sequence of single characters
|
130
130
|
raw_input = 'bbebeebebebbebee'
|
@@ -143,7 +143,7 @@ describe SequiturGrammar do
|
|
143
143
|
expect(instance.productions.size).to eq(4)
|
144
144
|
(p1, p2, p3) = instance.productions[1..3]
|
145
145
|
expect(instance.start.rhs).to eq([p3, p2, p3])
|
146
|
-
expect(p1.rhs).to eq([
|
146
|
+
expect(p1.rhs).to eq(%i[b e])
|
147
147
|
expect(p2.rhs).to eq([p1, p1])
|
148
148
|
expect(p3.rhs).to eq([:b, p2, :e])
|
149
149
|
end
|
@@ -279,7 +279,7 @@ SNIPPET
|
|
279
279
|
expect(instance.productions.size).to eq(3)
|
280
280
|
(p1, p2) = instance.productions[1..2]
|
281
281
|
expect(instance.start.rhs).to eq([p2, p2])
|
282
|
-
expect(p1.rhs).to eq([
|
282
|
+
expect(p1.rhs).to eq(%i[a b])
|
283
283
|
expect(p2.rhs).to eq([p1, :c, p1, :d])
|
284
284
|
end
|
285
285
|
end # context
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../spec_helper'
|
2
4
|
|
3
5
|
# Load the class under test
|
@@ -24,7 +26,7 @@ describe SymbolSequence do
|
|
24
26
|
|
25
27
|
subject do
|
26
28
|
an_instance = SymbolSequence.new
|
27
|
-
[
|
29
|
+
%i[a b c].each { |a_sym| an_instance << a_sym }
|
28
30
|
an_instance
|
29
31
|
end
|
30
32
|
|
@@ -63,7 +65,7 @@ describe SymbolSequence do
|
|
63
65
|
expect(instance).to eq(instance)
|
64
66
|
|
65
67
|
expect(subject).not_to eq(instance)
|
66
|
-
[
|
68
|
+
%i[a b c].each { |a_sym| instance << a_sym }
|
67
69
|
expect(subject).to eq(instance)
|
68
70
|
|
69
71
|
# Check that element order is relevant
|
@@ -72,10 +74,10 @@ describe SymbolSequence do
|
|
72
74
|
end
|
73
75
|
|
74
76
|
it 'should know whether it is equal to an array' do
|
75
|
-
expect(subject).to eq([
|
77
|
+
expect(subject).to eq(%i[a b c])
|
76
78
|
|
77
79
|
# Check that element order is relevant
|
78
|
-
expect(subject).not_to eq([
|
80
|
+
expect(subject).not_to eq(%i[c b a])
|
79
81
|
end
|
80
82
|
|
81
83
|
it 'should know that is not equal to something else' do
|
data/spec/spec_helper.rb
CHANGED
@@ -1,17 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# File: spec_helper.rb
|
2
4
|
# Purpose: utility file that is loaded by all our RSpec files
|
3
5
|
|
4
|
-
require 'simplecov'
|
5
|
-
require 'coveralls'
|
6
|
-
|
7
|
-
Coveralls.wear!
|
8
|
-
|
9
|
-
SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
|
10
|
-
SimpleCov::Formatter::HTMLFormatter,
|
11
|
-
Coveralls::SimpleCov::Formatter
|
12
|
-
]
|
13
|
-
|
14
|
-
|
15
6
|
require 'rspec' # Use the RSpec framework
|
16
7
|
require 'pp' # Use pretty-print for debugging purposes
|
17
8
|
|
@@ -25,5 +16,4 @@ RSpec.configure do |config|
|
|
25
16
|
config.full_backtrace = true
|
26
17
|
end
|
27
18
|
|
28
|
-
|
29
19
|
# End of file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequitur
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.23
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -36,48 +36,20 @@ dependencies:
|
|
36
36
|
requirements:
|
37
37
|
- - "~>"
|
38
38
|
- !ruby/object:Gem::Version
|
39
|
-
version:
|
39
|
+
version: 3.0.0
|
40
40
|
- - ">="
|
41
41
|
- !ruby/object:Gem::Version
|
42
|
-
version:
|
42
|
+
version: 3.0.0
|
43
43
|
type: :development
|
44
44
|
prerelease: false
|
45
45
|
version_requirements: !ruby/object:Gem::Requirement
|
46
46
|
requirements:
|
47
47
|
- - "~>"
|
48
48
|
- !ruby/object:Gem::Version
|
49
|
-
version:
|
49
|
+
version: 3.0.0
|
50
50
|
- - ">="
|
51
51
|
- !ruby/object:Gem::Version
|
52
|
-
version:
|
53
|
-
- !ruby/object:Gem::Dependency
|
54
|
-
name: simplecov
|
55
|
-
requirement: !ruby/object:Gem::Requirement
|
56
|
-
requirements:
|
57
|
-
- - ">="
|
58
|
-
- !ruby/object:Gem::Version
|
59
|
-
version: 0.8.0
|
60
|
-
type: :development
|
61
|
-
prerelease: false
|
62
|
-
version_requirements: !ruby/object:Gem::Requirement
|
63
|
-
requirements:
|
64
|
-
- - ">="
|
65
|
-
- !ruby/object:Gem::Version
|
66
|
-
version: 0.8.0
|
67
|
-
- !ruby/object:Gem::Dependency
|
68
|
-
name: coveralls
|
69
|
-
requirement: !ruby/object:Gem::Requirement
|
70
|
-
requirements:
|
71
|
-
- - ">="
|
72
|
-
- !ruby/object:Gem::Version
|
73
|
-
version: 0.7.0
|
74
|
-
type: :development
|
75
|
-
prerelease: false
|
76
|
-
version_requirements: !ruby/object:Gem::Requirement
|
77
|
-
requirements:
|
78
|
-
- - ">="
|
79
|
-
- !ruby/object:Gem::Version
|
80
|
-
version: 0.7.0
|
52
|
+
version: 3.0.0
|
81
53
|
- !ruby/object:Gem::Dependency
|
82
54
|
name: rubygems
|
83
55
|
requirement: !ruby/object:Gem::Requirement
|
@@ -98,11 +70,14 @@ dependencies:
|
|
98
70
|
- - ">="
|
99
71
|
- !ruby/object:Gem::Version
|
100
72
|
version: 2.0.0
|
101
|
-
description:
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
73
|
+
description: |
|
74
|
+
Ruby implementation of the Sequitur algorithm. This algorithm automatically
|
75
|
+
finds repetitions and hierarchical structures in a given sequence of input
|
76
|
+
tokens. It encodes the input into a context-free grammar.
|
77
|
+
The Sequitur algorithm can be used to
|
78
|
+
a) compress a sequence of items,
|
79
|
+
b) discover patterns in a sequence,
|
80
|
+
c) generate grammar rules that can represent a given input.
|
106
81
|
email: famished.tiger@yahoo.com
|
107
82
|
executables: []
|
108
83
|
extensions: []
|
@@ -112,8 +87,6 @@ files:
|
|
112
87
|
- ".rspec"
|
113
88
|
- ".rubocop.yml"
|
114
89
|
- ".ruby-gemset"
|
115
|
-
- ".ruby-version"
|
116
|
-
- ".simplecov"
|
117
90
|
- ".travis.yml"
|
118
91
|
- ".yardopts"
|
119
92
|
- CHANGELOG.md
|
@@ -165,16 +138,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
165
138
|
requirements:
|
166
139
|
- - ">="
|
167
140
|
- !ruby/object:Gem::Version
|
168
|
-
version: 2.
|
141
|
+
version: 2.5.0
|
169
142
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
170
143
|
requirements:
|
171
144
|
- - ">="
|
172
145
|
- !ruby/object:Gem::Version
|
173
146
|
version: '0'
|
174
147
|
requirements: []
|
175
|
-
|
176
|
-
|
177
|
-
signing_key:
|
148
|
+
rubygems_version: 3.1.4
|
149
|
+
signing_key:
|
178
150
|
specification_version: 4
|
179
151
|
summary: Ruby implementation of the Sequitur algorithm
|
180
152
|
test_files:
|