sequitur 0.1.18 → 0.1.19
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +163 -49
- data/.travis.yml +13 -10
- data/CHANGELOG.md +9 -0
- data/Gemfile +2 -3
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/appveyor.yml +13 -10
- data/examples/integer_sample.rb +5 -6
- data/examples/porridge.rb +4 -6
- data/examples/simple_case.rb +5 -6
- data/examples/symbol_sample.rb +5 -8
- data/examples/word_sample.rb +1 -2
- data/lib/sequitur/constants.rb +1 -1
- data/lib/sequitur/digram.rb +38 -38
- data/lib/sequitur/dynamic_grammar.rb +91 -95
- data/lib/sequitur/formatter/base_text.rb +1 -1
- data/lib/sequitur/formatter/debug.rb +2 -2
- data/lib/sequitur/grammar_visitor.rb +96 -98
- data/lib/sequitur/production.rb +10 -19
- data/lib/sequitur/production_ref.rb +104 -105
- data/lib/sequitur/sequitur_grammar.rb +3 -3
- data/lib/sequitur/symbol_sequence.rb +7 -11
- data/spec/sequitur/digram_spec.rb +8 -8
- data/spec/sequitur/production_spec.rb +7 -7
- data/spec/sequitur/sequitur_grammar_spec.rb +10 -10
- data/spec/sequitur/symbol_sequence_spec.rb +4 -4
- data/spec/spec_helper.rb +6 -4
- metadata +44 -29
@@ -66,7 +66,7 @@ describe Production do
|
|
66
66
|
expect(subject.references).to be_empty
|
67
67
|
|
68
68
|
# Case 2: production without references
|
69
|
-
symbols = [
|
69
|
+
symbols = %i[a b c]
|
70
70
|
symbols.each { |symb| subject.append_symbol(symb) }
|
71
71
|
expect(subject.references).to be_empty
|
72
72
|
expect(subject.references_of(p_a)).to be_empty
|
@@ -90,14 +90,14 @@ describe Production do
|
|
90
90
|
end
|
91
91
|
|
92
92
|
it 'should know the position(s) of a given digram' do
|
93
|
-
sequence1 = [
|
93
|
+
sequence1 = %i[a b c a b a b d]
|
94
94
|
sequence1.each { |symb| subject.append_symbol(symb) }
|
95
95
|
positions = [0, 3, 5]
|
96
96
|
expect(subject.positions_of(:a, :b)).to eq(positions)
|
97
97
|
|
98
98
|
subject.clear_rhs
|
99
99
|
# Case of overlapping digrams
|
100
|
-
sequence2 = [
|
100
|
+
sequence2 = %i[a a b a a a c d]
|
101
101
|
sequence2.each { |symb| subject.append_symbol(symb) }
|
102
102
|
positions = [0, 3]
|
103
103
|
expect(subject.positions_of(:a, :a)).to eq(positions)
|
@@ -114,15 +114,15 @@ describe Production do
|
|
114
114
|
it 'should append a symbol when has one symbol' do
|
115
115
|
subject.append_symbol(:a)
|
116
116
|
subject.append_symbol(:b)
|
117
|
-
expect(subject.rhs).to eq([
|
118
|
-
expect(subject.last_digram.symbols).to eq([
|
117
|
+
expect(subject.rhs).to eq(%i[a b])
|
118
|
+
expect(subject.last_digram.symbols).to eq(%i[a b])
|
119
119
|
end
|
120
120
|
|
121
121
|
it 'should append a symbol when rhs has several symbols' do
|
122
|
-
symbols = [
|
122
|
+
symbols = %i[a b c d e f]
|
123
123
|
symbols.each { |symb| subject.append_symbol(symb) }
|
124
124
|
expect(subject.rhs).to eq(symbols)
|
125
|
-
expect(subject.last_digram.symbols).to eq([
|
125
|
+
expect(subject.last_digram.symbols).to eq(%i[e f])
|
126
126
|
end
|
127
127
|
|
128
128
|
it 'should append a production in its rhs' do
|
@@ -7,7 +7,7 @@ module Sequitur # Re-open the module to get rid of qualified names
|
|
7
7
|
describe SequiturGrammar do
|
8
8
|
# Factory method. Returns an empty enumerator (
|
9
9
|
# i.e. without elements to iterate)
|
10
|
-
def empty_enum
|
10
|
+
def empty_enum
|
11
11
|
return [].to_enum
|
12
12
|
end
|
13
13
|
|
@@ -36,28 +36,28 @@ describe SequiturGrammar do
|
|
36
36
|
|
37
37
|
it 'could be created with multiple unique tokens' do
|
38
38
|
# Creation
|
39
|
-
instance = SequiturGrammar.new([
|
39
|
+
instance = SequiturGrammar.new(%i[a b c d].to_enum)
|
40
40
|
|
41
41
|
# Initialization
|
42
42
|
expect(instance.productions.size).to eq(1)
|
43
43
|
expect(instance.start).to eq(instance.productions.first)
|
44
|
-
expect(instance.start.rhs).to eq([
|
44
|
+
expect(instance.start.rhs).to eq(%i[a b c d])
|
45
45
|
end
|
46
46
|
|
47
47
|
it 'could be created with a repeating digram' do
|
48
|
-
instance = SequiturGrammar.new([
|
48
|
+
instance = SequiturGrammar.new(%i[a b a b].to_enum)
|
49
49
|
|
50
50
|
# Expectations:
|
51
51
|
# S : A A.
|
52
52
|
# A : a b.
|
53
53
|
expect(instance.productions.size).to eq(2)
|
54
54
|
p_a = instance.productions[1]
|
55
|
-
expect(p_a.rhs).to eq([
|
55
|
+
expect(p_a.rhs).to eq(%i[a b])
|
56
56
|
expect(instance.start.rhs).to eq([p_a, p_a])
|
57
57
|
end
|
58
58
|
|
59
59
|
it 'should enforce the utility rule' do
|
60
|
-
instance = SequiturGrammar.new([
|
60
|
+
instance = SequiturGrammar.new(%i[a b c a b c].to_enum)
|
61
61
|
|
62
62
|
# Expectations without utility rule:
|
63
63
|
# S : B B.
|
@@ -69,7 +69,7 @@ describe SequiturGrammar do
|
|
69
69
|
# A : a b c.
|
70
70
|
expect(instance.productions.size).to eq(2)
|
71
71
|
p_a = instance.productions.last
|
72
|
-
expect(p_a.rhs).to eq([
|
72
|
+
expect(p_a.rhs).to eq(%i[a b c])
|
73
73
|
expect(instance.start.rhs).to eq([p_a, p_a])
|
74
74
|
end
|
75
75
|
|
@@ -124,7 +124,7 @@ describe SequiturGrammar do
|
|
124
124
|
expect(p2.rhs).to eq([p1, p1])
|
125
125
|
expect(p3.rhs).to eq(['letter_b', p2, 'letter_e'])
|
126
126
|
end
|
127
|
-
|
127
|
+
|
128
128
|
it 'should work with Symbol instead of single char input tokens' do
|
129
129
|
# Raw input is sequence of single characters
|
130
130
|
raw_input = 'bbebeebebebbebee'
|
@@ -143,7 +143,7 @@ describe SequiturGrammar do
|
|
143
143
|
expect(instance.productions.size).to eq(4)
|
144
144
|
(p1, p2, p3) = instance.productions[1..3]
|
145
145
|
expect(instance.start.rhs).to eq([p3, p2, p3])
|
146
|
-
expect(p1.rhs).to eq([
|
146
|
+
expect(p1.rhs).to eq(%i[b e])
|
147
147
|
expect(p2.rhs).to eq([p1, p1])
|
148
148
|
expect(p3.rhs).to eq([:b, p2, :e])
|
149
149
|
end
|
@@ -279,7 +279,7 @@ SNIPPET
|
|
279
279
|
expect(instance.productions.size).to eq(3)
|
280
280
|
(p1, p2) = instance.productions[1..2]
|
281
281
|
expect(instance.start.rhs).to eq([p2, p2])
|
282
|
-
expect(p1.rhs).to eq([
|
282
|
+
expect(p1.rhs).to eq(%i[a b])
|
283
283
|
expect(p2.rhs).to eq([p1, :c, p1, :d])
|
284
284
|
end
|
285
285
|
end # context
|
@@ -24,7 +24,7 @@ describe SymbolSequence do
|
|
24
24
|
|
25
25
|
subject do
|
26
26
|
an_instance = SymbolSequence.new
|
27
|
-
[
|
27
|
+
%i[a b c].each { |a_sym| an_instance << a_sym }
|
28
28
|
an_instance
|
29
29
|
end
|
30
30
|
|
@@ -63,7 +63,7 @@ describe SymbolSequence do
|
|
63
63
|
expect(instance).to eq(instance)
|
64
64
|
|
65
65
|
expect(subject).not_to eq(instance)
|
66
|
-
[
|
66
|
+
%i[a b c].each { |a_sym| instance << a_sym }
|
67
67
|
expect(subject).to eq(instance)
|
68
68
|
|
69
69
|
# Check that element order is relevant
|
@@ -72,10 +72,10 @@ describe SymbolSequence do
|
|
72
72
|
end
|
73
73
|
|
74
74
|
it 'should know whether it is equal to an array' do
|
75
|
-
expect(subject).to eq([
|
75
|
+
expect(subject).to eq(%i[a b c])
|
76
76
|
|
77
77
|
# Check that element order is relevant
|
78
|
-
expect(subject).not_to eq([
|
78
|
+
expect(subject).not_to eq(%i[c b a])
|
79
79
|
end
|
80
80
|
|
81
81
|
it 'should know that is not equal to something else' do
|
data/spec/spec_helper.rb
CHANGED
@@ -6,10 +6,12 @@ require 'coveralls'
|
|
6
6
|
|
7
7
|
Coveralls.wear!
|
8
8
|
|
9
|
-
SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter
|
10
|
-
|
11
|
-
|
12
|
-
|
9
|
+
SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter.new(
|
10
|
+
[
|
11
|
+
SimpleCov::Formatter::HTMLFormatter,
|
12
|
+
Coveralls::SimpleCov::Formatter
|
13
|
+
]
|
14
|
+
)
|
13
15
|
|
14
16
|
|
15
17
|
require 'rspec' # Use the RSpec framework
|
metadata
CHANGED
@@ -1,108 +1,123 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequitur
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.19
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-05-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: coveralls
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: '0.7'
|
20
20
|
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version:
|
22
|
+
version: 0.7.0
|
23
23
|
type: :development
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
26
26
|
requirements:
|
27
27
|
- - "~>"
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version:
|
29
|
+
version: '0.7'
|
30
30
|
- - ">="
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
32
|
+
version: 0.7.0
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
|
-
name:
|
34
|
+
name: rake
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
36
36
|
requirements:
|
37
37
|
- - "~>"
|
38
38
|
- !ruby/object:Gem::Version
|
39
|
-
version:
|
39
|
+
version: 10.0.0
|
40
40
|
- - ">="
|
41
41
|
- !ruby/object:Gem::Version
|
42
|
-
version:
|
42
|
+
version: 10.0.0
|
43
43
|
type: :development
|
44
44
|
prerelease: false
|
45
45
|
version_requirements: !ruby/object:Gem::Requirement
|
46
46
|
requirements:
|
47
47
|
- - "~>"
|
48
48
|
- !ruby/object:Gem::Version
|
49
|
-
version:
|
49
|
+
version: 10.0.0
|
50
50
|
- - ">="
|
51
51
|
- !ruby/object:Gem::Version
|
52
|
-
version:
|
52
|
+
version: 10.0.0
|
53
53
|
- !ruby/object:Gem::Dependency
|
54
|
-
name:
|
54
|
+
name: rspec
|
55
55
|
requirement: !ruby/object:Gem::Requirement
|
56
56
|
requirements:
|
57
|
+
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 3.0.0
|
57
60
|
- - ">="
|
58
61
|
- !ruby/object:Gem::Version
|
59
|
-
version: 0.
|
62
|
+
version: 3.0.0
|
60
63
|
type: :development
|
61
64
|
prerelease: false
|
62
65
|
version_requirements: !ruby/object:Gem::Requirement
|
63
66
|
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 3.0.0
|
64
70
|
- - ">="
|
65
71
|
- !ruby/object:Gem::Version
|
66
|
-
version: 0.
|
72
|
+
version: 3.0.0
|
67
73
|
- !ruby/object:Gem::Dependency
|
68
|
-
name:
|
74
|
+
name: rubygems
|
69
75
|
requirement: !ruby/object:Gem::Requirement
|
70
76
|
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '2.0'
|
71
80
|
- - ">="
|
72
81
|
- !ruby/object:Gem::Version
|
73
|
-
version: 0.
|
82
|
+
version: 2.0.0
|
74
83
|
type: :development
|
75
84
|
prerelease: false
|
76
85
|
version_requirements: !ruby/object:Gem::Requirement
|
77
86
|
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '2.0'
|
78
90
|
- - ">="
|
79
91
|
- !ruby/object:Gem::Version
|
80
|
-
version: 0.
|
92
|
+
version: 2.0.0
|
81
93
|
- !ruby/object:Gem::Dependency
|
82
|
-
name:
|
94
|
+
name: simplecov
|
83
95
|
requirement: !ruby/object:Gem::Requirement
|
84
96
|
requirements:
|
85
97
|
- - "~>"
|
86
98
|
- !ruby/object:Gem::Version
|
87
|
-
version:
|
99
|
+
version: 0.8.0
|
88
100
|
- - ">="
|
89
101
|
- !ruby/object:Gem::Version
|
90
|
-
version:
|
102
|
+
version: 0.8.0
|
91
103
|
type: :development
|
92
104
|
prerelease: false
|
93
105
|
version_requirements: !ruby/object:Gem::Requirement
|
94
106
|
requirements:
|
95
107
|
- - "~>"
|
96
108
|
- !ruby/object:Gem::Version
|
97
|
-
version:
|
109
|
+
version: 0.8.0
|
98
110
|
- - ">="
|
99
111
|
- !ruby/object:Gem::Version
|
100
|
-
version:
|
101
|
-
description:
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
112
|
+
version: 0.8.0
|
113
|
+
description: |
|
114
|
+
Ruby implementation of the Sequitur algorithm. This algorithm automatically
|
115
|
+
finds repetitions and hierarchical structures in a given sequence of input
|
116
|
+
tokens. It encodes the input into a context-free grammar.
|
117
|
+
The Sequitur algorithm can be used to
|
118
|
+
a) compress a sequence of items,
|
119
|
+
b) discover patterns in an sequence,
|
120
|
+
c) generate grammar rules that can represent a given input.
|
106
121
|
email: famished.tiger@yahoo.com
|
107
122
|
executables: []
|
108
123
|
extensions: []
|
@@ -173,7 +188,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
173
188
|
version: '0'
|
174
189
|
requirements: []
|
175
190
|
rubyforge_project:
|
176
|
-
rubygems_version: 2.6.
|
191
|
+
rubygems_version: 2.6.13
|
177
192
|
signing_key:
|
178
193
|
specification_version: 4
|
179
194
|
summary: Ruby implementation of the Sequitur algorithm
|