sequitur 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/CHANGELOG.md +4 -0
- data/README.md +23 -1
- data/lib/sequitur.rb +20 -0
- data/lib/sequitur/constants.rb +1 -1
- data/lib/sequitur/sequitur_grammar.rb +1 -1
- data/spec/sequitur/sequitur_grammar_spec.rb +7 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
OGM4MWZiN2YzOTRmODhhNDlkNGIzMGRiOWYzZjU3MDY5ODc2YWUwMg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZGJmYzVjMzlkOTE0ZGRhZGI3ZWRjY2IyMDk3ZGY3ZDY1YjIyY2YyNg==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
Nzk1ZDRmNjBkN2FmODQ4Yjk5ODNiYTk3NDA1YTkxYmZlYjMyOGI0MDhkZTM5
|
10
|
+
NTg1ZWQyOGIxYzc2NjE4ODg0ODY1MDJiMzJhNDBmNGJlMjdiNzAwMTI3ZmZl
|
11
|
+
MDkzMGFjYWNhNTU4ZWUwYTBjYjdlMjJjNjc2MmY5ZTk5MTUzNTQ=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MWQ2OGU3OWIyMzQ5MWVkM2IxZGFhYzhmNDM5MGRkMzA5MjlhNzUxYTViYWMz
|
14
|
+
MzIzY2MxYjk1OTBmMTNkNmM4NDM0NWI4YzJlZDk5YjU2ZjU4YmM2ZWYzMjkx
|
15
|
+
MmNkYWE5MjU3NmI3MTE1OTIwYWZmOWI1ODRlMmQ4ZmZjNjkwY2E=
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
### 0.0.11 / 2014-08-24
|
2
|
+
* [FIX] `SequiturGrammar#check_unicity`: an exception was raised when it shouldn't have been. Added an example to the spec file.
|
3
|
+
* [CHANGE] `sequitur.rb` : Added the convenience Sequitur::build_from method.
|
4
|
+
|
1
5
|
### 0.0.10 / 2014-08-24
|
2
6
|
* [CHANGE] `README.md`: Added hyperlinks about Sequitur algorithm.
|
3
7
|
|
data/README.md
CHANGED
@@ -15,7 +15,29 @@ Sequitur is an algorithm that generates a set of rules representing a sequence o
|
|
15
15
|
It detects repeated token patterns and can represent them in a compact way.
|
16
16
|
|
17
17
|
|
18
|
-
|
18
|
+
## Synopsis
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
|
22
|
+
require 'sequitur' # Load the Sequitur library
|
23
|
+
|
24
|
+
input_sequence = 'abcabdab'
|
25
|
+
|
26
|
+
# The SEQUITUR algorithm will detect the repeated 'ab' pattern
|
27
|
+
# and will generate a context-free grammar that represents the input string
|
28
|
+
grammar = Sequitur.build_from(input_sequence)
|
29
|
+
|
30
|
+
# Display the grammar rules
|
31
|
+
# Each rule is displayed with the format:
|
32
|
+
# rule_id : a_sequence_grammar_symbols
|
33
|
+
# Where:
|
34
|
+
# - rule_id is the object id of a rule (in decimal)
|
35
|
+
# - a grammar symbol is either a terminal symbol
|
36
|
+
# (i.e. a character from the input) or the id of a production
|
37
|
+
puts grammar.to_string
|
38
|
+
```
|
39
|
+
|
40
|
+
### TODO: Add more documentation ###
|
19
41
|
|
20
42
|
|
21
43
|
Copyright
|
data/lib/sequitur.rb
CHANGED
@@ -5,4 +5,24 @@
|
|
5
5
|
require_relative './sequitur/constants'
|
6
6
|
require_relative './sequitur/sequitur_grammar'
|
7
7
|
|
8
|
+
|
9
|
+
module Sequitur
|
10
|
+
|
11
|
+
# Convenience method. Builds a Sequitur-generated grammar based
|
12
|
+
# on the sequence of input tokens
|
13
|
+
# @param tokens [String, Enumerator] The input sequence of tokens.
|
14
|
+
# Can be a sequence of characters (i.e. a String) or an Enumerator.
|
15
|
+
# Tokens returned by enumerator should respond to the :hash message.
|
16
|
+
# Returns a SequiturGrammar instance.
|
17
|
+
def self.build_from(tokens)
|
18
|
+
input_sequence = case tokens
|
19
|
+
when String then tokens.chars
|
20
|
+
when Enumerator then tokens
|
21
|
+
else tokens.to_enum
|
22
|
+
end
|
23
|
+
|
24
|
+
return SequiturGrammar.new(input_sequence)
|
25
|
+
end
|
26
|
+
end # module
|
27
|
+
|
8
28
|
# End of file
|
data/lib/sequitur/sequitur_grammar.rb
CHANGED
@@ -41,7 +41,7 @@ class SequiturGrammar < DynamicGrammar
|
|
41
41
|
msg << "\nOnce in production #{colliding.production_id}"
|
42
42
|
msg << "\nSecond in production #{a_prod.object_id}"
|
43
43
|
msg << "\n#{to_string}"
|
44
|
-
fail StandardError, msg
|
44
|
+
fail StandardError, msg unless colliding.production_id == a_prod.object_id
|
45
45
|
else
|
46
46
|
all_digrams[a_digram.key] = a_digram
|
47
47
|
end
|
data/spec/sequitur/sequitur_grammar_spec.rb
CHANGED
@@ -74,6 +74,13 @@ describe SequiturGrammar do
|
|
74
74
|
expect(p_a.rhs).to eq([:a, :b, :c])
|
75
75
|
expect(instance.root.rhs).to eq([p_a, p_a])
|
76
76
|
end
|
77
|
+
|
78
|
+
it 'should cope with a pattern that caused an exception' do
|
79
|
+
input = 'aaac' # This sequence raised an exception
|
80
|
+
|
81
|
+
# Creation
|
82
|
+
expect {SequiturGrammar.new(input.chars)}.not_to raise_error
|
83
|
+
end
|
77
84
|
|
78
85
|
|
79
86
|
it 'should cope with the example from presentation' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequitur
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.10
|
4
|
+
version: 0.0.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-08-
|
11
|
+
date: 2014-08-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|