rambling-trie 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +2 -1
- data/README.md +23 -7
- data/Rakefile +4 -0
- data/lib/rambling/trie.rb +27 -21
- data/lib/rambling/trie/comparable.rb +3 -3
- data/lib/rambling/trie/compressible.rb +14 -0
- data/lib/rambling/trie/compressor.rb +37 -24
- data/lib/rambling/trie/configuration/properties.rb +8 -6
- data/lib/rambling/trie/configuration/provider_collection.rb +34 -16
- data/lib/rambling/trie/container.rb +156 -36
- data/lib/rambling/trie/enumerable.rb +4 -4
- data/lib/rambling/trie/nodes.rb +11 -0
- data/lib/rambling/trie/nodes/compressed.rb +115 -0
- data/lib/rambling/trie/nodes/missing.rb +10 -0
- data/lib/rambling/trie/nodes/node.rb +151 -0
- data/lib/rambling/trie/nodes/raw.rb +89 -0
- data/lib/rambling/trie/readers/plain_text.rb +1 -11
- data/lib/rambling/trie/serializers/marshal.rb +4 -4
- data/lib/rambling/trie/serializers/yaml.rb +4 -4
- data/lib/rambling/trie/serializers/zip.rb +9 -8
- data/lib/rambling/trie/version.rb +1 -1
- data/spec/assets/test_words.es_DO.txt +1 -0
- data/spec/integration/rambling/trie_spec.rb +40 -35
- data/spec/lib/rambling/trie/comparable_spec.rb +6 -15
- data/spec/lib/rambling/trie/compressor_spec.rb +88 -13
- data/spec/lib/rambling/trie/configuration/properties_spec.rb +7 -7
- data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +8 -20
- data/spec/lib/rambling/trie/container_spec.rb +159 -168
- data/spec/lib/rambling/trie/enumerable_spec.rb +12 -9
- data/spec/lib/rambling/trie/inspectable_spec.rb +11 -11
- data/spec/lib/rambling/trie/nodes/compressed_spec.rb +35 -0
- data/spec/lib/rambling/trie/nodes/node_spec.rb +7 -0
- data/spec/lib/rambling/trie/nodes/raw_spec.rb +177 -0
- data/spec/lib/rambling/trie/serializers/file_spec.rb +4 -4
- data/spec/lib/rambling/trie/serializers/marshal_spec.rb +3 -7
- data/spec/lib/rambling/trie/serializers/yaml_spec.rb +3 -7
- data/spec/lib/rambling/trie/serializers/zip_spec.rb +16 -20
- data/spec/lib/rambling/trie/stringifyable_spec.rb +7 -8
- data/spec/lib/rambling/trie_spec.rb +2 -2
- data/spec/spec_helper.rb +3 -1
- data/spec/support/config.rb +4 -0
- data/spec/support/helpers/add_word.rb +18 -0
- data/spec/support/shared_examples/{a_compressable_trie.rb → a_compressible_trie.rb} +13 -3
- data/spec/support/shared_examples/a_serializable_trie.rb +8 -6
- data/spec/support/shared_examples/a_serializer.rb +6 -0
- data/spec/{lib/rambling/trie/node_spec.rb → support/shared_examples/a_trie_node.rb} +61 -30
- data/spec/{lib/rambling/trie/compressed_node_spec.rb → support/shared_examples/a_trie_node_implementation.rb} +18 -69
- metadata +22 -15
- data/lib/rambling/trie/compressable.rb +0 -14
- data/lib/rambling/trie/compressed_node.rb +0 -120
- data/lib/rambling/trie/missing_node.rb +0 -8
- data/lib/rambling/trie/node.rb +0 -97
- data/lib/rambling/trie/raw_node.rb +0 -96
- data/spec/lib/rambling/trie/raw_node_spec.rb +0 -389
@@ -0,0 +1,89 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
module Nodes
|
4
|
+
# A representation of a node in an uncompressed trie data structure.
|
5
|
+
class Raw < Rambling::Trie::Nodes::Node
|
6
|
+
# Adds a word to the current raw (uncompressed) trie node.
|
7
|
+
# @param [Array<Symbol>] chars the char array to add to the trie.
|
8
|
+
# @return [Raw] the added/modified node based on the word added.
|
9
|
+
# @note This method clears the contents of the chars variable.
|
10
|
+
def add chars
|
11
|
+
if chars.empty?
|
12
|
+
terminal!
|
13
|
+
else
|
14
|
+
add_to_children_tree chars
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# Checks if a path for a set of characters exists in the trie.
|
19
|
+
# @param [Array<String>] chars the characters to look for in the trie.
|
20
|
+
# @return [Boolean] `true` if the characters are found, `false`
|
21
|
+
# otherwise.
|
22
|
+
def partial_word? chars = []
|
23
|
+
return true if chars.empty?
|
24
|
+
|
25
|
+
letter = chars.slice!(0).to_sym
|
26
|
+
child = children_tree[letter]
|
27
|
+
!!child && child.partial_word?(chars)
|
28
|
+
end
|
29
|
+
|
30
|
+
# Checks if a path for a set of characters represents a word in the trie.
|
31
|
+
# @param [Array<String>] chars the characters to look for in the trie.
|
32
|
+
# @return [Boolean] `true` if the characters are found and form a word,
|
33
|
+
# `false` otherwise.
|
34
|
+
def word? chars = []
|
35
|
+
return terminal? if chars.empty?
|
36
|
+
|
37
|
+
letter = chars.slice!(0).to_sym
|
38
|
+
child = children_tree[letter]
|
39
|
+
!!child && child.word?(chars)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Always returns `false` for a raw (uncompressed) node.
|
43
|
+
# @return [Boolean] always `false` for a raw (uncompressed) node.
|
44
|
+
def compressed?
|
45
|
+
false
|
46
|
+
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
def add_to_children_tree chars
|
51
|
+
letter = chars.pop
|
52
|
+
child = children_tree[letter] || new_node(letter)
|
53
|
+
child.add chars
|
54
|
+
child
|
55
|
+
end
|
56
|
+
|
57
|
+
def new_node letter
|
58
|
+
node = Rambling::Trie::Nodes::Raw.new letter, self
|
59
|
+
children_tree[letter] = node
|
60
|
+
node
|
61
|
+
end
|
62
|
+
|
63
|
+
def closest_node chars
|
64
|
+
letter = chars.slice!(0).to_sym
|
65
|
+
child = children_tree[letter]
|
66
|
+
|
67
|
+
return Rambling::Trie::Nodes::Missing.new unless child
|
68
|
+
|
69
|
+
child.scan chars
|
70
|
+
end
|
71
|
+
|
72
|
+
def children_match_prefix chars
|
73
|
+
return enum_for :children_match_prefix, chars unless block_given?
|
74
|
+
|
75
|
+
return if chars.empty?
|
76
|
+
|
77
|
+
letter = chars.slice!(0).to_sym
|
78
|
+
child = children_tree[letter]
|
79
|
+
|
80
|
+
return unless child
|
81
|
+
|
82
|
+
child.match_prefix chars do |word|
|
83
|
+
yield word
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
@@ -8,17 +8,7 @@ module Rambling
|
|
8
8
|
# from.
|
9
9
|
# @yield [String] Each line read from the file.
|
10
10
|
def each_word filepath
|
11
|
-
|
12
|
-
end
|
13
|
-
|
14
|
-
private
|
15
|
-
|
16
|
-
def each_line filepath
|
17
|
-
open(filepath) { |file| file.each_line { |line| yield line } }
|
18
|
-
end
|
19
|
-
|
20
|
-
def open filepath
|
21
|
-
File.open(filepath) { |file| yield file }
|
11
|
+
File.foreach(filepath) { |line| yield line.chomp! }
|
22
12
|
end
|
23
13
|
end
|
24
14
|
end
|
@@ -11,17 +11,17 @@ module Rambling
|
|
11
11
|
end
|
12
12
|
|
13
13
|
# Loads marshaled object from contents in filepath and deserializes it
|
14
|
-
# into a {Node Node}.
|
14
|
+
# into a {Nodes::Node Node}.
|
15
15
|
# @param [String] filepath the full path of the file to load the
|
16
16
|
# marshaled object from.
|
17
|
-
# @return [Node] The deserialized {Node Node}.
|
17
|
+
# @return [Nodes::Node] The deserialized {Nodes::Node Node}.
|
18
18
|
def load filepath
|
19
19
|
::Marshal.load serializer.load filepath
|
20
20
|
end
|
21
21
|
|
22
|
-
# Serializes a {Node Node} and dumps it as a marshaled object into
|
22
|
+
# Serializes a {Nodes::Node Node} and dumps it as a marshaled object into
|
23
23
|
# filepath.
|
24
|
-
# @param [Node] node the node to serialize
|
24
|
+
# @param [Nodes::Node] node the node to serialize
|
25
25
|
# @param [String] filepath the full path of the file to dump the
|
26
26
|
# marshaled object into.
|
27
27
|
# @return [Numeric] number of bytes written to disk.
|
@@ -11,17 +11,17 @@ module Rambling
|
|
11
11
|
end
|
12
12
|
|
13
13
|
# Loads serialized object from YAML file in filepath and deserializes
|
14
|
-
# it into a {Node Node}.
|
14
|
+
# it into a {Nodes::Node Node}.
|
15
15
|
# @param [String] filepath the full path of the file to load the
|
16
16
|
# serialized YAML object from.
|
17
|
-
# @return [Node] The deserialized {Node Node}.
|
17
|
+
# @return [Nodes::Node] The deserialized {Nodes::Node Node}.
|
18
18
|
def load filepath
|
19
19
|
require 'yaml'
|
20
20
|
::YAML.load serializer.load filepath
|
21
21
|
end
|
22
22
|
|
23
|
-
# Serializes a {Node Node} and dumps it as a YAML object into filepath.
|
24
|
-
# @param [Node] node the node to serialize
|
23
|
+
# Serializes a {Nodes::Node Node} and dumps it as a YAML object into filepath.
|
24
|
+
# @param [Nodes::Node] node the node to serialize
|
25
25
|
# @param [String] filepath the full path of the file to dump the YAML
|
26
26
|
# object into.
|
27
27
|
# @return [Numeric] number of bytes written to disk.
|
@@ -4,11 +4,9 @@ module Rambling
|
|
4
4
|
# Zip file serializer. Dumps/loads contents from zip files. Automatically
|
5
5
|
# detects if zip file contains `.marshal` or `.yml` file
|
6
6
|
class Zip
|
7
|
-
extend ::Forwardable
|
8
|
-
|
9
7
|
# Creates a new Zip serializer.
|
10
|
-
# @param [Properties] properties the configuration
|
11
|
-
# far.
|
8
|
+
# @param [Configuration::Properties] properties the configuration
|
9
|
+
# properties set up so far.
|
12
10
|
def initialize properties
|
13
11
|
@properties = properties
|
14
12
|
end
|
@@ -52,10 +50,13 @@ module Rambling
|
|
52
50
|
|
53
51
|
attr_reader :properties
|
54
52
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
53
|
+
def serializers
|
54
|
+
properties.serializers
|
55
|
+
end
|
56
|
+
|
57
|
+
def tmp_path
|
58
|
+
properties.tmp_path
|
59
|
+
end
|
59
60
|
|
60
61
|
def path filename
|
61
62
|
require 'securerandom'
|
@@ -1,61 +1,68 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
+
require 'zip'
|
2
3
|
|
3
4
|
describe Rambling::Trie do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
5
|
+
let(:assets_path) { File.join ::SPEC_ROOT, 'assets' }
|
6
|
+
|
7
|
+
context 'when providing words directly' do
|
8
|
+
it_behaves_like 'a compressible trie' do
|
9
|
+
let(:words) { %w(a couple of words for our full trie integration test) }
|
10
|
+
let(:trie) { Rambling::Trie.create }
|
11
|
+
|
12
|
+
before do
|
13
|
+
trie.concat words
|
14
|
+
end
|
8
15
|
end
|
9
16
|
end
|
10
17
|
|
11
|
-
|
12
|
-
it_behaves_like 'a
|
13
|
-
let(:
|
14
|
-
let(:
|
15
|
-
|
18
|
+
context 'when provided with words with unicode characters' do
|
19
|
+
it_behaves_like 'a compressible trie' do
|
20
|
+
let(:words) { %w(poquísimas palabras para nuestra prueba de integración completa 🙃) }
|
21
|
+
let(:trie) { Rambling::Trie.create }
|
22
|
+
|
23
|
+
before do
|
24
|
+
trie.concat words
|
25
|
+
end
|
16
26
|
end
|
17
27
|
end
|
18
28
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
29
|
+
context 'when provided with a filepath' do
|
30
|
+
let(:words) { File.readlines(filepath).map &:chomp! }
|
31
|
+
let(:trie) { Rambling::Trie.create filepath }
|
32
|
+
|
33
|
+
context 'with english words' do
|
34
|
+
it_behaves_like 'a compressible trie' do
|
35
|
+
let(:filepath) { File.join assets_path, 'test_words.en_US.txt' }
|
36
|
+
end
|
23
37
|
end
|
24
|
-
end
|
25
38
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
let(:trie) { Rambling::Trie.create filepath }
|
39
|
+
context 'with unicode characters' do
|
40
|
+
it_behaves_like 'a compressible trie' do
|
41
|
+
let(:filepath) { File.join assets_path, 'test_words.es_DO.txt' }
|
42
|
+
end
|
31
43
|
end
|
32
44
|
end
|
33
45
|
|
34
|
-
describe '
|
35
|
-
let(:words_filepath) { File.join
|
36
|
-
let(:words) { File.readlines(words_filepath).map &:chomp
|
37
|
-
let(:trie_to_serialize) { Rambling::Trie.create words_filepath }
|
38
|
-
let(:trie_filename) { File.join ::SPEC_ROOT, '..', 'tmp', 'trie-root' }
|
46
|
+
describe 'dump and load' do
|
47
|
+
let(:words_filepath) { File.join assets_path, 'test_words.en_US.txt' }
|
48
|
+
let(:words) { File.readlines(words_filepath).map &:chomp }
|
39
49
|
|
40
50
|
context 'when serialized with Ruby marshal format (default)' do
|
41
51
|
it_behaves_like 'a serializable trie' do
|
42
|
-
let(:
|
43
|
-
let(:
|
44
|
-
let(:serializer) { nil }
|
52
|
+
let(:trie_to_serialize) { Rambling::Trie.create words_filepath }
|
53
|
+
let(:format) { :marshal }
|
45
54
|
end
|
46
55
|
end
|
47
56
|
|
48
57
|
context 'when serialized with YAML' do
|
49
58
|
it_behaves_like 'a serializable trie' do
|
50
|
-
let(:
|
51
|
-
let(:
|
52
|
-
let(:serializer) { nil }
|
59
|
+
let(:trie_to_serialize) { Rambling::Trie.create words_filepath }
|
60
|
+
let(:format) { :yml }
|
53
61
|
end
|
54
62
|
end
|
55
63
|
|
56
64
|
context 'when serialized with zipped Ruby marshal format' do
|
57
65
|
before do
|
58
|
-
require 'zip'
|
59
66
|
@original_on_exists_proc = ::Zip.on_exists_proc
|
60
67
|
@original_continue_on_exists_proc = ::Zip.continue_on_exists_proc
|
61
68
|
::Zip.on_exists_proc = true
|
@@ -63,15 +70,13 @@ describe Rambling::Trie do
|
|
63
70
|
end
|
64
71
|
|
65
72
|
after do
|
66
|
-
require 'zip'
|
67
73
|
::Zip.on_exists_proc = @original_on_exists_proc
|
68
74
|
::Zip.continue_on_exists_proc = @original_continue_on_exists_proc
|
69
75
|
end
|
70
76
|
|
71
77
|
it_behaves_like 'a serializable trie' do
|
72
|
-
let(:
|
73
|
-
let(:
|
74
|
-
let(:serializer) { nil }
|
78
|
+
let(:trie_to_serialize) { Rambling::Trie.create words_filepath }
|
79
|
+
let(:format) { 'marshal.zip' }
|
75
80
|
end
|
76
81
|
end
|
77
82
|
end
|
@@ -2,8 +2,8 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe Rambling::Trie::Comparable do
|
4
4
|
describe '#==' do
|
5
|
-
let(:node_1) { Rambling::Trie::
|
6
|
-
let(:node_2) { Rambling::Trie::
|
5
|
+
let(:node_1) { Rambling::Trie::Nodes::Raw.new }
|
6
|
+
let(:node_2) { Rambling::Trie::Nodes::Raw.new }
|
7
7
|
|
8
8
|
context 'when the nodes do not have the same letter' do
|
9
9
|
before do
|
@@ -67,37 +67,28 @@ describe Rambling::Trie::Comparable do
|
|
67
67
|
context 'when the nodes have the same letter and the same children' do
|
68
68
|
before do
|
69
69
|
node_1.letter = :t
|
70
|
-
node_1
|
71
|
-
node_1.add 'hree'
|
72
|
-
node_1.add 'hings'
|
70
|
+
add_words node_1, %w(hese hree hings)
|
73
71
|
|
74
72
|
node_2.letter = :t
|
75
|
-
node_2
|
76
|
-
node_2.add 'hree'
|
77
|
-
node_2.add 'hings'
|
73
|
+
add_words node_2, %w(hese hree hings)
|
78
74
|
end
|
79
75
|
|
80
76
|
it 'returns true' do
|
81
77
|
expect(node_1).to eq node_2
|
82
|
-
expect(node_1[:h][:e][:s][:e]).to eq node_2[:h][:e][:s][:e]
|
83
78
|
end
|
84
79
|
end
|
85
80
|
|
86
81
|
context 'when the nodes have the same letter but different children' do
|
87
82
|
before do
|
88
83
|
node_1.letter = :t
|
89
|
-
node_1
|
90
|
-
node_1.add 'wo'
|
84
|
+
add_words node_1, %w(hese wo)
|
91
85
|
|
92
86
|
node_2.letter = :t
|
93
|
-
node_2
|
94
|
-
node_2.add 'hree'
|
95
|
-
node_2.add 'hings'
|
87
|
+
add_words node_2, %w(hese hree hings)
|
96
88
|
end
|
97
89
|
|
98
90
|
it 'returns false' do
|
99
91
|
expect(node_1).not_to eq node_2
|
100
|
-
expect(node_1[:h][:e][:s][:e]).to eq node_2[:h][:e][:s][:e]
|
101
92
|
end
|
102
93
|
end
|
103
94
|
end
|
@@ -4,28 +4,103 @@ describe Rambling::Trie::Compressor do
|
|
4
4
|
let(:compressor) { Rambling::Trie::Compressor.new }
|
5
5
|
|
6
6
|
describe '#compress' do
|
7
|
-
let(:
|
8
|
-
|
9
|
-
|
7
|
+
let(:node) { Rambling::Trie::Nodes::Raw.new }
|
8
|
+
|
9
|
+
it 'compresses the node' do
|
10
|
+
add_words node, %w(a few words hello hell)
|
11
|
+
compressed = compressor.compress node
|
12
|
+
|
13
|
+
expect(compressed.children_tree.keys).to eq %i(a few words hell)
|
10
14
|
end
|
11
15
|
|
12
|
-
|
13
|
-
|
16
|
+
context 'with at least one word' do
|
17
|
+
before do
|
18
|
+
add_words node, %w(all the words)
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'keeps the node letter nil' do
|
22
|
+
compressed = compressor.compress node
|
23
|
+
|
24
|
+
expect(compressed.letter).to be_nil
|
25
|
+
end
|
14
26
|
end
|
15
27
|
|
16
|
-
|
17
|
-
|
28
|
+
context 'with a single word' do
|
29
|
+
before do
|
30
|
+
add_word node, 'all'
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'compresses into a single node without children' do
|
34
|
+
compressed = compressor.compress node
|
18
35
|
|
19
|
-
|
20
|
-
|
21
|
-
expect(
|
36
|
+
expect(compressed[:all].letter).to eq :all
|
37
|
+
expect(compressed[:all].children.size).to eq 0
|
38
|
+
expect(compressed[:all]).to be_terminal
|
39
|
+
expect(compressed[:all]).to be_compressed
|
22
40
|
end
|
23
41
|
end
|
24
42
|
|
25
|
-
|
26
|
-
|
43
|
+
context 'with two words' do
|
44
|
+
before do
|
45
|
+
add_words node, %w(all ask)
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'compresses into corresponding three nodes' do
|
49
|
+
compressed = compressor.compress node
|
50
|
+
|
51
|
+
expect(compressed[:a].letter).to eq :a
|
52
|
+
expect(compressed[:a].children.size).to eq 2
|
53
|
+
|
54
|
+
expect(compressed[:a][:ll].letter).to eq :ll
|
55
|
+
expect(compressed[:a][:sk].letter).to eq :sk
|
56
|
+
|
57
|
+
expect(compressed[:a][:ll].children.size).to eq 0
|
58
|
+
expect(compressed[:a][:sk].children.size).to eq 0
|
59
|
+
|
60
|
+
expect(compressed[:a][:ll]).to be_terminal
|
61
|
+
expect(compressed[:a][:sk]).to be_terminal
|
62
|
+
|
63
|
+
expect(compressed[:a][:ll]).to be_compressed
|
64
|
+
expect(compressed[:a][:sk]).to be_compressed
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'reassigns the parent nodes correctly' do
|
69
|
+
add_words node, %w(repay rest repaint)
|
70
|
+
compressed = compressor.compress node
|
71
|
+
|
72
|
+
expect(compressed[:re].letter).to eq :re
|
73
|
+
expect(compressed[:re].parent).to eq compressed
|
74
|
+
expect(compressed[:re].children.size).to eq 2
|
75
|
+
|
76
|
+
expect(compressed[:re][:pa].letter).to eq :pa
|
77
|
+
expect(compressed[:re][:pa].parent).to eq compressed[:re]
|
78
|
+
expect(compressed[:re][:pa].children.size).to eq 2
|
79
|
+
|
80
|
+
expect(compressed[:re][:st].letter).to eq :st
|
81
|
+
expect(compressed[:re][:st].parent).to eq compressed[:re]
|
82
|
+
expect(compressed[:re][:st].children.size).to eq 0
|
83
|
+
|
84
|
+
expect(compressed[:re][:pa][:y].letter).to eq :y
|
85
|
+
expect(compressed[:re][:pa][:y].parent).to eq compressed[:re][:pa]
|
86
|
+
expect(compressed[:re][:pa][:y].children.size).to eq 0
|
87
|
+
|
88
|
+
expect(compressed[:re][:pa][:int].letter).to eq :int
|
89
|
+
expect(compressed[:re][:pa][:int].parent).to eq compressed[:re][:pa]
|
90
|
+
expect(compressed[:re][:pa][:int].children.size).to eq 0
|
91
|
+
end
|
92
|
+
|
93
|
+
it 'does not compress terminal nodes' do
|
94
|
+
add_words node, %w(you your yours)
|
95
|
+
compressed = compressor.compress node
|
96
|
+
|
97
|
+
expect(compressed[:you].letter).to eq :you
|
98
|
+
|
99
|
+
expect(compressed[:you][:r].letter).to eq :r
|
100
|
+
expect(compressed[:you][:r]).to be_compressed
|
27
101
|
|
28
|
-
expect(
|
102
|
+
expect(compressed[:you][:r][:s].letter).to eq :s
|
103
|
+
expect(compressed[:you][:r][:s]).to be_compressed
|
29
104
|
end
|
30
105
|
end
|
31
106
|
end
|