rambling-trie 1.0.2 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -1
  3. data/README.md +23 -7
  4. data/Rakefile +4 -0
  5. data/lib/rambling/trie.rb +27 -21
  6. data/lib/rambling/trie/comparable.rb +3 -3
  7. data/lib/rambling/trie/compressible.rb +14 -0
  8. data/lib/rambling/trie/compressor.rb +37 -24
  9. data/lib/rambling/trie/configuration/properties.rb +8 -6
  10. data/lib/rambling/trie/configuration/provider_collection.rb +34 -16
  11. data/lib/rambling/trie/container.rb +156 -36
  12. data/lib/rambling/trie/enumerable.rb +4 -4
  13. data/lib/rambling/trie/nodes.rb +11 -0
  14. data/lib/rambling/trie/nodes/compressed.rb +115 -0
  15. data/lib/rambling/trie/nodes/missing.rb +10 -0
  16. data/lib/rambling/trie/nodes/node.rb +151 -0
  17. data/lib/rambling/trie/nodes/raw.rb +89 -0
  18. data/lib/rambling/trie/readers/plain_text.rb +1 -11
  19. data/lib/rambling/trie/serializers/marshal.rb +4 -4
  20. data/lib/rambling/trie/serializers/yaml.rb +4 -4
  21. data/lib/rambling/trie/serializers/zip.rb +9 -8
  22. data/lib/rambling/trie/version.rb +1 -1
  23. data/spec/assets/test_words.es_DO.txt +1 -0
  24. data/spec/integration/rambling/trie_spec.rb +40 -35
  25. data/spec/lib/rambling/trie/comparable_spec.rb +6 -15
  26. data/spec/lib/rambling/trie/compressor_spec.rb +88 -13
  27. data/spec/lib/rambling/trie/configuration/properties_spec.rb +7 -7
  28. data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +8 -20
  29. data/spec/lib/rambling/trie/container_spec.rb +159 -168
  30. data/spec/lib/rambling/trie/enumerable_spec.rb +12 -9
  31. data/spec/lib/rambling/trie/inspectable_spec.rb +11 -11
  32. data/spec/lib/rambling/trie/nodes/compressed_spec.rb +35 -0
  33. data/spec/lib/rambling/trie/nodes/node_spec.rb +7 -0
  34. data/spec/lib/rambling/trie/nodes/raw_spec.rb +177 -0
  35. data/spec/lib/rambling/trie/serializers/file_spec.rb +4 -4
  36. data/spec/lib/rambling/trie/serializers/marshal_spec.rb +3 -7
  37. data/spec/lib/rambling/trie/serializers/yaml_spec.rb +3 -7
  38. data/spec/lib/rambling/trie/serializers/zip_spec.rb +16 -20
  39. data/spec/lib/rambling/trie/stringifyable_spec.rb +7 -8
  40. data/spec/lib/rambling/trie_spec.rb +2 -2
  41. data/spec/spec_helper.rb +3 -1
  42. data/spec/support/config.rb +4 -0
  43. data/spec/support/helpers/add_word.rb +18 -0
  44. data/spec/support/shared_examples/{a_compressable_trie.rb → a_compressible_trie.rb} +13 -3
  45. data/spec/support/shared_examples/a_serializable_trie.rb +8 -6
  46. data/spec/support/shared_examples/a_serializer.rb +6 -0
  47. data/spec/{lib/rambling/trie/node_spec.rb → support/shared_examples/a_trie_node.rb} +61 -30
  48. data/spec/{lib/rambling/trie/compressed_node_spec.rb → support/shared_examples/a_trie_node_implementation.rb} +18 -69
  49. metadata +22 -15
  50. data/lib/rambling/trie/compressable.rb +0 -14
  51. data/lib/rambling/trie/compressed_node.rb +0 -120
  52. data/lib/rambling/trie/missing_node.rb +0 -8
  53. data/lib/rambling/trie/node.rb +0 -97
  54. data/lib/rambling/trie/raw_node.rb +0 -96
  55. data/spec/lib/rambling/trie/raw_node_spec.rb +0 -389
@@ -0,0 +1,89 @@
1
+ module Rambling
2
+ module Trie
3
+ module Nodes
4
+ # A representation of a node in an uncompressed trie data structure.
5
+ class Raw < Rambling::Trie::Nodes::Node
6
+ # Adds a word to the current raw (uncompressed) trie node.
7
+ # @param [Array<Symbol>] chars the char array to add to the trie.
8
+ # @return [Raw] the added/modified node based on the word added.
9
+ # @note This method clears the contents of the chars variable.
10
+ def add chars
11
+ if chars.empty?
12
+ terminal!
13
+ else
14
+ add_to_children_tree chars
15
+ end
16
+ end
17
+
18
+ # Checks if a path for a set of characters exists in the trie.
19
+ # @param [Array<String>] chars the characters to look for in the trie.
20
+ # @return [Boolean] `true` if the characters are found, `false`
21
+ # otherwise.
22
+ def partial_word? chars = []
23
+ return true if chars.empty?
24
+
25
+ letter = chars.slice!(0).to_sym
26
+ child = children_tree[letter]
27
+ !!child && child.partial_word?(chars)
28
+ end
29
+
30
+ # Checks if a path for a set of characters represents a word in the trie.
31
+ # @param [Array<String>] chars the characters to look for in the trie.
32
+ # @return [Boolean] `true` if the characters are found and form a word,
33
+ # `false` otherwise.
34
+ def word? chars = []
35
+ return terminal? if chars.empty?
36
+
37
+ letter = chars.slice!(0).to_sym
38
+ child = children_tree[letter]
39
+ !!child && child.word?(chars)
40
+ end
41
+
42
+ # Always return `false` for a raw (uncompressed) node.
43
+ # @return [Boolean] always `false` for a raw (uncompressed) node.
44
+ def compressed?
45
+ false
46
+ end
47
+
48
+ private
49
+
50
+ def add_to_children_tree chars
51
+ letter = chars.pop
52
+ child = children_tree[letter] || new_node(letter)
53
+ child.add chars
54
+ child
55
+ end
56
+
57
+ def new_node letter
58
+ node = Rambling::Trie::Nodes::Raw.new letter, self
59
+ children_tree[letter] = node
60
+ node
61
+ end
62
+
63
+ def closest_node chars
64
+ letter = chars.slice!(0).to_sym
65
+ child = children_tree[letter]
66
+
67
+ return Rambling::Trie::Nodes::Missing.new unless child
68
+
69
+ child.scan chars
70
+ end
71
+
72
+ def children_match_prefix chars
73
+ return enum_for :children_match_prefix, chars unless block_given?
74
+
75
+ return if chars.empty?
76
+
77
+ letter = chars.slice!(0).to_sym
78
+ child = children_tree[letter]
79
+
80
+ return unless child
81
+
82
+ child.match_prefix chars do |word|
83
+ yield word
84
+ end
85
+ end
86
+ end
87
+ end
88
+ end
89
+ end
@@ -8,17 +8,7 @@ module Rambling
8
8
  # from.
9
9
  # @yield [String] Each line read from the file.
10
10
  def each_word filepath
11
- each_line(filepath) { |line| yield line.chomp! }
12
- end
13
-
14
- private
15
-
16
- def each_line filepath
17
- open(filepath) { |file| file.each_line { |line| yield line } }
18
- end
19
-
20
- def open filepath
21
- File.open(filepath) { |file| yield file }
11
+ File.foreach(filepath) { |line| yield line.chomp! }
22
12
  end
23
13
  end
24
14
  end
@@ -11,17 +11,17 @@ module Rambling
11
11
  end
12
12
 
13
13
  # Loads marshaled object from contents in filepath and deserializes it
14
- # into a {Node Node}.
14
+ # into a {Nodes::Node Node}.
15
15
  # @param [String] filepath the full path of the file to load the
16
16
  # marshaled object from.
17
- # @return [Node] The deserialized {Node Node}.
17
+ # @return [Nodes::Node] The deserialized {Nodes::Node Node}.
18
18
  def load filepath
19
19
  ::Marshal.load serializer.load filepath
20
20
  end
21
21
 
22
- # Serializes a {Node Node} and dumps it as a marshaled object into
22
+ # Serializes a {Nodes::Node Node} and dumps it as a marshaled object into
23
23
  # filepath.
24
- # @param [Node] node the node to serialize
24
+ # @param [Nodes::Node] node the node to serialize
25
25
  # @param [String] filepath the full path of the file to dump the
26
26
  # marshaled object into.
27
27
  # @return [Numeric] number of bytes written to disk.
@@ -11,17 +11,17 @@ module Rambling
11
11
  end
12
12
 
13
13
  # Loads serialized object from YAML file in filepath and deserializes
14
- # it into a {Node Node}.
14
+ # it into a {Nodes::Node Node}.
15
15
  # @param [String] filepath the full path of the file to load the
16
16
  # serialized YAML object from.
17
- # @return [Node] The deserialized {Node Node}.
17
+ # @return [Nodes::Node] The deserialized {Nodes::Node Node}.
18
18
  def load filepath
19
19
  require 'yaml'
20
20
  ::YAML.load serializer.load filepath
21
21
  end
22
22
 
23
- # Serializes a {Node Node} and dumps it as a YAML object into filepath.
24
- # @param [Node] node the node to serialize
23
+ # Serializes a {Nodes::Node Node} and dumps it as a YAML object into filepath.
24
+ # @param [Nodes::Node] node the node to serialize
25
25
  # @param [String] filepath the full path of the file to dump the YAML
26
26
  # object into.
27
27
  # @return [Numeric] number of bytes written to disk.
@@ -4,11 +4,9 @@ module Rambling
4
4
  # Zip file serializer. Dumps/loads contents from zip files. Automatically
5
5
  # detects if zip file contains `.marshal` or `.yml` file
6
6
  class Zip
7
- extend ::Forwardable
8
-
9
7
  # Creates a new Zip serializer.
10
- # @param [Properties] properties the configuration properties set up so
11
- # far.
8
+ # @param [Configuration::Properties] properties the configuration
9
+ # properties set up so far.
12
10
  def initialize properties
13
11
  @properties = properties
14
12
  end
@@ -52,10 +50,13 @@ module Rambling
52
50
 
53
51
  attr_reader :properties
54
52
 
55
- delegate [
56
- :serializers,
57
- :tmp_path
58
- ] => :properties
53
+ def serializers
54
+ properties.serializers
55
+ end
56
+
57
+ def tmp_path
58
+ properties.tmp_path
59
+ end
59
60
 
60
61
  def path filename
61
62
  require 'securerandom'
@@ -1,6 +1,6 @@
1
1
  module Rambling
2
2
  module Trie
3
3
  # Current version of the rambling-trie.
4
- VERSION = '1.0.2'.freeze
4
+ VERSION = '1.0.3'.freeze
5
5
  end
6
6
  end
@@ -21,3 +21,4 @@ tus
21
21
  tuyos
22
22
  verdad
23
23
  verdadero
24
+ 🙃
@@ -1,61 +1,68 @@
1
1
  require 'spec_helper'
2
+ require 'zip'
2
3
 
3
4
  describe Rambling::Trie do
4
- describe 'with words provided directly' do
5
- it_behaves_like 'a compressable trie' do
6
- let(:words) { %w[a couple of words for our full trie integration test] }
7
- let(:trie) { Rambling::Trie.create { |t| words.each { |w| t << w } } }
5
+ let(:assets_path) { File.join ::SPEC_ROOT, 'assets' }
6
+
7
+ context 'when providing words directly' do
8
+ it_behaves_like 'a compressible trie' do
9
+ let(:words) { %w(a couple of words for our full trie integration test) }
10
+ let(:trie) { Rambling::Trie.create }
11
+
12
+ before do
13
+ trie.concat words
14
+ end
8
15
  end
9
16
  end
10
17
 
11
- describe 'with words from a file' do
12
- it_behaves_like 'a compressable trie' do
13
- let(:filepath) { File.join ::SPEC_ROOT, 'assets', 'test_words.en_US.txt' }
14
- let(:words) { File.readlines(filepath).map &:chomp! }
15
- let(:trie) { Rambling::Trie.create filepath }
18
+ context 'when provided with words with unicode characters' do
19
+ it_behaves_like 'a compressible trie' do
20
+ let(:words) { %w(poquísimas palabras para nuestra prueba de integración completa 🙃) }
21
+ let(:trie) { Rambling::Trie.create }
22
+
23
+ before do
24
+ trie.concat words
25
+ end
16
26
  end
17
27
  end
18
28
 
19
- describe 'with words with unicode characters' do
20
- it_behaves_like 'a compressable trie' do
21
- let(:words) { %w[poquísimas palabras para nuestra prueba de integración completa] }
22
- let(:trie) { Rambling::Trie.create { |t| words.each { |w| t << w } } }
29
+ context 'when provided with a filepath' do
30
+ let(:words) { File.readlines(filepath).map &:chomp! }
31
+ let(:trie) { Rambling::Trie.create filepath }
32
+
33
+ context 'with english words' do
34
+ it_behaves_like 'a compressible trie' do
35
+ let(:filepath) { File.join assets_path, 'test_words.en_US.txt' }
36
+ end
23
37
  end
24
- end
25
38
 
26
- describe 'with words with unicode characters from a file' do
27
- it_behaves_like 'a compressable trie' do
28
- let(:filepath) { File.join ::SPEC_ROOT, 'assets', 'test_words.en_US.txt' }
29
- let(:words) { File.readlines(filepath).map &:chomp! }
30
- let(:trie) { Rambling::Trie.create filepath }
39
+ context 'with unicode characters' do
40
+ it_behaves_like 'a compressible trie' do
41
+ let(:filepath) { File.join assets_path, 'test_words.es_DO.txt' }
42
+ end
31
43
  end
32
44
  end
33
45
 
34
- describe 'saving and loading full trie from a file' do
35
- let(:words_filepath) { File.join ::SPEC_ROOT, 'assets', 'test_words.en_US.txt' }
36
- let(:words) { File.readlines(words_filepath).map &:chomp! }
37
- let(:trie_to_serialize) { Rambling::Trie.create words_filepath }
38
- let(:trie_filename) { File.join ::SPEC_ROOT, '..', 'tmp', 'trie-root' }
46
+ describe 'dump and load' do
47
+ let(:words_filepath) { File.join assets_path, 'test_words.en_US.txt' }
48
+ let(:words) { File.readlines(words_filepath).map &:chomp }
39
49
 
40
50
  context 'when serialized with Ruby marshal format (default)' do
41
51
  it_behaves_like 'a serializable trie' do
42
- let(:trie_filepath) { "#{trie_filename}.marshal" }
43
- let(:loaded_trie) { Rambling::Trie.load trie_filepath }
44
- let(:serializer) { nil }
52
+ let(:trie_to_serialize) { Rambling::Trie.create words_filepath }
53
+ let(:format) { :marshal }
45
54
  end
46
55
  end
47
56
 
48
57
  context 'when serialized with YAML' do
49
58
  it_behaves_like 'a serializable trie' do
50
- let(:trie_filepath) { "#{trie_filename}.yml" }
51
- let(:loaded_trie) { Rambling::Trie.load trie_filepath }
52
- let(:serializer) { nil }
59
+ let(:trie_to_serialize) { Rambling::Trie.create words_filepath }
60
+ let(:format) { :yml }
53
61
  end
54
62
  end
55
63
 
56
64
  context 'when serialized with zipped Ruby marshal format' do
57
65
  before do
58
- require 'zip'
59
66
  @original_on_exists_proc = ::Zip.on_exists_proc
60
67
  @original_continue_on_exists_proc = ::Zip.continue_on_exists_proc
61
68
  ::Zip.on_exists_proc = true
@@ -63,15 +70,13 @@ describe Rambling::Trie do
63
70
  end
64
71
 
65
72
  after do
66
- require 'zip'
67
73
  ::Zip.on_exists_proc = @original_on_exists_proc
68
74
  ::Zip.continue_on_exists_proc = @original_continue_on_exists_proc
69
75
  end
70
76
 
71
77
  it_behaves_like 'a serializable trie' do
72
- let(:trie_filepath) { "#{trie_filename}.marshal.zip" }
73
- let(:loaded_trie) { Rambling::Trie.load trie_filepath }
74
- let(:serializer) { nil }
78
+ let(:trie_to_serialize) { Rambling::Trie.create words_filepath }
79
+ let(:format) { 'marshal.zip' }
75
80
  end
76
81
  end
77
82
  end
@@ -2,8 +2,8 @@ require 'spec_helper'
2
2
 
3
3
  describe Rambling::Trie::Comparable do
4
4
  describe '#==' do
5
- let(:node_1) { Rambling::Trie::RawNode.new }
6
- let(:node_2) { Rambling::Trie::RawNode.new }
5
+ let(:node_1) { Rambling::Trie::Nodes::Raw.new }
6
+ let(:node_2) { Rambling::Trie::Nodes::Raw.new }
7
7
 
8
8
  context 'when the nodes do not have the same letter' do
9
9
  before do
@@ -67,37 +67,28 @@ describe Rambling::Trie::Comparable do
67
67
  context 'when the nodes have the same letter and the same children' do
68
68
  before do
69
69
  node_1.letter = :t
70
- node_1.add 'hese'
71
- node_1.add 'hree'
72
- node_1.add 'hings'
70
+ add_words node_1, %w(hese hree hings)
73
71
 
74
72
  node_2.letter = :t
75
- node_2.add 'hese'
76
- node_2.add 'hree'
77
- node_2.add 'hings'
73
+ add_words node_2, %w(hese hree hings)
78
74
  end
79
75
 
80
76
  it 'returns true' do
81
77
  expect(node_1).to eq node_2
82
- expect(node_1[:h][:e][:s][:e]).to eq node_2[:h][:e][:s][:e]
83
78
  end
84
79
  end
85
80
 
86
81
  context 'when the nodes have the same letter but different children' do
87
82
  before do
88
83
  node_1.letter = :t
89
- node_1.add 'hese'
90
- node_1.add 'wo'
84
+ add_words node_1, %w(hese wo)
91
85
 
92
86
  node_2.letter = :t
93
- node_2.add 'hese'
94
- node_2.add 'hree'
95
- node_2.add 'hings'
87
+ add_words node_2, %w(hese hree hings)
96
88
  end
97
89
 
98
90
  it 'returns false' do
99
91
  expect(node_1).not_to eq node_2
100
- expect(node_1[:h][:e][:s][:e]).to eq node_2[:h][:e][:s][:e]
101
92
  end
102
93
  end
103
94
  end
@@ -4,28 +4,103 @@ describe Rambling::Trie::Compressor do
4
4
  let(:compressor) { Rambling::Trie::Compressor.new }
5
5
 
6
6
  describe '#compress' do
7
- let(:words) { %w(a few words hello hell) }
8
- let(:root) do
9
- Rambling::Trie::RawNode.new
7
+ let(:node) { Rambling::Trie::Nodes::Raw.new }
8
+
9
+ it 'compresses the node' do
10
+ add_words node, %w(a few words hello hell)
11
+ compressed = compressor.compress node
12
+
13
+ expect(compressed.children_tree.keys).to eq %i(a few words hell)
10
14
  end
11
15
 
12
- before do
13
- words.each { |w| root.add w.clone }
16
+ context 'with at least one word' do
17
+ before do
18
+ add_words node, %w(all the words)
19
+ end
20
+
21
+ it 'keeps the node letter nil' do
22
+ compressed = compressor.compress node
23
+
24
+ expect(compressed.letter).to be_nil
25
+ end
14
26
  end
15
27
 
16
- it 'generates a new root with the words from the passed root' do
17
- new_root = compressor.compress root
28
+ context 'with a single word' do
29
+ before do
30
+ add_word node, 'all'
31
+ end
32
+
33
+ it 'compresses into a single node without children' do
34
+ compressed = compressor.compress node
18
35
 
19
- expect(words).not_to be_empty
20
- words.each do |word|
21
- expect(new_root).to include word
36
+ expect(compressed[:all].letter).to eq :all
37
+ expect(compressed[:all].children.size).to eq 0
38
+ expect(compressed[:all]).to be_terminal
39
+ expect(compressed[:all]).to be_compressed
22
40
  end
23
41
  end
24
42
 
25
- it 'compresses the new root' do
26
- new_root = compressor.compress root
43
+ context 'with two words' do
44
+ before do
45
+ add_words node, %w(all ask)
46
+ end
47
+
48
+ it 'compresses into corresponding three nodes' do
49
+ compressed = compressor.compress node
50
+
51
+ expect(compressed[:a].letter).to eq :a
52
+ expect(compressed[:a].children.size).to eq 2
53
+
54
+ expect(compressed[:a][:ll].letter).to eq :ll
55
+ expect(compressed[:a][:sk].letter).to eq :sk
56
+
57
+ expect(compressed[:a][:ll].children.size).to eq 0
58
+ expect(compressed[:a][:sk].children.size).to eq 0
59
+
60
+ expect(compressed[:a][:ll]).to be_terminal
61
+ expect(compressed[:a][:sk]).to be_terminal
62
+
63
+ expect(compressed[:a][:ll]).to be_compressed
64
+ expect(compressed[:a][:sk]).to be_compressed
65
+ end
66
+ end
67
+
68
+ it 'reassigns the parent nodes correctly' do
69
+ add_words node, %w(repay rest repaint)
70
+ compressed = compressor.compress node
71
+
72
+ expect(compressed[:re].letter).to eq :re
73
+ expect(compressed[:re].parent).to eq compressed
74
+ expect(compressed[:re].children.size).to eq 2
75
+
76
+ expect(compressed[:re][:pa].letter).to eq :pa
77
+ expect(compressed[:re][:pa].parent).to eq compressed[:re]
78
+ expect(compressed[:re][:pa].children.size).to eq 2
79
+
80
+ expect(compressed[:re][:st].letter).to eq :st
81
+ expect(compressed[:re][:st].parent).to eq compressed[:re]
82
+ expect(compressed[:re][:st].children.size).to eq 0
83
+
84
+ expect(compressed[:re][:pa][:y].letter).to eq :y
85
+ expect(compressed[:re][:pa][:y].parent).to eq compressed[:re][:pa]
86
+ expect(compressed[:re][:pa][:y].children.size).to eq 0
87
+
88
+ expect(compressed[:re][:pa][:int].letter).to eq :int
89
+ expect(compressed[:re][:pa][:int].parent).to eq compressed[:re][:pa]
90
+ expect(compressed[:re][:pa][:int].children.size).to eq 0
91
+ end
92
+
93
+ it 'does not compress terminal nodes' do
94
+ add_words node, %w(you your yours)
95
+ compressed = compressor.compress node
96
+
97
+ expect(compressed[:you].letter).to eq :you
98
+
99
+ expect(compressed[:you][:r].letter).to eq :r
100
+ expect(compressed[:you][:r]).to be_compressed
27
101
 
28
- expect(new_root.children_tree.keys).to eq %i(a few words hell)
102
+ expect(compressed[:you][:r][:s].letter).to eq :s
103
+ expect(compressed[:you][:r][:s]).to be_compressed
29
104
  end
30
105
  end
31
106
  end