rambling-trie 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -1
  3. data/README.md +23 -7
  4. data/Rakefile +4 -0
  5. data/lib/rambling/trie.rb +27 -21
  6. data/lib/rambling/trie/comparable.rb +3 -3
  7. data/lib/rambling/trie/compressible.rb +14 -0
  8. data/lib/rambling/trie/compressor.rb +37 -24
  9. data/lib/rambling/trie/configuration/properties.rb +8 -6
  10. data/lib/rambling/trie/configuration/provider_collection.rb +34 -16
  11. data/lib/rambling/trie/container.rb +156 -36
  12. data/lib/rambling/trie/enumerable.rb +4 -4
  13. data/lib/rambling/trie/nodes.rb +11 -0
  14. data/lib/rambling/trie/nodes/compressed.rb +115 -0
  15. data/lib/rambling/trie/nodes/missing.rb +10 -0
  16. data/lib/rambling/trie/nodes/node.rb +151 -0
  17. data/lib/rambling/trie/nodes/raw.rb +89 -0
  18. data/lib/rambling/trie/readers/plain_text.rb +1 -11
  19. data/lib/rambling/trie/serializers/marshal.rb +4 -4
  20. data/lib/rambling/trie/serializers/yaml.rb +4 -4
  21. data/lib/rambling/trie/serializers/zip.rb +9 -8
  22. data/lib/rambling/trie/version.rb +1 -1
  23. data/spec/assets/test_words.es_DO.txt +1 -0
  24. data/spec/integration/rambling/trie_spec.rb +40 -35
  25. data/spec/lib/rambling/trie/comparable_spec.rb +6 -15
  26. data/spec/lib/rambling/trie/compressor_spec.rb +88 -13
  27. data/spec/lib/rambling/trie/configuration/properties_spec.rb +7 -7
  28. data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +8 -20
  29. data/spec/lib/rambling/trie/container_spec.rb +159 -168
  30. data/spec/lib/rambling/trie/enumerable_spec.rb +12 -9
  31. data/spec/lib/rambling/trie/inspectable_spec.rb +11 -11
  32. data/spec/lib/rambling/trie/nodes/compressed_spec.rb +35 -0
  33. data/spec/lib/rambling/trie/nodes/node_spec.rb +7 -0
  34. data/spec/lib/rambling/trie/nodes/raw_spec.rb +177 -0
  35. data/spec/lib/rambling/trie/serializers/file_spec.rb +4 -4
  36. data/spec/lib/rambling/trie/serializers/marshal_spec.rb +3 -7
  37. data/spec/lib/rambling/trie/serializers/yaml_spec.rb +3 -7
  38. data/spec/lib/rambling/trie/serializers/zip_spec.rb +16 -20
  39. data/spec/lib/rambling/trie/stringifyable_spec.rb +7 -8
  40. data/spec/lib/rambling/trie_spec.rb +2 -2
  41. data/spec/spec_helper.rb +3 -1
  42. data/spec/support/config.rb +4 -0
  43. data/spec/support/helpers/add_word.rb +18 -0
  44. data/spec/support/shared_examples/{a_compressable_trie.rb → a_compressible_trie.rb} +13 -3
  45. data/spec/support/shared_examples/a_serializable_trie.rb +8 -6
  46. data/spec/support/shared_examples/a_serializer.rb +6 -0
  47. data/spec/{lib/rambling/trie/node_spec.rb → support/shared_examples/a_trie_node.rb} +61 -30
  48. data/spec/{lib/rambling/trie/compressed_node_spec.rb → support/shared_examples/a_trie_node_implementation.rb} +18 -69
  49. metadata +22 -15
  50. data/lib/rambling/trie/compressable.rb +0 -14
  51. data/lib/rambling/trie/compressed_node.rb +0 -120
  52. data/lib/rambling/trie/missing_node.rb +0 -8
  53. data/lib/rambling/trie/node.rb +0 -97
  54. data/lib/rambling/trie/raw_node.rb +0 -96
  55. data/spec/lib/rambling/trie/raw_node_spec.rb +0 -389
@@ -0,0 +1,89 @@
1
+ module Rambling
2
+ module Trie
3
+ module Nodes
4
+ # A representation of a node in an uncompressed trie data structure.
5
+ class Raw < Rambling::Trie::Nodes::Node
6
+ # Adds a word to the current raw (uncompressed) trie node.
7
+ # @param [Array<Symbol>] chars the char array to add to the trie.
8
+ # @return [Raw] the added/modified node based on the word added.
9
+ # @note This method clears the contents of the chars variable.
10
+ def add chars
11
+ if chars.empty?
12
+ terminal!
13
+ else
14
+ add_to_children_tree chars
15
+ end
16
+ end
17
+
18
+ # Checks if a path for a set of characters exists in the trie.
19
+ # @param [Array<String>] chars the characters to look for in the trie.
20
+ # @return [Boolean] `true` if the characters are found, `false`
21
+ # otherwise.
22
+ def partial_word? chars = []
23
+ return true if chars.empty?
24
+
25
+ letter = chars.slice!(0).to_sym
26
+ child = children_tree[letter]
27
+ !!child && child.partial_word?(chars)
28
+ end
29
+
30
+ # Checks if a path for a set of characters represents a word in the trie.
31
+ # @param [Array<String>] chars the characters to look for in the trie.
32
+ # @return [Boolean] `true` if the characters are found and form a word,
33
+ # `false` otherwise.
34
+ def word? chars = []
35
+ return terminal? if chars.empty?
36
+
37
+ letter = chars.slice!(0).to_sym
38
+ child = children_tree[letter]
39
+ !!child && child.word?(chars)
40
+ end
41
+
42
+ # Always returns `false` for a raw (uncompressed) node.
43
+ # @return [Boolean] always `false` for a raw (uncompressed) node.
44
+ def compressed?
45
+ false
46
+ end
47
+
48
+ private
49
+
50
+ def add_to_children_tree chars
51
+ letter = chars.pop
52
+ child = children_tree[letter] || new_node(letter)
53
+ child.add chars
54
+ child
55
+ end
56
+
57
+ def new_node letter
58
+ node = Rambling::Trie::Nodes::Raw.new letter, self
59
+ children_tree[letter] = node
60
+ node
61
+ end
62
+
63
+ def closest_node chars
64
+ letter = chars.slice!(0).to_sym
65
+ child = children_tree[letter]
66
+
67
+ return Rambling::Trie::Nodes::Missing.new unless child
68
+
69
+ child.scan chars
70
+ end
71
+
72
+ def children_match_prefix chars
73
+ return enum_for :children_match_prefix, chars unless block_given?
74
+
75
+ return if chars.empty?
76
+
77
+ letter = chars.slice!(0).to_sym
78
+ child = children_tree[letter]
79
+
80
+ return unless child
81
+
82
+ child.match_prefix chars do |word|
83
+ yield word
84
+ end
85
+ end
86
+ end
87
+ end
88
+ end
89
+ end
@@ -8,17 +8,7 @@ module Rambling
8
8
  # from.
9
9
  # @yield [String] Each line read from the file.
10
10
  def each_word filepath
11
- each_line(filepath) { |line| yield line.chomp! }
12
- end
13
-
14
- private
15
-
16
- def each_line filepath
17
- open(filepath) { |file| file.each_line { |line| yield line } }
18
- end
19
-
20
- def open filepath
21
- File.open(filepath) { |file| yield file }
11
+ File.foreach(filepath) { |line| yield line.chomp! }
22
12
  end
23
13
  end
24
14
  end
@@ -11,17 +11,17 @@ module Rambling
11
11
  end
12
12
 
13
13
  # Loads marshaled object from contents in filepath and deserializes it
14
- # into a {Node Node}.
14
+ # into a {Nodes::Node Node}.
15
15
  # @param [String] filepath the full path of the file to load the
16
16
  # marshaled object from.
17
- # @return [Node] The deserialized {Node Node}.
17
+ # @return [Nodes::Node] The deserialized {Nodes::Node Node}.
18
18
  def load filepath
19
19
  ::Marshal.load serializer.load filepath
20
20
  end
21
21
 
22
- # Serializes a {Node Node} and dumps it as a marshaled object into
22
+ # Serializes a {Nodes::Node Node} and dumps it as a marshaled object into
23
23
  # filepath.
24
- # @param [Node] node the node to serialize
24
+ # @param [Nodes::Node] node the node to serialize
25
25
  # @param [String] filepath the full path of the file to dump the
26
26
  # marshaled object into.
27
27
  # @return [Numeric] number of bytes written to disk.
@@ -11,17 +11,17 @@ module Rambling
11
11
  end
12
12
 
13
13
  # Loads serialized object from YAML file in filepath and deserializes
14
- # it into a {Node Node}.
14
+ # it into a {Nodes::Node Node}.
15
15
  # @param [String] filepath the full path of the file to load the
16
16
  # serialized YAML object from.
17
- # @return [Node] The deserialized {Node Node}.
17
+ # @return [Nodes::Node] The deserialized {Nodes::Node Node}.
18
18
  def load filepath
19
19
  require 'yaml'
20
20
  ::YAML.load serializer.load filepath
21
21
  end
22
22
 
23
- # Serializes a {Node Node} and dumps it as a YAML object into filepath.
24
- # @param [Node] node the node to serialize
23
+ # Serializes a {Nodes::Node Node} and dumps it as a YAML object into filepath.
24
+ # @param [Nodes::Node] node the node to serialize
25
25
  # @param [String] filepath the full path of the file to dump the YAML
26
26
  # object into.
27
27
  # @return [Numeric] number of bytes written to disk.
@@ -4,11 +4,9 @@ module Rambling
4
4
  # Zip file serializer. Dumps/loads contents from zip files. Automatically
5
5
  # detects whether the zip file contains a `.marshal` or `.yml` file.
6
6
  class Zip
7
- extend ::Forwardable
8
-
9
7
  # Creates a new Zip serializer.
10
- # @param [Properties] properties the configuration properties set up so
11
- # far.
8
+ # @param [Configuration::Properties] properties the configuration
9
+ # properties set up so far.
12
10
  def initialize properties
13
11
  @properties = properties
14
12
  end
@@ -52,10 +50,13 @@ module Rambling
52
50
 
53
51
  attr_reader :properties
54
52
 
55
- delegate [
56
- :serializers,
57
- :tmp_path
58
- ] => :properties
53
+ def serializers
54
+ properties.serializers
55
+ end
56
+
57
+ def tmp_path
58
+ properties.tmp_path
59
+ end
59
60
 
60
61
  def path filename
61
62
  require 'securerandom'
@@ -1,6 +1,6 @@
1
1
  module Rambling
2
2
  module Trie
3
3
  # Current version of the rambling-trie.
4
- VERSION = '1.0.2'.freeze
4
+ VERSION = '1.0.3'.freeze
5
5
  end
6
6
  end
@@ -21,3 +21,4 @@ tus
21
21
  tuyos
22
22
  verdad
23
23
  verdadero
24
+ 🙃
@@ -1,61 +1,68 @@
1
1
  require 'spec_helper'
2
+ require 'zip'
2
3
 
3
4
  describe Rambling::Trie do
4
- describe 'with words provided directly' do
5
- it_behaves_like 'a compressable trie' do
6
- let(:words) { %w[a couple of words for our full trie integration test] }
7
- let(:trie) { Rambling::Trie.create { |t| words.each { |w| t << w } } }
5
+ let(:assets_path) { File.join ::SPEC_ROOT, 'assets' }
6
+
7
+ context 'when providing words directly' do
8
+ it_behaves_like 'a compressible trie' do
9
+ let(:words) { %w(a couple of words for our full trie integration test) }
10
+ let(:trie) { Rambling::Trie.create }
11
+
12
+ before do
13
+ trie.concat words
14
+ end
8
15
  end
9
16
  end
10
17
 
11
- describe 'with words from a file' do
12
- it_behaves_like 'a compressable trie' do
13
- let(:filepath) { File.join ::SPEC_ROOT, 'assets', 'test_words.en_US.txt' }
14
- let(:words) { File.readlines(filepath).map &:chomp! }
15
- let(:trie) { Rambling::Trie.create filepath }
18
+ context 'when provided with words with unicode characters' do
19
+ it_behaves_like 'a compressible trie' do
20
+ let(:words) { %w(poquísimas palabras para nuestra prueba de integración completa 🙃) }
21
+ let(:trie) { Rambling::Trie.create }
22
+
23
+ before do
24
+ trie.concat words
25
+ end
16
26
  end
17
27
  end
18
28
 
19
- describe 'with words with unicode characters' do
20
- it_behaves_like 'a compressable trie' do
21
- let(:words) { %w[poquísimas palabras para nuestra prueba de integración completa] }
22
- let(:trie) { Rambling::Trie.create { |t| words.each { |w| t << w } } }
29
+ context 'when provided with a filepath' do
30
+ let(:words) { File.readlines(filepath).map &:chomp! }
31
+ let(:trie) { Rambling::Trie.create filepath }
32
+
33
+ context 'with english words' do
34
+ it_behaves_like 'a compressible trie' do
35
+ let(:filepath) { File.join assets_path, 'test_words.en_US.txt' }
36
+ end
23
37
  end
24
- end
25
38
 
26
- describe 'with words with unicode characters from a file' do
27
- it_behaves_like 'a compressable trie' do
28
- let(:filepath) { File.join ::SPEC_ROOT, 'assets', 'test_words.en_US.txt' }
29
- let(:words) { File.readlines(filepath).map &:chomp! }
30
- let(:trie) { Rambling::Trie.create filepath }
39
+ context 'with unicode characters' do
40
+ it_behaves_like 'a compressible trie' do
41
+ let(:filepath) { File.join assets_path, 'test_words.es_DO.txt' }
42
+ end
31
43
  end
32
44
  end
33
45
 
34
- describe 'saving and loading full trie from a file' do
35
- let(:words_filepath) { File.join ::SPEC_ROOT, 'assets', 'test_words.en_US.txt' }
36
- let(:words) { File.readlines(words_filepath).map &:chomp! }
37
- let(:trie_to_serialize) { Rambling::Trie.create words_filepath }
38
- let(:trie_filename) { File.join ::SPEC_ROOT, '..', 'tmp', 'trie-root' }
46
+ describe 'dump and load' do
47
+ let(:words_filepath) { File.join assets_path, 'test_words.en_US.txt' }
48
+ let(:words) { File.readlines(words_filepath).map &:chomp }
39
49
 
40
50
  context 'when serialized with Ruby marshal format (default)' do
41
51
  it_behaves_like 'a serializable trie' do
42
- let(:trie_filepath) { "#{trie_filename}.marshal" }
43
- let(:loaded_trie) { Rambling::Trie.load trie_filepath }
44
- let(:serializer) { nil }
52
+ let(:trie_to_serialize) { Rambling::Trie.create words_filepath }
53
+ let(:format) { :marshal }
45
54
  end
46
55
  end
47
56
 
48
57
  context 'when serialized with YAML' do
49
58
  it_behaves_like 'a serializable trie' do
50
- let(:trie_filepath) { "#{trie_filename}.yml" }
51
- let(:loaded_trie) { Rambling::Trie.load trie_filepath }
52
- let(:serializer) { nil }
59
+ let(:trie_to_serialize) { Rambling::Trie.create words_filepath }
60
+ let(:format) { :yml }
53
61
  end
54
62
  end
55
63
 
56
64
  context 'when serialized with zipped Ruby marshal format' do
57
65
  before do
58
- require 'zip'
59
66
  @original_on_exists_proc = ::Zip.on_exists_proc
60
67
  @original_continue_on_exists_proc = ::Zip.continue_on_exists_proc
61
68
  ::Zip.on_exists_proc = true
@@ -63,15 +70,13 @@ describe Rambling::Trie do
63
70
  end
64
71
 
65
72
  after do
66
- require 'zip'
67
73
  ::Zip.on_exists_proc = @original_on_exists_proc
68
74
  ::Zip.continue_on_exists_proc = @original_continue_on_exists_proc
69
75
  end
70
76
 
71
77
  it_behaves_like 'a serializable trie' do
72
- let(:trie_filepath) { "#{trie_filename}.marshal.zip" }
73
- let(:loaded_trie) { Rambling::Trie.load trie_filepath }
74
- let(:serializer) { nil }
78
+ let(:trie_to_serialize) { Rambling::Trie.create words_filepath }
79
+ let(:format) { 'marshal.zip' }
75
80
  end
76
81
  end
77
82
  end
@@ -2,8 +2,8 @@ require 'spec_helper'
2
2
 
3
3
  describe Rambling::Trie::Comparable do
4
4
  describe '#==' do
5
- let(:node_1) { Rambling::Trie::RawNode.new }
6
- let(:node_2) { Rambling::Trie::RawNode.new }
5
+ let(:node_1) { Rambling::Trie::Nodes::Raw.new }
6
+ let(:node_2) { Rambling::Trie::Nodes::Raw.new }
7
7
 
8
8
  context 'when the nodes do not have the same letter' do
9
9
  before do
@@ -67,37 +67,28 @@ describe Rambling::Trie::Comparable do
67
67
  context 'when the nodes have the same letter and the same children' do
68
68
  before do
69
69
  node_1.letter = :t
70
- node_1.add 'hese'
71
- node_1.add 'hree'
72
- node_1.add 'hings'
70
+ add_words node_1, %w(hese hree hings)
73
71
 
74
72
  node_2.letter = :t
75
- node_2.add 'hese'
76
- node_2.add 'hree'
77
- node_2.add 'hings'
73
+ add_words node_2, %w(hese hree hings)
78
74
  end
79
75
 
80
76
  it 'returns true' do
81
77
  expect(node_1).to eq node_2
82
- expect(node_1[:h][:e][:s][:e]).to eq node_2[:h][:e][:s][:e]
83
78
  end
84
79
  end
85
80
 
86
81
  context 'when the nodes have the same letter but different children' do
87
82
  before do
88
83
  node_1.letter = :t
89
- node_1.add 'hese'
90
- node_1.add 'wo'
84
+ add_words node_1, %w(hese wo)
91
85
 
92
86
  node_2.letter = :t
93
- node_2.add 'hese'
94
- node_2.add 'hree'
95
- node_2.add 'hings'
87
+ add_words node_2, %w(hese hree hings)
96
88
  end
97
89
 
98
90
  it 'returns false' do
99
91
  expect(node_1).not_to eq node_2
100
- expect(node_1[:h][:e][:s][:e]).to eq node_2[:h][:e][:s][:e]
101
92
  end
102
93
  end
103
94
  end
@@ -4,28 +4,103 @@ describe Rambling::Trie::Compressor do
4
4
  let(:compressor) { Rambling::Trie::Compressor.new }
5
5
 
6
6
  describe '#compress' do
7
- let(:words) { %w(a few words hello hell) }
8
- let(:root) do
9
- Rambling::Trie::RawNode.new
7
+ let(:node) { Rambling::Trie::Nodes::Raw.new }
8
+
9
+ it 'compresses the node' do
10
+ add_words node, %w(a few words hello hell)
11
+ compressed = compressor.compress node
12
+
13
+ expect(compressed.children_tree.keys).to eq %i(a few words hell)
10
14
  end
11
15
 
12
- before do
13
- words.each { |w| root.add w.clone }
16
+ context 'with at least one word' do
17
+ before do
18
+ add_words node, %w(all the words)
19
+ end
20
+
21
+ it 'keeps the node letter nil' do
22
+ compressed = compressor.compress node
23
+
24
+ expect(compressed.letter).to be_nil
25
+ end
14
26
  end
15
27
 
16
- it 'generates a new root with the words from the passed root' do
17
- new_root = compressor.compress root
28
+ context 'with a single word' do
29
+ before do
30
+ add_word node, 'all'
31
+ end
32
+
33
+ it 'compresses into a single node without children' do
34
+ compressed = compressor.compress node
18
35
 
19
- expect(words).not_to be_empty
20
- words.each do |word|
21
- expect(new_root).to include word
36
+ expect(compressed[:all].letter).to eq :all
37
+ expect(compressed[:all].children.size).to eq 0
38
+ expect(compressed[:all]).to be_terminal
39
+ expect(compressed[:all]).to be_compressed
22
40
  end
23
41
  end
24
42
 
25
- it 'compresses the new root' do
26
- new_root = compressor.compress root
43
+ context 'with two words' do
44
+ before do
45
+ add_words node, %w(all ask)
46
+ end
47
+
48
+ it 'compresses into corresponding three nodes' do
49
+ compressed = compressor.compress node
50
+
51
+ expect(compressed[:a].letter).to eq :a
52
+ expect(compressed[:a].children.size).to eq 2
53
+
54
+ expect(compressed[:a][:ll].letter).to eq :ll
55
+ expect(compressed[:a][:sk].letter).to eq :sk
56
+
57
+ expect(compressed[:a][:ll].children.size).to eq 0
58
+ expect(compressed[:a][:sk].children.size).to eq 0
59
+
60
+ expect(compressed[:a][:ll]).to be_terminal
61
+ expect(compressed[:a][:sk]).to be_terminal
62
+
63
+ expect(compressed[:a][:ll]).to be_compressed
64
+ expect(compressed[:a][:sk]).to be_compressed
65
+ end
66
+ end
67
+
68
+ it 'reassigns the parent nodes correctly' do
69
+ add_words node, %w(repay rest repaint)
70
+ compressed = compressor.compress node
71
+
72
+ expect(compressed[:re].letter).to eq :re
73
+ expect(compressed[:re].parent).to eq compressed
74
+ expect(compressed[:re].children.size).to eq 2
75
+
76
+ expect(compressed[:re][:pa].letter).to eq :pa
77
+ expect(compressed[:re][:pa].parent).to eq compressed[:re]
78
+ expect(compressed[:re][:pa].children.size).to eq 2
79
+
80
+ expect(compressed[:re][:st].letter).to eq :st
81
+ expect(compressed[:re][:st].parent).to eq compressed[:re]
82
+ expect(compressed[:re][:st].children.size).to eq 0
83
+
84
+ expect(compressed[:re][:pa][:y].letter).to eq :y
85
+ expect(compressed[:re][:pa][:y].parent).to eq compressed[:re][:pa]
86
+ expect(compressed[:re][:pa][:y].children.size).to eq 0
87
+
88
+ expect(compressed[:re][:pa][:int].letter).to eq :int
89
+ expect(compressed[:re][:pa][:int].parent).to eq compressed[:re][:pa]
90
+ expect(compressed[:re][:pa][:int].children.size).to eq 0
91
+ end
92
+
93
+ it 'does not compress terminal nodes' do
94
+ add_words node, %w(you your yours)
95
+ compressed = compressor.compress node
96
+
97
+ expect(compressed[:you].letter).to eq :you
98
+
99
+ expect(compressed[:you][:r].letter).to eq :r
100
+ expect(compressed[:you][:r]).to be_compressed
27
101
 
28
- expect(new_root.children_tree.keys).to eq %i(a few words hell)
102
+ expect(compressed[:you][:r][:s].letter).to eq :s
103
+ expect(compressed[:you][:r][:s]).to be_compressed
29
104
  end
30
105
  end
31
106
  end