gaddag 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +5 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +3 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +38 -0
  8. data/Rakefile +1 -0
  9. data/gaddag.gemspec +31 -0
  10. data/lib/gaddag.rb +47 -0
  11. data/lib/gaddag/arc.rb +41 -0
  12. data/lib/gaddag/node.rb +115 -0
  13. data/lib/gaddag/path.rb +72 -0
  14. data/lib/gaddag/word.rb +39 -0
  15. data/spec/shared/unit/gaddag/arc_context.rb +6 -0
  16. data/spec/shared/unit/gaddag/node/create_arc_behaviour.rb +41 -0
  17. data/spec/shared/unit/gaddag/node/create_final_path_behaviour.rb +33 -0
  18. data/spec/shared/unit/gaddag/node/create_path_behaviour.rb +15 -0
  19. data/spec/shared/unit/gaddag/node/create_path_context.rb +6 -0
  20. data/spec/unit/gaddag/add_spec.rb +46 -0
  21. data/spec/unit/gaddag/arc/add_final_letter_spec.rb +24 -0
  22. data/spec/unit/gaddag/arc/final_paths_spec.rb +49 -0
  23. data/spec/unit/gaddag/arc/initialize_spec.rb +16 -0
  24. data/spec/unit/gaddag/find_spec.rb +66 -0
  25. data/spec/unit/gaddag/initialize_spec.rb +11 -0
  26. data/spec/unit/gaddag/node/arc_spec.rb +23 -0
  27. data/spec/unit/gaddag/node/create_arc_spec.rb +8 -0
  28. data/spec/unit/gaddag/node/create_final_arc_spec.rb +18 -0
  29. data/spec/unit/gaddag/node/create_final_path_spec.rb +43 -0
  30. data/spec/unit/gaddag/node/create_path_spec.rb +44 -0
  31. data/spec/unit/gaddag/node/final_path_spec.rb +30 -0
  32. data/spec/unit/gaddag/node/final_paths_spec.rb +48 -0
  33. data/spec/unit/gaddag/node/follow_arc_spec.rb +24 -0
  34. data/spec/unit/gaddag/node/follow_path_spec.rb +41 -0
  35. data/spec/unit/gaddag/node/path_spec.rb +30 -0
  36. data/spec/unit/gaddag/path/equal_value_spec.rb +17 -0
  37. data/spec/unit/gaddag/path/include_delimiter_spec.rb +15 -0
  38. data/spec/unit/gaddag/path/initialize_spec.rb +12 -0
  39. data/spec/unit/gaddag/path/reversed_prefix_letters_spec.rb +34 -0
  40. data/spec/unit/gaddag/path/start_with_spec.rb +39 -0
  41. data/spec/unit/gaddag/path/suffix_letters_spec.rb +34 -0
  42. data/spec/unit/gaddag/path/to_ary_spec.rb +15 -0
  43. data/spec/unit/gaddag/path/to_s_spec.rb +18 -0
  44. data/spec/unit/gaddag/path/to_word_spec.rb +37 -0
  45. data/spec/unit/gaddag/word/equal_value_spec.rb +17 -0
  46. data/spec/unit/gaddag/word/initialize_spec.rb +12 -0
  47. data/spec/unit/gaddag/word/to_delimited_paths_spec.rb +29 -0
  48. data/spec/unit/gaddag/word/to_s_spec.rb +18 -0
  49. metadata +252 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 99e393cc0cfea04c5b5f46c757d30d0081b0985e
4
+ data.tar.gz: 505ae94f2907cbe881f5684ac06d6e25bac72ed6
5
+ SHA512:
6
+ metadata.gz: 2871a06741e0ec9d9c5a8dc634d12a5877bc4049c6be0838fd28744383a12d2a659e800b2f01f4b003fd0e92f1d55ea16f3b12df68cfeb5e4454b07ea7da5457
7
+ data.tar.gz: a126efac0f8a4533485a690399db473ec1b0756c2c6b6185ca53650489f001fa123a33c5724f50afc474eb454ff16dcb36de5b91318e67ee12239a53fe2472cb
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ doc
5
+ .yardoc
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --order random
@@ -0,0 +1 @@
1
+ 2.0.0-p353
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Thomas Brus
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,38 @@
1
+ # GADDAG data structure in Ruby
2
+
3
+ A [GADDAG](http://en.wikipedia.org/wiki/GADDAG) is a data structure that
4
+ allows for fast lookup of words by substring. It is a directed acyclic graph, where
5
+ each word can be constructed from the root via any of its reversed prefixes. Its main application
6
+ is move generation in Scrabble. The data structure is explained in more detail in
7
+ [the original research paper](http://www.ericsink.com/downloads/faster-scrabble-gordon.pdf).
8
+
9
+ ## Usage
10
+
11
+ Initializing the GADDAG is simple:
12
+
13
+ ```ruby
14
+ require 'gaddag'
15
+ gaddag = GADDAG.new
16
+ ```
17
+
18
+ Adding words is done via the `add` method. This will expand the graph with paths for all
19
+ the reversed prefixes of the word. Note that this may take some time when adding
20
+ a large number of words.
21
+
22
+ ```ruby
23
+ IO.foreach('/usr/share/dict/words').map(&:chomp).each do |word|
24
+ if word.length == 10
25
+ gaddag.add(word) # => #<GADDAG:0x007fc6c24367b0 ... >
26
+ end
27
+ end
28
+ ```
29
+
30
+ In order to find all words that contain a given substring, use the `find` method:
31
+
32
+ ```ruby
33
+ gaddag.find('elevi') # => ["televiewer", "television", "televisual"]
34
+ ```
35
+
36
+ ## License
37
+
38
+ See [LICENSE.txt](LICENSE.txt).
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,31 @@
1
+ # encoding: utf-8
2
+
3
Gem::Specification.new do |gem|
  # Identity and descriptive metadata
  gem.name     = 'gaddag'
  gem.version  = '0.1.1'
  gem.summary  = 'Implementation of the GADDAG data structure'
  gem.homepage = 'https://github.com/thomasbrus/gaddag'
  gem.license  = 'MIT'
  gem.authors  = ['Thomas Brus']
  gem.email    = ['thomas.brus@me.com']

  # Package contents: every file tracked by git; executables and test files
  # are picked out of that list by directory prefix
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Runtime dependency
  gem.add_dependency 'equalizer', '~> 0.0.9'

  # Development dependencies as [name, optional version requirement],
  # registered in the order listed
  [
    ['bundler',      '~> 1.5.1'],
    ['yard',         '~> 0.8.7'],
    ['inch',         '~> 0.4.5'],
    ['rspec',        '~> 2.14.1'],
    ['mutant',       '~> 0.5.11'],
    ['mutant-rspec', '~> 0.5.10'],
    ['rake'],
    ['pry']
  ].each do |name_and_requirement|
    gem.add_development_dependency(*name_and_requirement)
  end
end
@@ -0,0 +1,47 @@
1
+ # encoding: utf-8
2
+
3
+ require 'equalizer'
4
+
5
+ require_relative 'gaddag/arc'
6
+ require_relative 'gaddag/node'
7
+ require_relative 'gaddag/path'
8
+ require_relative 'gaddag/word'
9
+
10
# Implementation of the GADDAG data structure
class GADDAG
  # The root node
  attr_reader :root

  # Initializes a GADDAG
  # @return [GADDAG]
  def initialize
    @root = Node.new
  end

  # Adds a word to the GADDAG. One final path is created for the fully
  # reversed word followed by the delimiter, plus one for every delimited
  # path of the word (reversed prefix, delimiter, suffix).
  # An empty word is ignored.
  # @param word [String] the word to be added
  # @return [GADDAG] the GADDAG instance
  def add(word)
    letters = word.chars

    # Without this guard the only "letter" handed to the root would be the
    # delimiter itself, which registers a final arc whose final letter is nil
    # and later breaks word reconstruction in #find.
    return self if letters.empty?

    @root.create_final_path(letters.reverse + [Path::DELIMITER])

    Word.new(letters).to_delimited_paths.each do |path|
      @root.create_final_path(path.letters)
    end

    self
  end

  # Finds all words that contain the given substring
  # @param substring [String] the substring to search for
  # @return [Array<String>] all matching words, without duplicates
  def find(substring)
    first_letter, second_letter, *last_letters = *substring.chars

    # The graph is entered with the substring reversed. All letters but the
    # first two are followed as a plain path; the first two are matched against
    # the start of the final paths that leave the node reached that way.
    reversed_tail = last_letters.reverse
    return [] unless @root.path?(reversed_tail)

    matching_paths = @root.follow_path(reversed_tail).final_paths.select do |path|
      path.start_with?([second_letter, first_letter])
    end

    matching_paths.map do |path|
      Path.new(reversed_tail + path).to_word.to_s
    end.uniq
  end
end
@@ -0,0 +1,41 @@
1
+ # encoding: utf-8
2
+
3
+ require 'set'
4
+
5
class GADDAG

  # An arc leads to a destination node and may carry a set of final letters
  class Arc
    # The node this arc points to
    attr_reader :destination

    # The letters that complete a word when appended to the letter path
    attr_reader :final_letters

    # Arcs compare by value: same destination node and same final letters
    send :include, Equalizer.new(:destination, :final_letters)

    # Builds an arc to the given node; it starts out without final letters
    # @param destination [Node] the destination node
    # @return [Arc]
    def initialize(destination)
      @final_letters = Set.new
      @destination = destination
    end

    # Marks a letter as final for this arc, meaning that appending it to the
    # letter path forms a valid word
    # @param letter [String] the letter that is to be marked as final
    def add_final_letter(letter)
      @final_letters << letter
    end

    # Collects the final paths that begin at this arc: one single-letter path
    # per final letter, followed by the final paths of the destination node
    # @return [Array<Path>] a list of final paths that start at this arc
    def final_paths
      own_paths = final_letters.map { |letter| Path.new([letter]) }
      own_paths + destination.final_paths
    end
  end

end
@@ -0,0 +1,115 @@
1
+ # encoding: utf-8
2
+
3
class GADDAG

  # Represents a node in the GADDAG data structure
  class Node
    # A mapping of letters (stored as symbols) to arcs
    attr_reader :outgoing_arcs

    # Two nodes are equal if they have the same set of outgoing arcs
    send :include, Equalizer.new(:outgoing_arcs)

    # Initializes a GADDAG node
    # @return [Node]
    def initialize
      @outgoing_arcs = {}
    end

    # Creates an outgoing arc for a letter to a destination node
    # @param letter [String] the letter to be added to the path when this
    #   arc is followed
    # @param destination [Node] the node to which this arc should point;
    #   ignored when an arc for this letter already exists
    # @return [Arc] the newly created arc or an existing arc if one already
    #   exists for this letter
    def create_arc(letter, destination = Node.new)
      @outgoing_arcs[letter.to_sym] ||= Arc.new(destination)
    end

    # Checks whether an outgoing arc for the given letter exists
    # @param letter [String] the letter to check for
    # @return [Boolean] whether the outgoing arc exists
    def arc?(letter)
      @outgoing_arcs.key?(letter.to_sym)
    end

    # Creates a final outgoing arc for a letter to a destination node. Effectively
    # this will add a final letter to the outgoing arc, indicating that a valid
    # word can be formed with it.
    # @param letter [String] the letter of the arc
    # @param final_letter [String] the letter to be marked as final on the arc
    # @param destination [Node] the node to which the arc should point
    # @return [Arc] the arc that the final letter was added to
    # @see #create_arc
    def create_final_arc(letter, final_letter, destination = Node.new)
      create_arc(letter, destination).tap { |arc| arc.add_final_letter(final_letter) }
    end

    # Creates a path for a list of letters and optional destination nodes
    # @param letters [Array<String>] the letters for which the path should be built
    # @param destinations [Array<Node>] the destination nodes which the path should
    #   visit; a new node is used wherever no destination is given
    # @return [Node] the last destination node on the path (this node itself
    #   when no letters are given)
    def create_path(letters, destinations = [])
      letters.zip(destinations).inject(self) do |node, (letter, destination)|
        node.create_arc(letter, destination || Node.new).destination
      end
    end

    # Checks whether a path exists for the given list of letters
    # @param letters [Array<String>] the letter path to check for
    # @return [Boolean] whether the path exists
    def path?(letters)
      return true if letters.empty?
      return false unless arc?(letters.first)
      follow_arc(letters.first).path?(letters[1..-1])
    end

    # Creates a path for a list of letters and optional destination nodes,
    # omitting the last node, and marking the last letter as final.
    # The list is expected to hold at least two letters: the letter of the final
    # arc and the final letter itself. With fewer letters the destructuring
    # below leaves the missing ones nil.
    # @param letters [Array<String>] the letters of the final path
    # @param destinations [Array<Node>] the destination nodes which the path should visit
    # @return [Node] the destination node passed for the final arc
    # @see #create_path
    def create_final_path(letters, destinations = [])
      *initial_letters, second_last_letter, last_letter = *letters
      second_last_node = create_path(initial_letters, destinations)

      (destinations[initial_letters.length] || Node.new).tap do |final_destination|
        second_last_node.create_final_arc(second_last_letter, last_letter, final_destination)
      end
    end

    # Checks whether a final path exists for the given list of letters
    # @param letters [Array<String>] the letter path to check for
    # @return [Boolean] whether the final path exists
    def final_path?(letters)
      *initial_letters, second_last_letter, last_letter = *letters

      path?(initial_letters) && follow_path(initial_letters).final_paths.any? do |path|
        path == Path.new([second_last_letter, last_letter])
      end
    end

    # Follows a single outgoing arc for a given letter
    # @param letter [String] the letter that should be followed
    # @raise [KeyError] if no outgoing arc exists for the given letter
    # @return [Node] the destination node that the arc for this letter leads to
    def follow_arc(letter)
      @outgoing_arcs.fetch(letter.to_sym).destination
    end

    # Recursively follows a list of letters
    # @param letters [Array<String>] the letters to be followed
    # @raise [KeyError] if an outgoing arc does not exist for a given letter
    #   at the corresponding node
    # @return [Node] the destination node that the path of letters leads to
    def follow_path(letters)
      return self if letters.empty?
      follow_arc(letters[0]).follow_path(letters[1..-1])
    end

    # Returns all paths from this node that are final. The set of final paths are
    # all paths for which the last arc includes a final letter. For each final letter
    # a separate path is created.
    # @return [Array<Path>] a list of final paths
    def final_paths
      # flat_map concatenates the per-arc lists in one pass instead of copying
      # the accumulated result again for every outgoing arc
      @outgoing_arcs.flat_map do |letter_sym, arc|
        arc.final_paths.map { |path| Path.new([letter_sym.to_s] + path) }
      end
    end
  end

end
@@ -0,0 +1,72 @@
1
+ # encoding: utf-8
2
+
3
class GADDAG

  # Represents a (final) path within the GADDAG data structure
  class Path
    # The letters that make up this GADDAG path
    attr_reader :letters

    # Two paths are equal if they contain the same letters in the same order
    send :include, Equalizer.new(:letters)

    # The path delimiter that separates the reversed prefix and the suffix
    DELIMITER = '♢'.freeze

    # Initializes a GADDAG path
    # @param letters [Array<String>] a list of letters, containing a reversed prefix,
    #   a delimiter, and an optional suffix: REV(PREFIX) ♢ SUFFIX
    # @return [Path]
    def initialize(letters)
      @letters = letters
    end

    # Returns the reversed prefix of this path
    # @return [Array<String>] the first portion of this path: the reversed prefix
    #   (empty when the path is empty or holds nothing before the delimiter)
    def reversed_prefix_letters
      # String#split drops trailing empty fields, so an empty path or a path
      # made up of the delimiter alone splits into [] and `first` is nil;
      # fall back to an empty prefix instead of calling #chars on nil.
      (@letters.join.split(DELIMITER).first || '').chars
    end

    # Returns the suffix of this path
    # @return [Array<String>] the last portion of this path: the suffix
    #   (empty when there is no delimiter or nothing follows it)
    def suffix_letters
      return [] if !include_delimiter? || @letters.last == DELIMITER
      @letters.join.split(DELIMITER).last.chars
    end

    # Tells whether the path includes {GADDAG::Path::DELIMITER}
    # @return [Boolean] whether this path includes the delimiter
    def include_delimiter?
      @letters.include?(DELIMITER)
    end

    # Returns a string representation of this path
    # @return [String] the string representation, letters are delimited with ' > '
    def to_s
      @letters.join(' > ')
    end

    # Coerces into an Array.
    # @example
    #   ['K', 'E'] + Path.new(%w(A R B)) # => ['K', 'E', 'A', 'R', 'B']
    # @return [Array<String>] the letters in this path
    def to_ary
      @letters
    end

    # Tells whether the path starts with the given letters
    # @param letters [Array<String>] the letters to check for
    # @return [Boolean] whether the path starts with the letters given
    def start_with?(letters)
      @letters.join.start_with?(letters.join)
    end

    # Constructs a word from the partially reversed letter path
    # @return [Word] the word that is encoded within this path
    def to_word
      Word.new(reversed_prefix_letters.reverse + suffix_letters)
    end
  end

end
@@ -0,0 +1,39 @@
1
+ # encoding: utf-8
2
+
3
class GADDAG

  # A word, kept as an ordered list of letters
  class Word
    # The letters that make up this word
    attr_reader :letters

    # Words compare by value: same letters in the same order
    send :include, Equalizer.new(:letters)

    # Builds a word from its letters
    # @param letters [Array<String>] an ordered list of letters of which
    #   the word consists
    # @return [Word]
    def initialize(letters)
      @letters = letters
    end

    # Joins the letters back into a string
    # @return [String] a string representation of the word
    def to_s
      letters.join
    end

    # Builds one delimited GADDAG path for every position at which the word
    # can be split into a non-empty prefix and a non-empty suffix
    # @return [Array<Path>] a list of paths, each containing a reversed prefix,
    #   a delimiter, and a suffix: REV(PREFIX) ♢ SUFFIX
    def to_delimited_paths
      (1...letters.length).map do |split_at|
        prefix = @letters.take(split_at)
        suffix = @letters.drop(split_at)
        Path.new(prefix.reverse + [Path::DELIMITER] + suffix)
      end
    end
  end

end
@@ -0,0 +1,6 @@
1
+ # encoding: utf-8
2
+
3
# Shared setup for GADDAG::Arc specs: the subject is an arc pointing to a
# fresh destination node
shared_context 'GADDAG::Arc/context' do
  let(:destination) { GADDAG::Node.new }

  subject { GADDAG::Arc.new(destination) }
end