picky 3.1.0 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. data/lib/picky/backends/file/basic.rb +10 -80
  2. data/lib/picky/backends/file/json.rb +56 -15
  3. data/lib/picky/backends/file.rb +62 -0
  4. data/lib/picky/backends/memory/basic.rb +111 -0
  5. data/lib/picky/backends/memory/json.rb +41 -0
  6. data/lib/picky/backends/{file → memory}/marshal.rb +4 -1
  7. data/lib/picky/backends/{file → memory}/text.rb +5 -1
  8. data/lib/picky/backends/memory.rb +16 -6
  9. data/lib/picky/backends/redis/{float_hash.rb → float.rb} +1 -1
  10. data/lib/picky/backends/redis/{list_hash.rb → list.rb} +1 -1
  11. data/lib/picky/backends/redis/{string_hash.rb → string.rb} +1 -1
  12. data/lib/picky/backends/redis.rb +16 -6
  13. data/lib/picky/bundle.rb +5 -2
  14. data/lib/picky/category.rb +1 -1
  15. data/lib/picky/cores.rb +7 -0
  16. data/lib/picky/extensions/symbol.rb +22 -0
  17. data/lib/picky/generators/partial/infix.rb +93 -0
  18. data/lib/picky/generators/partial/substring.rb +2 -2
  19. data/lib/picky/indexes_indexing.rb +2 -0
  20. data/lib/picky/indexing/bundle.rb +1 -1
  21. data/lib/picky/loader.rb +11 -6
  22. data/lib/picky/migrations/from_30_to_31.rb +2 -2
  23. data/lib/picky/query/allocation.rb +1 -0
  24. data/lib/picky/query/combinations.rb +3 -1
  25. data/spec/lib/backends/{file → memory}/basic_spec.rb +3 -3
  26. data/spec/lib/backends/{file → memory}/json_spec.rb +3 -3
  27. data/spec/lib/backends/{file → memory}/marshal_spec.rb +3 -3
  28. data/spec/lib/backends/{file → memory}/text_spec.rb +1 -1
  29. data/spec/lib/backends/memory_spec.rb +4 -4
  30. data/spec/lib/backends/redis/{float_hash_spec.rb → float_spec.rb} +2 -2
  31. data/spec/lib/backends/redis/{list_hash_spec.rb → list_spec.rb} +2 -2
  32. data/spec/lib/backends/redis/{string_hash_spec.rb → string_spec.rb} +2 -2
  33. data/spec/lib/backends/redis_spec.rb +4 -4
  34. data/spec/lib/bundle_spec.rb +27 -0
  35. data/spec/lib/extensions/symbol_spec.rb +237 -1
  36. data/spec/lib/generators/partial/infix_spec.rb +233 -0
  37. data/spec/lib/indexed/memory_spec.rb +8 -8
  38. data/spec/lib/query/allocation_spec.rb +7 -5
  39. metadata +30 -22
@@ -2,11 +2,9 @@ module Picky
2
2
 
3
3
  module Backends
4
4
 
5
- # Handles all aspects of index files, such as dumping/loading.
6
- #
7
- module File
5
+ class File
8
6
 
9
- # Base class for all index files.
7
+ # Base class for all file-based index files.
10
8
  #
11
9
  # Provides necessary helper methods for its
12
10
  # subclasses.
@@ -15,15 +13,19 @@ module Picky
15
13
  #
16
14
  class Basic
17
15
 
18
- # This file's location.
19
- #
20
- attr_reader :cache_path
16
+ attr_reader :cache_path, # This index file's location.
17
+ :mapping_file # The index file's mapping file (loaded into memory for quick access).
21
18
 
22
19
  # An index cache takes a path, without file extension,
23
20
  # which will be provided by the subclasses.
24
21
  #
25
22
  def initialize cache_path
26
- @cache_path = "#{cache_path}.#{extension}"
23
+ @cache_path = "#{cache_path}.file.#{extension}"
24
+
25
+ # This is the mapping file with the in-memory hash for the
26
+ # file position/offset mappings.
27
+ #
28
+ @mapping_file = Memory::JSON.new "#{cache_path}.file_mapping.#{extension}"
27
29
  end
28
30
 
29
31
  # The default extension for index files is "index".
@@ -32,78 +34,6 @@ module Picky
32
34
  :index
33
35
  end
34
36
 
35
- # Will copy the index file to a location that
36
- # is in a directory named "backup" right under
37
- # the directory the index file is in.
38
- #
39
- def backup
40
- prepare_backup backup_directory
41
- FileUtils.cp cache_path, target, verbose: true
42
- end
43
-
44
- # The backup directory of this file.
45
- # Equal to the file's dirname plus /backup
46
- #
47
-
48
- def backup_directory
49
- ::File.join ::File.dirname(cache_path), 'backup'
50
- end
51
-
52
- # Prepares the backup directory for the file.
53
- #
54
- def prepare_backup target
55
- FileUtils.mkdir target unless Dir.exists?(target)
56
- end
57
-
58
- # Copies the file from its backup location back
59
- # to the original location.
60
- #
61
- def restore
62
- FileUtils.cp backup_file_path_of(cache_path), cache_path, verbose: true
63
- end
64
-
65
- # The backup filename.
66
- #
67
- def backup_file_path_of path
68
- dir, name = ::File.split path
69
- ::File.join dir, 'backup', name
70
- end
71
-
72
- # Deletes the file.
73
- #
74
- def delete
75
- `rm -Rf #{cache_path}`
76
- end
77
-
78
- # Checks.
79
- #
80
-
81
- # Is this cache file suspiciously small?
82
- # (less than 8 Bytes of size)
83
- #
84
- def cache_small?
85
- size_of(cache_path) < 8
86
- end
87
- # Is the cache ok? (existing and larger than
88
- # zero Bytes in size)
89
- #
90
- # A small cache is still ok.
91
- #
92
- def cache_ok?
93
- size_of(cache_path) > 0
94
- end
95
- # Extracts the size of the file in Bytes.
96
- #
97
- def size_of path
98
- `ls -l #{path} | awk '{print $5}'`.to_i
99
- end
100
-
101
- #
102
- #
103
- def to_s
104
- "#{self.class}(#{cache_path})"
105
- end
106
-
107
37
  end
108
38
 
109
39
  end
@@ -2,40 +2,81 @@ module Picky
2
2
 
3
3
  module Backends
4
4
 
5
- module File
5
+ class File
6
6
 
7
- # Index files dumped in the JSON format.
7
+ # File-based index files dumped in the JSON format.
8
8
  #
9
9
  class JSON < Basic
10
10
 
11
- # Uses the extension "json".
11
+ # The in-memory mapping hash, mapping
12
+ # a Symbol key to [length, offset] of
13
+ # the JSON data in the file.
12
14
  #
13
- def extension
14
- :json
15
+ attr_accessor :mapping
16
+
17
+ # See lib/picky/backends/file.rb for what this should return.
18
+ #
19
+ # 1. Gets the length and offset for the key.
20
+ # 2. Extracts and decodes the object from the file.
21
+ #
22
+ def [] key
23
+ length, offset = mapping[key]
24
+ return unless length
25
+ result = Yajl::Parser.parse IO.read(cache_path, length, offset)
26
+ result
15
27
  end
16
- # Loads the index hash from json format.
28
+
29
+ # Clears the currently loaded index.
30
+ #
31
+ # Note: This only clears the in-memory mapping,
32
+ # but this is enough for the index to not exist
33
+ # anymore, at least to the application.
34
+ #
35
+ def clear
36
+ self.mapping.clear
37
+ end
38
+
39
+ # Loads the mapping hash from json format.
17
40
  #
18
41
  def load
19
- Yajl::Parser.parse ::File.open(cache_path, 'r'), symbolize_keys: true
20
-
21
- # Note: Circumvents the yajl symbolize utf-8 characters problem.
22
- #
23
- # Yajl::Parser.parse(::File.open(cache_path, 'r')).inject({}) do |hash, (k, v)|
24
- # hash[k.to_sym] = v
25
- # hash
26
- # end
42
+ self.mapping = mapping_file.load
43
+ self
27
44
  end
45
+
28
46
  # Dumps the index hash in json format.
29
47
  #
48
+ # 1. Dump actual data.
49
+ # 2. Dumps mapping key => [length, offset].
50
+ #
30
51
  def dump hash
31
- hash.dump_json cache_path
52
+ offset = 0
53
+ mapping = {}
54
+
55
+ ::File.open(cache_path, 'w:utf-8') do |out_file|
56
+ hash.each do |(key, object)|
57
+ encoded = Yajl::Encoder.encode object
58
+ length = encoded.size
59
+ mapping[key] = [length, offset]
60
+ offset += length
61
+ out_file.write encoded
62
+ end
63
+ end
64
+
65
+ mapping_file.dump mapping
32
66
  end
67
+
33
68
  # A json file does not provide retrieve functionality.
34
69
  #
35
70
  def retrieve
36
71
  raise "Can't retrieve from JSON file. Use text file."
37
72
  end
38
73
 
74
+ # Uses the extension "json".
75
+ #
76
+ def extension
77
+ :json
78
+ end
79
+
39
80
  end
40
81
 
41
82
  end
@@ -0,0 +1,62 @@
1
+ module Picky
2
+
3
+ module Backends
4
+
5
+ # Naive implementation of a file-based index.
6
+ # In-Memory Hash with length, offset:
7
+ # { :bla => [20, 312] }
8
+ # That map to positions in the File, encoded in JSON:
9
+ # ...[1,2,3,21,7,4,13,15]...
10
+ #
11
+ class File < Backend
12
+
13
+ # Returns an object that responds to:
14
+ # [:token] # => [id, id, id, id, id] (an array of ids)
15
+ #
16
+ def create_inverted bundle
17
+ JSON.new bundle.index_path(:inverted)
18
+ end
19
+ # Returns an object that responds to:
20
+ # [:token] # => 1.23 (a weight)
21
+ #
22
+ def create_weights bundle
23
+ JSON.new bundle.index_path(:weights)
24
+ end
25
+ # Returns an object that responds to:
26
+ # [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
27
+ #
28
+ def create_similarity bundle
29
+ JSON.new bundle.index_path(:similarity)
30
+ end
31
+ # Returns an object that responds to:
32
+ # [:key] # => value (a value for this config key)
33
+ #
34
+ def create_configuration bundle
35
+ JSON.new bundle.index_path(:configuration)
36
+ end
37
+
38
+ # Currently, the loaded ids are intersected using
39
+ # the fast C-based intersection.
40
+ #
41
+ # However, if we could come up with a clever way
42
+ # to do this faster, it would be most welcome.
43
+ #
44
+ def ids combinations, _, _
45
+ # Get the ids for each combination.
46
+ #
47
+ id_arrays = combinations.inject([]) do |total, combination|
48
+ total << combination.ids
49
+ end
50
+
51
+ # Call the optimized C algorithm.
52
+ #
53
+ # Note: It orders the passed arrays by size.
54
+ #
55
+ Performant::Array.memory_efficient_intersect id_arrays
56
+ end
57
+
58
+ end
59
+
60
+ end
61
+
62
+ end
@@ -0,0 +1,111 @@
1
+ module Picky
2
+
3
+ module Backends
4
+
5
+ class Memory
6
+
7
+ # Base class for all memory-based index files.
8
+ #
9
+ # Provides necessary helper methods for its
10
+ # subclasses.
11
+ # Not directly useable, as it does not provide
12
+ # dump/load methods.
13
+ #
14
+ class Basic
15
+
16
+ # This file's location.
17
+ #
18
+ attr_reader :cache_path
19
+
20
+ # An index cache takes a path, without file extension,
21
+ # which will be provided by the subclasses.
22
+ #
23
+ def initialize cache_path
24
+ @cache_path = "#{cache_path}.memory.#{extension}"
25
+ end
26
+
27
+ # The default extension for index files is "index".
28
+ #
29
+ def extension
30
+ :index
31
+ end
32
+
33
+ # Will copy the index file to a location that
34
+ # is in a directory named "backup" right under
35
+ # the directory the index file is in.
36
+ #
37
+ def backup
38
+ prepare_backup backup_directory
39
+ FileUtils.cp cache_path, target, verbose: true
40
+ end
41
+
42
+ # The backup directory of this file.
43
+ # Equal to the file's dirname plus /backup
44
+ #
45
+
46
+ def backup_directory
47
+ ::File.join ::File.dirname(cache_path), 'backup'
48
+ end
49
+
50
+ # Prepares the backup directory for the file.
51
+ #
52
+ def prepare_backup target
53
+ FileUtils.mkdir target unless Dir.exists?(target)
54
+ end
55
+
56
+ # Copies the file from its backup location back
57
+ # to the original location.
58
+ #
59
+ def restore
60
+ FileUtils.cp backup_file_path_of(cache_path), cache_path, verbose: true
61
+ end
62
+
63
+ # The backup filename.
64
+ #
65
+ def backup_file_path_of path
66
+ dir, name = ::File.split path
67
+ ::File.join dir, 'backup', name
68
+ end
69
+
70
+ # Deletes the file.
71
+ #
72
+ def delete
73
+ `rm -Rf #{cache_path}`
74
+ end
75
+
76
+ # Checks.
77
+ #
78
+
79
+ # Is this cache file suspiciously small?
80
+ # (less than 8 Bytes of size)
81
+ #
82
+ def cache_small?
83
+ size_of(cache_path) < 8
84
+ end
85
+ # Is the cache ok? (existing and larger than
86
+ # zero Bytes in size)
87
+ #
88
+ # A small cache is still ok.
89
+ #
90
+ def cache_ok?
91
+ size_of(cache_path) > 0
92
+ end
93
+ # Extracts the size of the file in Bytes.
94
+ #
95
+ def size_of path
96
+ `ls -l #{path} | awk '{print $5}'`.to_i
97
+ end
98
+
99
+ #
100
+ #
101
+ def to_s
102
+ "#{self.class}(#{cache_path})"
103
+ end
104
+
105
+ end
106
+
107
+ end
108
+
109
+ end
110
+
111
+ end
@@ -0,0 +1,41 @@
1
+ module Picky
2
+
3
+ module Backends
4
+
5
+ class Memory
6
+
7
+ # Memory-based index files dumped in the JSON format.
8
+ #
9
+ class JSON < Basic
10
+
11
+ # Uses the extension "json".
12
+ #
13
+ def extension
14
+ :json
15
+ end
16
+
17
+ # Loads the index hash from json format.
18
+ #
19
+ def load
20
+ Yajl::Parser.parse ::File.open(cache_path, 'r'), symbolize_keys: true
21
+ end
22
+
23
+ # Dumps the index hash in json format.
24
+ #
25
+ def dump hash
26
+ hash.dump_json cache_path
27
+ end
28
+
29
+ # A json file does not provide retrieve functionality.
30
+ #
31
+ def retrieve
32
+ raise "Can't retrieve from JSON file. Use text file."
33
+ end
34
+
35
+ end
36
+
37
+ end
38
+
39
+ end
40
+
41
+ end
@@ -2,7 +2,7 @@ module Picky
2
2
 
3
3
  module Backends
4
4
 
5
- module File
5
+ class Memory
6
6
 
7
7
  # Index data in the Ruby Marshal format.
8
8
  #
@@ -13,16 +13,19 @@ module Picky
13
13
  def extension
14
14
  :dump
15
15
  end
16
+
16
17
  # Loads the index hash from marshal format.
17
18
  #
18
19
  def load
19
20
  ::Marshal.load ::File.open(cache_path, 'r:binary')
20
21
  end
22
+
21
23
  # Dumps the index hash in marshal format.
22
24
  #
23
25
  def dump hash
24
26
  hash.dump_marshal cache_path
25
27
  end
28
+
26
29
  # A marshal file does not provide retrieve functionality.
27
30
  #
28
31
  def retrieve
@@ -2,10 +2,12 @@ module Picky
2
2
 
3
3
  module Backends
4
4
 
5
- module File
5
+ class Memory
6
6
 
7
7
  # Index data dumped in the text format.
8
8
  #
9
+ # TODO Should this really be Memory::Text?
10
+ #
9
11
  class Text < Basic
10
12
 
11
13
  # Uses the extension "txt".
@@ -13,12 +15,14 @@ module Picky
13
15
  def extension
14
16
  :txt
15
17
  end
18
+
16
19
  # Text files are used exclusively for
17
20
  # prepared data files.
18
21
  #
19
22
  def load
20
23
  raise "Can't load from text file. Use JSON or Marshal."
21
24
  end
25
+
22
26
  # Text files are used exclusively for
23
27
  # prepared data files.
24
28
  #
@@ -4,17 +4,29 @@ module Picky
4
4
 
5
5
  class Memory < Backend
6
6
 
7
+ # Returns an object that responds to:
8
+ # [:token] # => [id, id, id, id, id] (an array of ids)
9
+ #
7
10
  def create_inverted bundle
8
- File::JSON.new bundle.index_path(:inverted)
11
+ JSON.new bundle.index_path(:inverted)
9
12
  end
13
+ # Returns an object that responds to:
14
+ # [:token] # => 1.23 (a weight)
15
+ #
10
16
  def create_weights bundle
11
- File::JSON.new bundle.index_path(:weights)
17
+ JSON.new bundle.index_path(:weights)
12
18
  end
19
+ # Returns an object that responds to:
20
+ # [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
21
+ #
13
22
  def create_similarity bundle
14
- File::Marshal.new bundle.index_path(:similarity)
23
+ Marshal.new bundle.index_path(:similarity)
15
24
  end
25
+ # Returns an object that responds to:
26
+ # [:key] # => value (a value for this config key)
27
+ #
16
28
  def create_configuration bundle
17
- File::JSON.new bundle.index_path(:configuration)
29
+ JSON.new bundle.index_path(:configuration)
18
30
  end
19
31
 
20
32
  # Returns the result ids for the allocation.
@@ -31,8 +43,6 @@ module Picky
31
43
  # We cannot use the information to speed up the algorithm, unfortunately.
32
44
  #
33
45
  def ids combinations, _, _
34
- return [] if combinations.empty?
35
-
36
46
  # Get the ids for each combination.
37
47
  #
38
48
  id_arrays = combinations.inject([]) do |total, combination|
@@ -4,7 +4,7 @@ module Picky
4
4
 
5
5
  class Redis
6
6
 
7
- class FloatHash < StringHash
7
+ class Float < String
8
8
 
9
9
  # Get a single value.
10
10
  #
@@ -4,7 +4,7 @@ module Picky
4
4
 
5
5
  class Redis
6
6
 
7
- class ListHash < Basic
7
+ class List < Basic
8
8
 
9
9
  # Writes the hash into Redis.
10
10
  #
@@ -4,7 +4,7 @@ module Picky
4
4
 
5
5
  class Redis
6
6
 
7
- class StringHash < Basic
7
+ class String < Basic
8
8
 
9
9
  # Writes the hash into Redis.
10
10
  #
@@ -12,17 +12,29 @@ module Picky
12
12
  @client = options[:client] || ::Redis.new(:db => (options[:db] || 15))
13
13
  end
14
14
 
15
+ # Returns an object that responds to:
16
+ # [:token] # => [id, id, id, id, id] (an array of ids)
17
+ #
15
18
  def create_inverted bundle
16
- Redis::ListHash.new client, "#{bundle.identifier}:inverted"
19
+ List.new client, "#{bundle.identifier}:inverted"
17
20
  end
21
+ # Returns an object that responds to:
22
+ # [:token] # => 1.23 (a weight)
23
+ #
18
24
  def create_weights bundle
19
- Redis::FloatHash.new client, "#{bundle.identifier}:weights"
25
+ Float.new client, "#{bundle.identifier}:weights"
20
26
  end
27
+ # Returns an object that responds to:
28
+ # [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
29
+ #
21
30
  def create_similarity bundle
22
- Redis::ListHash.new client, "#{bundle.identifier}:similarity"
31
+ List.new client, "#{bundle.identifier}:similarity"
23
32
  end
33
+ # Returns an object that responds to:
34
+ # [:key] # => value (a value for this config key)
35
+ #
24
36
  def create_configuration bundle
25
- Redis::StringHash.new client, "#{bundle.identifier}:configuration"
37
+ String.new client, "#{bundle.identifier}:configuration"
26
38
  end
27
39
 
28
40
  # Returns the result ids for the allocation.
@@ -34,8 +46,6 @@ module Picky
34
46
  # Note: We use the amount and offset hints to speed Redis up.
35
47
  #
36
48
  def ids combinations, amount, offset
37
- return [] if combinations.empty?
38
-
39
49
  identifiers = combinations.inject([]) do |identifiers, combination|
40
50
  identifiers << "#{combination.identifier}"
41
51
  end
data/lib/picky/bundle.rb CHANGED
@@ -84,8 +84,11 @@ module Picky
84
84
  # * partial index
85
85
  # * similarity index
86
86
  #
87
- def index_path type
88
- ::File.join index_directory, "#{category.name}_#{name}_#{type}"
87
+ # Returns just the part without subindex type,
88
+ # if none given.
89
+ #
90
+ def index_path type = nil
91
+ ::File.join index_directory, "#{category.name}_#{name}#{ "_#{type}" if type }"
89
92
  end
90
93
 
91
94
  # Copies the indexes to the "backup" directory.
@@ -92,7 +92,7 @@ module Picky
92
92
  # Note: If you don't use it with the block, do not forget to close it.
93
93
  #
94
94
  def prepared_index_file &block
95
- @prepared_index_file ||= Backends::File::Text.new prepared_index_path
95
+ @prepared_index_file ||= Backends::Memory::Text.new prepared_index_path
96
96
  @prepared_index_file.open &block
97
97
  end
98
98
  # Creates the index directory including all necessary paths above it.
data/lib/picky/cores.rb CHANGED
@@ -23,6 +23,13 @@ module Picky
23
23
  ary_or_generator = ary_or_generator.sort_by { rand } if options[:randomly]
24
24
  generator = ary_or_generator.each
25
25
 
26
+ # Don't fork if there's just one element.
27
+ #
28
+ if generator.inject(0) { |total, element| total + 1 } == 1
29
+ yield generator.next
30
+ return
31
+ end
32
+
26
33
  # Get the maximum number of processors.
27
34
  #
28
35
  max = max_processors options
@@ -17,4 +17,26 @@ class Symbol # :nodoc:all
17
17
  size.downto(from_length + 1) { yield sub.chop!.intern }
18
18
  end
19
19
 
20
+ # :keys.each_intoken # => yields each of [:keys, :key, :eys, :ke, :ey, :ys, :k, :e, :y, :s]
21
+ # :keys.each_intoken(2) # => yields each of [:keys, :key, :eys, :ke, :ey, :ys]
22
+ # :keys.each_intoken(2, 3) # => yields each of [:key, :eys, :ke, :ey, :ys]
23
+ # :keys.each_intoken(10, 12) # => yields nothing (min larger than sym)
24
+ #
25
+ def each_intoken min_length = 1, max_length = -1
26
+ max_length = size + max_length + 1 if max_length < 0
27
+ max_length = size if size < max_length
28
+ max_length = 1 if max_length < 1
29
+
30
+ min_length = size + min_length + 1 if min_length < 0
31
+ min_length = 1 if min_length < 1
32
+
33
+ this_many = size - max_length + 1
34
+ max_length.downto(min_length) do |length|
35
+ this_many.times do |offset|
36
+ yield self[offset, length].intern
37
+ end
38
+ this_many += 1
39
+ end
40
+ end
41
+
20
42
  end