picky 3.1.0 → 3.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. data/lib/picky/backends/file/basic.rb +10 -80
  2. data/lib/picky/backends/file/json.rb +56 -15
  3. data/lib/picky/backends/file.rb +62 -0
  4. data/lib/picky/backends/memory/basic.rb +111 -0
  5. data/lib/picky/backends/memory/json.rb +41 -0
  6. data/lib/picky/backends/{file → memory}/marshal.rb +4 -1
  7. data/lib/picky/backends/{file → memory}/text.rb +5 -1
  8. data/lib/picky/backends/memory.rb +16 -6
  9. data/lib/picky/backends/redis/{float_hash.rb → float.rb} +1 -1
  10. data/lib/picky/backends/redis/{list_hash.rb → list.rb} +1 -1
  11. data/lib/picky/backends/redis/{string_hash.rb → string.rb} +1 -1
  12. data/lib/picky/backends/redis.rb +16 -6
  13. data/lib/picky/bundle.rb +5 -2
  14. data/lib/picky/category.rb +1 -1
  15. data/lib/picky/cores.rb +7 -0
  16. data/lib/picky/extensions/symbol.rb +22 -0
  17. data/lib/picky/generators/partial/infix.rb +93 -0
  18. data/lib/picky/generators/partial/substring.rb +2 -2
  19. data/lib/picky/indexes_indexing.rb +2 -0
  20. data/lib/picky/indexing/bundle.rb +1 -1
  21. data/lib/picky/loader.rb +11 -6
  22. data/lib/picky/migrations/from_30_to_31.rb +2 -2
  23. data/lib/picky/query/allocation.rb +1 -0
  24. data/lib/picky/query/combinations.rb +3 -1
  25. data/spec/lib/backends/{file → memory}/basic_spec.rb +3 -3
  26. data/spec/lib/backends/{file → memory}/json_spec.rb +3 -3
  27. data/spec/lib/backends/{file → memory}/marshal_spec.rb +3 -3
  28. data/spec/lib/backends/{file → memory}/text_spec.rb +1 -1
  29. data/spec/lib/backends/memory_spec.rb +4 -4
  30. data/spec/lib/backends/redis/{float_hash_spec.rb → float_spec.rb} +2 -2
  31. data/spec/lib/backends/redis/{list_hash_spec.rb → list_spec.rb} +2 -2
  32. data/spec/lib/backends/redis/{string_hash_spec.rb → string_spec.rb} +2 -2
  33. data/spec/lib/backends/redis_spec.rb +4 -4
  34. data/spec/lib/bundle_spec.rb +27 -0
  35. data/spec/lib/extensions/symbol_spec.rb +237 -1
  36. data/spec/lib/generators/partial/infix_spec.rb +233 -0
  37. data/spec/lib/indexed/memory_spec.rb +8 -8
  38. data/spec/lib/query/allocation_spec.rb +7 -5
  39. metadata +30 -22
@@ -2,11 +2,9 @@ module Picky
2
2
 
3
3
  module Backends
4
4
 
5
- # Handles all aspects of index files, such as dumping/loading.
6
- #
7
- module File
5
+ class File
8
6
 
9
- # Base class for all index files.
7
+ # Base class for all file-based index files.
10
8
  #
11
9
  # Provides necessary helper methods for its
12
10
  # subclasses.
@@ -15,15 +13,19 @@ module Picky
15
13
  #
16
14
  class Basic
17
15
 
18
- # This file's location.
19
- #
20
- attr_reader :cache_path
16
+ attr_reader :cache_path, # This index file's location.
17
+ :mapping_file # The index file's mapping file (loaded into memory for quick access).
21
18
 
22
19
  # An index cache takes a path, without file extension,
23
20
  # which will be provided by the subclasses.
24
21
  #
25
22
  def initialize cache_path
26
- @cache_path = "#{cache_path}.#{extension}"
23
+ @cache_path = "#{cache_path}.file.#{extension}"
24
+
25
+ # This is the mapping file with the in-memory hash for the
26
+ # file position/offset mappings.
27
+ #
28
+ @mapping_file = Memory::JSON.new "#{cache_path}.file_mapping.#{extension}"
27
29
  end
28
30
 
29
31
  # The default extension for index files is "index".
@@ -32,78 +34,6 @@ module Picky
32
34
  :index
33
35
  end
34
36
 
35
- # Will copy the index file to a location that
36
- # is in a directory named "backup" right under
37
- # the directory the index file is in.
38
- #
39
- def backup
40
- prepare_backup backup_directory
41
- FileUtils.cp cache_path, target, verbose: true
42
- end
43
-
44
- # The backup directory of this file.
45
- # Equal to the file's dirname plus /backup
46
- #
47
-
48
- def backup_directory
49
- ::File.join ::File.dirname(cache_path), 'backup'
50
- end
51
-
52
- # Prepares the backup directory for the file.
53
- #
54
- def prepare_backup target
55
- FileUtils.mkdir target unless Dir.exists?(target)
56
- end
57
-
58
- # Copies the file from its backup location back
59
- # to the original location.
60
- #
61
- def restore
62
- FileUtils.cp backup_file_path_of(cache_path), cache_path, verbose: true
63
- end
64
-
65
- # The backup filename.
66
- #
67
- def backup_file_path_of path
68
- dir, name = ::File.split path
69
- ::File.join dir, 'backup', name
70
- end
71
-
72
- # Deletes the file.
73
- #
74
- def delete
75
- `rm -Rf #{cache_path}`
76
- end
77
-
78
- # Checks.
79
- #
80
-
81
- # Is this cache file suspiciously small?
82
- # (less than 8 Bytes of size)
83
- #
84
- def cache_small?
85
- size_of(cache_path) < 8
86
- end
87
- # Is the cache ok? (existing and larger than
88
- # zero Bytes in size)
89
- #
90
- # A small cache is still ok.
91
- #
92
- def cache_ok?
93
- size_of(cache_path) > 0
94
- end
95
- # Extracts the size of the file in Bytes.
96
- #
97
- def size_of path
98
- `ls -l #{path} | awk '{print $5}'`.to_i
99
- end
100
-
101
- #
102
- #
103
- def to_s
104
- "#{self.class}(#{cache_path})"
105
- end
106
-
107
37
  end
108
38
 
109
39
  end
@@ -2,40 +2,81 @@ module Picky
2
2
 
3
3
  module Backends
4
4
 
5
- module File
5
+ class File
6
6
 
7
- # Index files dumped in the JSON format.
7
+ # File-based index files dumped in the JSON format.
8
8
  #
9
9
  class JSON < Basic
10
10
 
11
- # Uses the extension "json".
11
+ # The in-memory mapping hash, mapping
12
+ # a Symbol key to [length, offset] of
13
+ # the JSON data in the file.
12
14
  #
13
- def extension
14
- :json
15
+ attr_accessor :mapping
16
+
17
+ # See lib/picky/backends/file.rb for what this should return.
18
+ #
19
+ # 1. Gets the length and offset for the key.
20
+ # 2. Extracts and decodes the object from the file.
21
+ #
22
+ def [] key
23
+ length, offset = mapping[key]
24
+ return unless length
25
+ result = Yajl::Parser.parse IO.read(cache_path, length, offset)
26
+ result
15
27
  end
16
- # Loads the index hash from json format.
28
+
29
+ # Clears the currently loaded index.
30
+ #
31
+ # Note: This only clears the in-memory mapping,
32
+ # but this is enough for the index to not exist
33
+ # anymore, at least to the application.
34
+ #
35
+ def clear
36
+ self.mapping.clear
37
+ end
38
+
39
+ # Loads the mapping hash from json format.
17
40
  #
18
41
  def load
19
- Yajl::Parser.parse ::File.open(cache_path, 'r'), symbolize_keys: true
20
-
21
- # Note: Circumvents the yajl symbolize utf-8 characters problem.
22
- #
23
- # Yajl::Parser.parse(::File.open(cache_path, 'r')).inject({}) do |hash, (k, v)|
24
- # hash[k.to_sym] = v
25
- # hash
26
- # end
42
+ self.mapping = mapping_file.load
43
+ self
27
44
  end
45
+
28
46
  # Dumps the index hash in json format.
29
47
  #
48
+ # 1. Dumps actual data.
49
+ # 2. Dumps mapping key => [length, offset].
50
+ #
30
51
  def dump hash
31
- hash.dump_json cache_path
52
+ offset = 0
53
+ mapping = {}
54
+
55
+ ::File.open(cache_path, 'w:utf-8') do |out_file|
56
+ hash.each do |(key, object)|
57
+ encoded = Yajl::Encoder.encode object
58
+ length = encoded.size
59
+ mapping[key] = [length, offset]
60
+ offset += length
61
+ out_file.write encoded
62
+ end
63
+ end
64
+
65
+ mapping_file.dump mapping
32
66
  end
67
+
33
68
  # A json file does not provide retrieve functionality.
34
69
  #
35
70
  def retrieve
36
71
  raise "Can't retrieve from JSON file. Use text file."
37
72
  end
38
73
 
74
+ # Uses the extension "json".
75
+ #
76
+ def extension
77
+ :json
78
+ end
79
+
39
80
  end
40
81
 
41
82
  end
@@ -0,0 +1,62 @@
1
+ module Picky
2
+
3
+ module Backends
4
+
5
+ # Naive implementation of a file-based index.
6
+ # In-Memory Hash with length, offset:
7
+ # { :bla => [20, 312] }
8
+ # That map to positions in the File, encoded in JSON:
9
+ # ...[1,2,3,21,7,4,13,15]...
10
+ #
11
+ class File < Backend
12
+
13
+ # Returns an object that responds to:
14
+ # [:token] # => [id, id, id, id, id] (an array of ids)
15
+ #
16
+ def create_inverted bundle
17
+ JSON.new bundle.index_path(:inverted)
18
+ end
19
+ # Returns an object that responds to:
20
+ # [:token] # => 1.23 (a weight)
21
+ #
22
+ def create_weights bundle
23
+ JSON.new bundle.index_path(:weights)
24
+ end
25
+ # Returns an object that responds to:
26
+ # [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
27
+ #
28
+ def create_similarity bundle
29
+ JSON.new bundle.index_path(:similarity)
30
+ end
31
+ # Returns an object that responds to:
32
+ # [:key] # => value (a value for this config key)
33
+ #
34
+ def create_configuration bundle
35
+ JSON.new bundle.index_path(:configuration)
36
+ end
37
+
38
+ # Currently, the loaded ids are intersected using
39
+ # the fast C-based intersection.
40
+ #
41
+ # However, if we could come up with a clever way
42
+ # to do this faster, it would be most welcome.
43
+ #
44
+ def ids combinations, _, _
45
+ # Get the ids for each combination.
46
+ #
47
+ id_arrays = combinations.inject([]) do |total, combination|
48
+ total << combination.ids
49
+ end
50
+
51
+ # Call the optimized C algorithm.
52
+ #
53
+ # Note: It orders the passed arrays by size.
54
+ #
55
+ Performant::Array.memory_efficient_intersect id_arrays
56
+ end
57
+
58
+ end
59
+
60
+ end
61
+
62
+ end
@@ -0,0 +1,111 @@
1
+ module Picky
2
+
3
+ module Backends
4
+
5
+ class Memory
6
+
7
+ # Base class for all memory-based index files.
8
+ #
9
+ # Provides necessary helper methods for its
10
+ # subclasses.
11
+ # Not directly useable, as it does not provide
12
+ # dump/load methods.
13
+ #
14
+ class Basic
15
+
16
+ # This file's location.
17
+ #
18
+ attr_reader :cache_path
19
+
20
+ # An index cache takes a path, without file extension,
21
+ # which will be provided by the subclasses.
22
+ #
23
+ def initialize cache_path
24
+ @cache_path = "#{cache_path}.memory.#{extension}"
25
+ end
26
+
27
+ # The default extension for index files is "index".
28
+ #
29
+ def extension
30
+ :index
31
+ end
32
+
33
+ # Will copy the index file to a location that
34
+ # is in a directory named "backup" right under
35
+ # the directory the index file is in.
36
+ #
37
+ def backup
38
+ prepare_backup backup_directory
39
+ FileUtils.cp cache_path, target, verbose: true
40
+ end
41
+
42
+ # The backup directory of this file.
43
+ # Equal to the file's dirname plus /backup
44
+ #
45
+
46
+ def backup_directory
47
+ ::File.join ::File.dirname(cache_path), 'backup'
48
+ end
49
+
50
+ # Prepares the backup directory for the file.
51
+ #
52
+ def prepare_backup target
53
+ FileUtils.mkdir target unless Dir.exists?(target)
54
+ end
55
+
56
+ # Copies the file from its backup location back
57
+ # to the original location.
58
+ #
59
+ def restore
60
+ FileUtils.cp backup_file_path_of(cache_path), cache_path, verbose: true
61
+ end
62
+
63
+ # The backup filename.
64
+ #
65
+ def backup_file_path_of path
66
+ dir, name = ::File.split path
67
+ ::File.join dir, 'backup', name
68
+ end
69
+
70
+ # Deletes the file.
71
+ #
72
+ def delete
73
+ `rm -Rf #{cache_path}`
74
+ end
75
+
76
+ # Checks.
77
+ #
78
+
79
+ # Is this cache file suspiciously small?
80
+ # (less than 8 Bytes of size)
81
+ #
82
+ def cache_small?
83
+ size_of(cache_path) < 8
84
+ end
85
+ # Is the cache ok? (existing and larger than
86
+ # zero Bytes in size)
87
+ #
88
+ # A small cache is still ok.
89
+ #
90
+ def cache_ok?
91
+ size_of(cache_path) > 0
92
+ end
93
+ # Extracts the size of the file in Bytes.
94
+ #
95
+ def size_of path
96
+ `ls -l #{path} | awk '{print $5}'`.to_i
97
+ end
98
+
99
+ #
100
+ #
101
+ def to_s
102
+ "#{self.class}(#{cache_path})"
103
+ end
104
+
105
+ end
106
+
107
+ end
108
+
109
+ end
110
+
111
+ end
@@ -0,0 +1,41 @@
1
+ module Picky
2
+
3
+ module Backends
4
+
5
+ class Memory
6
+
7
+ # Memory-based index files dumped in the JSON format.
8
+ #
9
+ class JSON < Basic
10
+
11
+ # Uses the extension "json".
12
+ #
13
+ def extension
14
+ :json
15
+ end
16
+
17
+ # Loads the index hash from json format.
18
+ #
19
+ def load
20
+ Yajl::Parser.parse ::File.open(cache_path, 'r'), symbolize_keys: true
21
+ end
22
+
23
+ # Dumps the index hash in json format.
24
+ #
25
+ def dump hash
26
+ hash.dump_json cache_path
27
+ end
28
+
29
+ # A json file does not provide retrieve functionality.
30
+ #
31
+ def retrieve
32
+ raise "Can't retrieve from JSON file. Use text file."
33
+ end
34
+
35
+ end
36
+
37
+ end
38
+
39
+ end
40
+
41
+ end
@@ -2,7 +2,7 @@ module Picky
2
2
 
3
3
  module Backends
4
4
 
5
- module File
5
+ class Memory
6
6
 
7
7
  # Index data in the Ruby Marshal format.
8
8
  #
@@ -13,16 +13,19 @@ module Picky
13
13
  def extension
14
14
  :dump
15
15
  end
16
+
16
17
  # Loads the index hash from marshal format.
17
18
  #
18
19
  def load
19
20
  ::Marshal.load ::File.open(cache_path, 'r:binary')
20
21
  end
22
+
21
23
  # Dumps the index hash in marshal format.
22
24
  #
23
25
  def dump hash
24
26
  hash.dump_marshal cache_path
25
27
  end
28
+
26
29
  # A marshal file does not provide retrieve functionality.
27
30
  #
28
31
  def retrieve
@@ -2,10 +2,12 @@ module Picky
2
2
 
3
3
  module Backends
4
4
 
5
- module File
5
+ class Memory
6
6
 
7
7
  # Index data dumped in the text format.
8
8
  #
9
+ # TODO Should this really be Memory::Text?
10
+ #
9
11
  class Text < Basic
10
12
 
11
13
  # Uses the extension "txt".
@@ -13,12 +15,14 @@ module Picky
13
15
  def extension
14
16
  :txt
15
17
  end
18
+
16
19
  # Text files are used exclusively for
17
20
  # prepared data files.
18
21
  #
19
22
  def load
20
23
  raise "Can't load from text file. Use JSON or Marshal."
21
24
  end
25
+
22
26
  # Text files are used exclusively for
23
27
  # prepared data files.
24
28
  #
@@ -4,17 +4,29 @@ module Picky
4
4
 
5
5
  class Memory < Backend
6
6
 
7
+ # Returns an object that responds to:
8
+ # [:token] # => [id, id, id, id, id] (an array of ids)
9
+ #
7
10
  def create_inverted bundle
8
- File::JSON.new bundle.index_path(:inverted)
11
+ JSON.new bundle.index_path(:inverted)
9
12
  end
13
+ # Returns an object that responds to:
14
+ # [:token] # => 1.23 (a weight)
15
+ #
10
16
  def create_weights bundle
11
- File::JSON.new bundle.index_path(:weights)
17
+ JSON.new bundle.index_path(:weights)
12
18
  end
19
+ # Returns an object that responds to:
20
+ # [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
21
+ #
13
22
  def create_similarity bundle
14
- File::Marshal.new bundle.index_path(:similarity)
23
+ Marshal.new bundle.index_path(:similarity)
15
24
  end
25
+ # Returns an object that responds to:
26
+ # [:key] # => value (a value for this config key)
27
+ #
16
28
  def create_configuration bundle
17
- File::JSON.new bundle.index_path(:configuration)
29
+ JSON.new bundle.index_path(:configuration)
18
30
  end
19
31
 
20
32
  # Returns the result ids for the allocation.
@@ -31,8 +43,6 @@ module Picky
31
43
  # We cannot use the information to speed up the algorithm, unfortunately.
32
44
  #
33
45
  def ids combinations, _, _
34
- return [] if combinations.empty?
35
-
36
46
  # Get the ids for each combination.
37
47
  #
38
48
  id_arrays = combinations.inject([]) do |total, combination|
@@ -4,7 +4,7 @@ module Picky
4
4
 
5
5
  class Redis
6
6
 
7
- class FloatHash < StringHash
7
+ class Float < String
8
8
 
9
9
  # Get a single value.
10
10
  #
@@ -4,7 +4,7 @@ module Picky
4
4
 
5
5
  class Redis
6
6
 
7
- class ListHash < Basic
7
+ class List < Basic
8
8
 
9
9
  # Writes the hash into Redis.
10
10
  #
@@ -4,7 +4,7 @@ module Picky
4
4
 
5
5
  class Redis
6
6
 
7
- class StringHash < Basic
7
+ class String < Basic
8
8
 
9
9
  # Writes the hash into Redis.
10
10
  #
@@ -12,17 +12,29 @@ module Picky
12
12
  @client = options[:client] || ::Redis.new(:db => (options[:db] || 15))
13
13
  end
14
14
 
15
+ # Returns an object that responds to:
16
+ # [:token] # => [id, id, id, id, id] (an array of ids)
17
+ #
15
18
  def create_inverted bundle
16
- Redis::ListHash.new client, "#{bundle.identifier}:inverted"
19
+ List.new client, "#{bundle.identifier}:inverted"
17
20
  end
21
+ # Returns an object that responds to:
22
+ # [:token] # => 1.23 (a weight)
23
+ #
18
24
  def create_weights bundle
19
- Redis::FloatHash.new client, "#{bundle.identifier}:weights"
25
+ Float.new client, "#{bundle.identifier}:weights"
20
26
  end
27
+ # Returns an object that responds to:
28
+ # [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
29
+ #
21
30
  def create_similarity bundle
22
- Redis::ListHash.new client, "#{bundle.identifier}:similarity"
31
+ List.new client, "#{bundle.identifier}:similarity"
23
32
  end
33
+ # Returns an object that responds to:
34
+ # [:key] # => value (a value for this config key)
35
+ #
24
36
  def create_configuration bundle
25
- Redis::StringHash.new client, "#{bundle.identifier}:configuration"
37
+ String.new client, "#{bundle.identifier}:configuration"
26
38
  end
27
39
 
28
40
  # Returns the result ids for the allocation.
@@ -34,8 +46,6 @@ module Picky
34
46
  # Note: We use the amount and offset hints to speed Redis up.
35
47
  #
36
48
  def ids combinations, amount, offset
37
- return [] if combinations.empty?
38
-
39
49
  identifiers = combinations.inject([]) do |identifiers, combination|
40
50
  identifiers << "#{combination.identifier}"
41
51
  end
data/lib/picky/bundle.rb CHANGED
@@ -84,8 +84,11 @@ module Picky
84
84
  # * partial index
85
85
  # * similarity index
86
86
  #
87
- def index_path type
88
- ::File.join index_directory, "#{category.name}_#{name}_#{type}"
87
+ # Returns just the part without subindex type,
88
+ # if none given.
89
+ #
90
+ def index_path type = nil
91
+ ::File.join index_directory, "#{category.name}_#{name}#{ "_#{type}" if type }"
89
92
  end
90
93
 
91
94
  # Copies the indexes to the "backup" directory.
@@ -92,7 +92,7 @@ module Picky
92
92
  # Note: If you don't use it with the block, do not forget to close it.
93
93
  #
94
94
  def prepared_index_file &block
95
- @prepared_index_file ||= Backends::File::Text.new prepared_index_path
95
+ @prepared_index_file ||= Backends::Memory::Text.new prepared_index_path
96
96
  @prepared_index_file.open &block
97
97
  end
98
98
  # Creates the index directory including all necessary paths above it.
data/lib/picky/cores.rb CHANGED
@@ -23,6 +23,13 @@ module Picky
23
23
  ary_or_generator = ary_or_generator.sort_by { rand } if options[:randomly]
24
24
  generator = ary_or_generator.each
25
25
 
26
+ # Don't fork if there's just one element.
27
+ #
28
+ if generator.inject(0) { |total, element| total + 1 } == 1
29
+ yield generator.next
30
+ return
31
+ end
32
+
26
33
  # Get the maximum number of processors.
27
34
  #
28
35
  max = max_processors options
@@ -17,4 +17,26 @@ class Symbol # :nodoc:all
17
17
  size.downto(from_length + 1) { yield sub.chop!.intern }
18
18
  end
19
19
 
20
+ # :keys.each_intoken # => yields each of [:keys, :key, :eys, :ke, :ey, :ys, :k, :e, :y, :s]
21
+ # :keys.each_intoken(2) # => yields each of [:keys, :key, :eys, :ke, :ey, :ys]
22
+ # :keys.each_intoken(2, 3) # => yields each of [:key, :eys, :ke, :ey, :ys]
23
+ # :keys.each_intoken(10, 12) # => yields nothing (min larger than sym)
24
+ #
25
+ def each_intoken min_length = 1, max_length = -1
26
+ max_length = size + max_length + 1 if max_length < 0
27
+ max_length = size if size < max_length
28
+ max_length = 1 if max_length < 1
29
+
30
+ min_length = size + min_length + 1 if min_length < 0
31
+ min_length = 1 if min_length < 1
32
+
33
+ this_many = size - max_length + 1
34
+ max_length.downto(min_length) do |length|
35
+ this_many.times do |offset|
36
+ yield self[offset, length].intern
37
+ end
38
+ this_many += 1
39
+ end
40
+ end
41
+
20
42
  end