RubyGems - picky - Versions diffs - 3.1.0 → 3.1.1 - Mend

picky 3.1.0 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

data/lib/picky/backends/file/basic.rb +10 -80
data/lib/picky/backends/file/json.rb +56 -15
data/lib/picky/backends/file.rb +62 -0
data/lib/picky/backends/memory/basic.rb +111 -0
data/lib/picky/backends/memory/json.rb +41 -0
data/lib/picky/backends/{file → memory}/marshal.rb +4 -1
data/lib/picky/backends/{file → memory}/text.rb +5 -1
data/lib/picky/backends/memory.rb +16 -6
data/lib/picky/backends/redis/{float_hash.rb → float.rb} +1 -1
data/lib/picky/backends/redis/{list_hash.rb → list.rb} +1 -1
data/lib/picky/backends/redis/{string_hash.rb → string.rb} +1 -1
data/lib/picky/backends/redis.rb +16 -6
data/lib/picky/bundle.rb +5 -2
data/lib/picky/category.rb +1 -1
data/lib/picky/cores.rb +7 -0
data/lib/picky/extensions/symbol.rb +22 -0
data/lib/picky/generators/partial/infix.rb +93 -0
data/lib/picky/generators/partial/substring.rb +2 -2
data/lib/picky/indexes_indexing.rb +2 -0
data/lib/picky/indexing/bundle.rb +1 -1
data/lib/picky/loader.rb +11 -6
data/lib/picky/migrations/from_30_to_31.rb +2 -2
data/lib/picky/query/allocation.rb +1 -0
data/lib/picky/query/combinations.rb +3 -1
data/spec/lib/backends/{file → memory}/basic_spec.rb +3 -3
data/spec/lib/backends/{file → memory}/json_spec.rb +3 -3
data/spec/lib/backends/{file → memory}/marshal_spec.rb +3 -3
data/spec/lib/backends/{file → memory}/text_spec.rb +1 -1
data/spec/lib/backends/memory_spec.rb +4 -4
data/spec/lib/backends/redis/{float_hash_spec.rb → float_spec.rb} +2 -2
data/spec/lib/backends/redis/{list_hash_spec.rb → list_spec.rb} +2 -2
data/spec/lib/backends/redis/{string_hash_spec.rb → string_spec.rb} +2 -2
data/spec/lib/backends/redis_spec.rb +4 -4
data/spec/lib/bundle_spec.rb +27 -0
data/spec/lib/extensions/symbol_spec.rb +237 -1
data/spec/lib/generators/partial/infix_spec.rb +233 -0
data/spec/lib/indexed/memory_spec.rb +8 -8
data/spec/lib/query/allocation_spec.rb +7 -5
metadata +30 -22

data/lib/picky/backends/file/basic.rb CHANGED Viewed

@@ -2,11 +2,9 @@ module Picky
   module Backends
-    # Handles all aspects of index files, such as dumping/loading.
-    #
-    module File
+    class File
-      # Base class for all index files.
+      # Base class for all file-based index files.
       #
       # Provides necessary helper methods for its
       # subclasses.
@@ -15,15 +13,19 @@ module Picky
       #
       class Basic
-        # This file's location.
-        #
-        attr_reader :cache_path
+        attr_reader :cache_path,  # This index file's location.
+                    :mapping_file # The index file's mapping file (loaded into memory for quick access).
         # An index cache takes a path, without file extension,
         # which will be provided by the subclasses.
         #
         def initialize cache_path
-          @cache_path = "#{cache_path}.#{extension}"
+          @cache_path = "#{cache_path}.file.#{extension}"
+          # This is the mapping file with the in-memory hash for the
+          # file position/offset mappings.
+          #
+          @mapping_file = Memory::JSON.new "#{cache_path}.file_mapping.#{extension}"
         end
         # The default extension for index files is "index".
@@ -32,78 +34,6 @@ module Picky
           :index
         end
-        # Will copy the index file to a location that
-        # is in a directory named "backup" right under
-        # the directory the index file is in.
-        #
-        def backup
-          prepare_backup backup_directory
-          FileUtils.cp cache_path, target, verbose: true
-        end
-        # The backup directory of this file.
-        # Equal to the file's dirname plus /backup
-        #
-        def backup_directory
-          ::File.join ::File.dirname(cache_path), 'backup'
-        end
-        # Prepares the backup directory for the file.
-        #
-        def prepare_backup target
-          FileUtils.mkdir target unless Dir.exists?(target)
-        end
-        # Copies the file from its backup location back
-        # to the original location.
-        #
-        def restore
-          FileUtils.cp backup_file_path_of(cache_path), cache_path, verbose: true
-        end
-        # The backup filename.
-        #
-        def backup_file_path_of path
-          dir, name = ::File.split path
-          ::File.join dir, 'backup', name
-        end
-        # Deletes the file.
-        #
-        def delete
-          `rm -Rf #{cache_path}`
-        end
-        # Checks.
-        #
-        # Is this cache file suspiciously small?
-        # (less than 8 Bytes of size)
-        #
-        def cache_small?
-          size_of(cache_path) < 8
-        end
-        # Is the cache ok? (existing and larger than
-        # zero Bytes in size)
-        #
-        # A small cache is still ok.
-        #
-        def cache_ok?
-          size_of(cache_path) > 0
-        end
-        # Extracts the size of the file in Bytes.
-        #
-        def size_of path
-          `ls -l #{path} | awk '{print $5}'`.to_i
-        end
-        #
-        #
-        def to_s
-          "#{self.class}(#{cache_path})"
-        end
       end
     end

data/lib/picky/backends/file/json.rb CHANGED Viewed

@@ -2,40 +2,81 @@ module Picky
   module Backends
-    module File
+    class File
-      # Index files dumped in the JSON format.
+      # File-based index files dumped in the JSON format.
       #
       class JSON < Basic
-        # Uses the extension "json".
+        # The in-memory mapping hash, mapping
+        # a Symbol key to [length, offset] of
+        # the JSON data in the file.
         #
-        def extension
-          :json
+        attr_accessor :mapping
+        # See lib/picky/backends/file.rb for what this should return.
+        #
+        # 1. Gets the length and offset for the key.
+        # 2. Extracts and decodes the object from the file.
+        #
+        def [] key
+          length, offset = mapping[key]
+          return unless length
+          result = Yajl::Parser.parse IO.read(cache_path, length, offset)
+          result
         end
-        # Loads the index hash from json format.
+        # Clears the currently loaded index.
+        #
+        # Note: This only clears the in-memory mapping,
+        #       but this is enough for the index to not exist
+        #       anymore, at least to the application.
+        #
+        def clear
+          self.mapping.clear
+        end
+        # Loads the mapping hash from json format.
         #
         def load
-          Yajl::Parser.parse ::File.open(cache_path, 'r'), symbolize_keys: true
-          # Note: Circumvents the yajl symbolize utf-8 characters problem.
-          #
-          # Yajl::Parser.parse(::File.open(cache_path, 'r')).inject({}) do |hash, (k, v)|
-          #   hash[k.to_sym] = v
-          #   hash
-          # end
+          self.mapping = mapping_file.load
+          self
         end
         # Dumps the index hash in json format.
         #
+        # 1. Dumps actual data.
+        # 2. Dumps mapping key => [length, offset].
+        #
         def dump hash
-          hash.dump_json cache_path
+          offset = 0
+          mapping = {}
+          ::File.open(cache_path, 'w:utf-8') do |out_file|
+            hash.each do |(key, object)|
+              encoded = Yajl::Encoder.encode object
+              length  = encoded.size
+              mapping[key] = [length, offset]
+              offset += length
+              out_file.write encoded
+            end
+          end
+          mapping_file.dump mapping
         end
         # A json file does not provide retrieve functionality.
         #
         def retrieve
           raise "Can't retrieve from JSON file. Use text file."
         end
+        # Uses the extension "json".
+        #
+        def extension
+          :json
+        end
       end
     end

data/lib/picky/backends/file.rb ADDED Viewed

@@ -0,0 +1,62 @@
+module Picky
+  module Backends
+    # Naive implementation of a file-based index.
+    # In-Memory Hash with length, offset:
+    #   { :bla => [20, 312] }
+    # These map to positions in the File, encoded in JSON:
+    #   ...[1,2,3,21,7,4,13,15]...
+    #
+    class File < Backend
+      # Returns an object that responds to:
+      #   [:token] # => [id, id, id, id, id] (an array of ids)
+      #
+      def create_inverted bundle
+        JSON.new bundle.index_path(:inverted)
+      end
+      # Returns an object that responds to:
+      #   [:token] # => 1.23 (a weight)
+      #
+      def create_weights bundle
+        JSON.new bundle.index_path(:weights)
+      end
+      # Returns an object that responds to:
+      #   [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
+      #
+      def create_similarity bundle
+        JSON.new bundle.index_path(:similarity)
+      end
+      # Returns an object that responds to:
+      #   [:key] # => value (a value for this config key)
+      #
+      def create_configuration bundle
+        JSON.new bundle.index_path(:configuration)
+      end
+      # Currently, the loaded ids are intersected using
+      # the fast C-based intersection.
+      #
+      # However, if we could come up with a clever way
+      # to do this faster, it would be most welcome.
+      #
+      def ids combinations, _, _
+        # Get the ids for each combination.
+        #
+        id_arrays = combinations.inject([]) do |total, combination|
+          total << combination.ids
+        end
+        # Call the optimized C algorithm.
+        #
+        # Note: It orders the passed arrays by size.
+        #
+        Performant::Array.memory_efficient_intersect id_arrays
+      end
+    end
+  end
+end

data/lib/picky/backends/memory/basic.rb ADDED Viewed

@@ -0,0 +1,111 @@
+module Picky
+  module Backends
+    class Memory
+      # Base class for all memory-based index files.
+      #
+      # Provides necessary helper methods for its
+      # subclasses.
+      # Not directly useable, as it does not provide
+      # dump/load methods.
+      #
+      class Basic
+        # This file's location.
+        #
+        attr_reader :cache_path
+        # An index cache takes a path, without file extension,
+        # which will be provided by the subclasses.
+        #
+        def initialize cache_path
+          @cache_path = "#{cache_path}.memory.#{extension}"
+        end
+        # The default extension for index files is "index".
+        #
+        def extension
+          :index
+        end
+        # Will copy the index file to a location that
+        # is in a directory named "backup" right under
+        # the directory the index file is in.
+        #
+        def backup
+          prepare_backup backup_directory
+          FileUtils.cp cache_path, target, verbose: true
+        end
+        # The backup directory of this file.
+        # Equal to the file's dirname plus /backup
+        #
+        def backup_directory
+          ::File.join ::File.dirname(cache_path), 'backup'
+        end
+        # Prepares the backup directory for the file.
+        #
+        def prepare_backup target
+          FileUtils.mkdir target unless Dir.exists?(target)
+        end
+        # Copies the file from its backup location back
+        # to the original location.
+        #
+        def restore
+          FileUtils.cp backup_file_path_of(cache_path), cache_path, verbose: true
+        end
+        # The backup filename.
+        #
+        def backup_file_path_of path
+          dir, name = ::File.split path
+          ::File.join dir, 'backup', name
+        end
+        # Deletes the file.
+        #
+        def delete
+          `rm -Rf #{cache_path}`
+        end
+        # Checks.
+        #
+        # Is this cache file suspiciously small?
+        # (less than 8 Bytes of size)
+        #
+        def cache_small?
+          size_of(cache_path) < 8
+        end
+        # Is the cache ok? (existing and larger than
+        # zero Bytes in size)
+        #
+        # A small cache is still ok.
+        #
+        def cache_ok?
+          size_of(cache_path) > 0
+        end
+        # Extracts the size of the file in Bytes.
+        #
+        def size_of path
+          `ls -l #{path} | awk '{print $5}'`.to_i
+        end
+        #
+        #
+        def to_s
+          "#{self.class}(#{cache_path})"
+        end
+      end
+    end
+  end
+end

data/lib/picky/backends/memory/json.rb ADDED Viewed

@@ -0,0 +1,41 @@
+module Picky
+  module Backends
+    class Memory
+      # Memory-based index files dumped in the JSON format.
+      #
+      class JSON < Basic
+        # Uses the extension "json".
+        #
+        def extension
+          :json
+        end
+        # Loads the index hash from json format.
+        #
+        def load
+          Yajl::Parser.parse ::File.open(cache_path, 'r'), symbolize_keys: true
+        end
+        # Dumps the index hash in json format.
+        #
+        def dump hash
+          hash.dump_json cache_path
+        end
+        # A json file does not provide retrieve functionality.
+        #
+        def retrieve
+          raise "Can't retrieve from JSON file. Use text file."
+        end
+      end
+    end
+  end
+end

data/lib/picky/backends/{file → memory}/marshal.rb RENAMED Viewed

@@ -2,7 +2,7 @@ module Picky
   module Backends
-    module File
+    class Memory
       # Index data in the Ruby Marshal format.
       #
@@ -13,16 +13,19 @@ module Picky
         def extension
           :dump
         end
         # Loads the index hash from marshal format.
         #
         def load
           ::Marshal.load ::File.open(cache_path, 'r:binary')
         end
         # Dumps the index hash in marshal format.
         #
         def dump hash
           hash.dump_marshal cache_path
         end
         # A marshal file does not provide retrieve functionality.
         #
         def retrieve

data/lib/picky/backends/{file → memory}/text.rb RENAMED Viewed

@@ -2,10 +2,12 @@ module Picky
   module Backends
-    module File
+    class Memory
       # Index data dumped in the text format.
       #
+      # TODO Should this really be Memory::Text?
+      #
       class Text < Basic
         # Uses the extension "txt".
@@ -13,12 +15,14 @@ module Picky
         def extension
           :txt
         end
         # Text files are used exclusively for
         # prepared data files.
         #
         def load
           raise "Can't load from text file. Use JSON or Marshal."
         end
         # Text files are used exclusively for
         # prepared data files.
         #

data/lib/picky/backends/memory.rb CHANGED Viewed

@@ -4,17 +4,29 @@ module Picky
     class Memory < Backend
+      # Returns an object that responds to:
+      #   [:token] # => [id, id, id, id, id] (an array of ids)
+      #
       def create_inverted bundle
-        File::JSON.new bundle.index_path(:inverted)
+        JSON.new bundle.index_path(:inverted)
       end
+      # Returns an object that responds to:
+      #   [:token] # => 1.23 (a weight)
+      #
       def create_weights bundle
-        File::JSON.new bundle.index_path(:weights)
+        JSON.new bundle.index_path(:weights)
       end
+      # Returns an object that responds to:
+      #   [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
+      #
       def create_similarity bundle
-        File::Marshal.new bundle.index_path(:similarity)
+        Marshal.new bundle.index_path(:similarity)
       end
+      # Returns an object that responds to:
+      #   [:key] # => value (a value for this config key)
+      #
       def create_configuration bundle
-        File::JSON.new bundle.index_path(:configuration)
+        JSON.new bundle.index_path(:configuration)
       end
       # Returns the result ids for the allocation.
@@ -31,8 +43,6 @@ module Picky
       #       We cannot use the information to speed up the algorithm, unfortunately.
       #
       def ids combinations, _, _
-        return [] if combinations.empty?
         # Get the ids for each combination.
         #
         id_arrays = combinations.inject([]) do |total, combination|

data/lib/picky/backends/redis/{float_hash.rb → float.rb} RENAMED Viewed

@@ -4,7 +4,7 @@ module Picky
     class Redis
-      class FloatHash < StringHash
+      class Float < String
         # Get a single value.
         #

data/lib/picky/backends/redis/{list_hash.rb → list.rb} RENAMED Viewed

@@ -4,7 +4,7 @@ module Picky
     class Redis
-      class ListHash < Basic
+      class List < Basic
         # Writes the hash into Redis.
         #

data/lib/picky/backends/redis/{string_hash.rb → string.rb} RENAMED Viewed

@@ -4,7 +4,7 @@ module Picky
     class Redis
-      class StringHash < Basic
+      class String < Basic
         # Writes the hash into Redis.
         #

data/lib/picky/backends/redis.rb CHANGED Viewed

@@ -12,17 +12,29 @@ module Picky
         @client = options[:client] || ::Redis.new(:db => (options[:db] || 15))
       end
+      # Returns an object that responds to:
+      #   [:token] # => [id, id, id, id, id] (an array of ids)
+      #
       def create_inverted bundle
-        Redis::ListHash.new client, "#{bundle.identifier}:inverted"
+        List.new client, "#{bundle.identifier}:inverted"
       end
+      # Returns an object that responds to:
+      #   [:token] # => 1.23 (a weight)
+      #
       def create_weights bundle
-        Redis::FloatHash.new client, "#{bundle.identifier}:weights"
+        Float.new client, "#{bundle.identifier}:weights"
       end
+      # Returns an object that responds to:
+      #   [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
+      #
       def create_similarity bundle
-        Redis::ListHash.new client, "#{bundle.identifier}:similarity"
+        List.new client, "#{bundle.identifier}:similarity"
       end
+      # Returns an object that responds to:
+      #   [:key] # => value (a value for this config key)
+      #
       def create_configuration bundle
-        Redis::StringHash.new client, "#{bundle.identifier}:configuration"
+        String.new client, "#{bundle.identifier}:configuration"
       end
       # Returns the result ids for the allocation.
@@ -34,8 +46,6 @@ module Picky
       # Note: We use the amount and offset hints to speed Redis up.
       #
       def ids combinations, amount, offset
-        return [] if combinations.empty?
         identifiers = combinations.inject([]) do |identifiers, combination|
           identifiers << "#{combination.identifier}"
         end

data/lib/picky/bundle.rb CHANGED Viewed

@@ -84,8 +84,11 @@ module Picky
     #  * partial index
     #  * similarity index
     #
-    def index_path type
-      ::File.join index_directory, "#{category.name}_#{name}_#{type}"
+    # Returns just the part without subindex type,
+    # if none given.
+    #
+    def index_path type = nil
+      ::File.join index_directory, "#{category.name}_#{name}#{ "_#{type}" if type }"
     end
     # Copies the indexes to the "backup" directory.

data/lib/picky/category.rb CHANGED Viewed

@@ -92,7 +92,7 @@ module Picky
     # Note: If you don't use it with the block, do not forget to close it.
     #
     def prepared_index_file &block
-      @prepared_index_file ||= Backends::File::Text.new prepared_index_path
+      @prepared_index_file ||= Backends::Memory::Text.new prepared_index_path
       @prepared_index_file.open &block
     end
     # Creates the index directory including all necessary paths above it.

data/lib/picky/cores.rb CHANGED Viewed

@@ -23,6 +23,13 @@ module Picky
       ary_or_generator = ary_or_generator.sort_by { rand } if options[:randomly]
       generator        = ary_or_generator.each
+      # Don't fork if there's just one element.
+      #
+      if generator.inject(0) { |total, element| total + 1 } == 1
+        yield generator.next
+        return
+      end
       # Get the maximum number of processors.
       #
       max                  = max_processors options

data/lib/picky/extensions/symbol.rb CHANGED Viewed

@@ -17,4 +17,26 @@ class Symbol # :nodoc:all
     size.downto(from_length + 1) { yield sub.chop!.intern }
   end
+  # :keys.each_intoken         # => yields each of [:keys, :key, :eys, :ke, :ey, :ys, :k, :e, :y, :s]
+  # :keys.each_intoken(2)      # => yields each of [:keys, :key, :eys, :ke, :ey, :ys]
+  # :keys.each_intoken(2, 3)   # => yields each of [:key, :eys, :ke, :ey, :ys]
+  # :keys.each_intoken(10, 12) # => yields nothing (min larger than sym)
+  #
+  def each_intoken min_length = 1, max_length = -1
+    max_length = size + max_length + 1 if max_length < 0
+    max_length = size if size < max_length
+    max_length = 1 if max_length < 1
+    min_length = size + min_length + 1 if min_length < 0
+    min_length = 1 if min_length < 1
+    this_many = size - max_length + 1
+    max_length.downto(min_length) do |length|
+      this_many.times do |offset|
+        yield self[offset, length].intern
+      end
+      this_many += 1
+    end
+  end
 end