RubyGems - picky - Versions diffs - 3.0.1 → 3.1.0 - Mend

picky 3.0.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (106)

data/lib/picky/application.rb +12 -12
data/lib/picky/backends/backend.rb +17 -0
data/lib/picky/{backend → backends}/file/basic.rb +1 -1
data/lib/picky/{backend → backends}/file/json.rb +1 -1
data/lib/picky/{backend → backends}/file/marshal.rb +1 -1
data/lib/picky/{backend → backends}/file/text.rb +1 -1
data/lib/picky/backends/memory.rb +53 -0
data/lib/picky/{backend → backends}/redis/basic.rb +9 -14
data/lib/picky/backends/redis/float_hash.rb +26 -0
data/lib/picky/{backend → backends}/redis/list_hash.rb +7 -11
data/lib/picky/{backend → backends}/redis/string_hash.rb +7 -11
data/lib/picky/backends/redis.rb +87 -0
data/lib/picky/bundle.rb +107 -11
data/lib/picky/category.rb +5 -5
data/lib/picky/index.rb +329 -0
data/lib/picky/index_indexed.rb +31 -0
data/lib/picky/index_indexing.rb +161 -0
data/lib/picky/indexed/bundle.rb +112 -0
data/lib/picky/indexed/wrappers/exact_first.rb +1 -1
data/lib/picky/indexers/parallel.rb +2 -1
data/lib/picky/indexers/serial.rb +2 -1
data/lib/picky/indexes_indexing.rb +1 -1
data/lib/picky/indexing/bundle.rb +188 -0
data/lib/picky/indexing/wrappers/category/location.rb +1 -1
data/lib/picky/interfaces/live_parameters.rb +8 -8
data/lib/picky/loader.rb +24 -38
data/lib/picky/migrations/from_30_to_31.rb +61 -0
data/lib/picky/query/allocation.rb +10 -5
data/lib/picky/query/combinations.rb +70 -0
data/lib/picky/query/indexes.rb +8 -7
data/lib/picky/query/indexes_check.rb +47 -0
data/lib/picky/query/token.rb +16 -29
data/lib/picky/query/tokens.rb +4 -20
data/lib/picky/search.rb +51 -58
data/lib/picky/tokenizer.rb +231 -0
data/lib/picky/tokenizers/location.rb +1 -1
data/lib/tasks/try.rake +4 -12
data/lib/tasks/try.rb +37 -0
data/spec/lib/application_spec.rb +5 -5
data/spec/lib/{backend → backends}/file/basic_spec.rb +2 -2
data/spec/lib/{backend → backends}/file/json_spec.rb +2 -2
data/spec/lib/{backend → backends}/file/marshal_spec.rb +2 -2
data/spec/lib/{backend → backends}/file/text_spec.rb +1 -1
data/spec/lib/backends/memory_spec.rb +77 -0
data/spec/lib/{backend → backends}/redis/basic_spec.rb +19 -21
data/spec/lib/backends/redis/float_hash_spec.rb +38 -0
data/spec/lib/backends/redis/list_hash_spec.rb +27 -0
data/spec/lib/backends/redis/string_hash_spec.rb +38 -0
data/spec/lib/backends/redis_spec.rb +79 -0
data/spec/lib/categories_indexed_spec.rb +3 -3
data/spec/lib/category_indexed_spec.rb +6 -6
data/spec/lib/category_indexing_spec.rb +1 -1
data/spec/lib/category_spec.rb +1 -1
data/spec/lib/frontend_adapters/rack_spec.rb +2 -2
data/spec/lib/{indexes/index_indexed_spec.rb → index_indexed_spec.rb} +1 -1
data/spec/lib/{indexes/index_indexing_spec.rb → index_indexing_spec.rb} +1 -1
data/spec/lib/{indexes/index_spec.rb → index_spec.rb} +1 -1
data/spec/lib/indexed/{bundle/memory_spec.rb → memory_spec.rb} +18 -18
data/spec/lib/indexed/wrappers/exact_first_spec.rb +2 -2
data/spec/lib/indexing/{bundle/memory_partial_generation_speed_spec.rb → bundle_partial_generation_speed_spec.rb} +3 -3
data/spec/lib/indexing/bundle_spec.rb +302 -0
data/spec/lib/query/allocation_spec.rb +21 -11
data/spec/lib/query/combination_spec.rb +2 -2
data/spec/lib/query/{combinations/base_spec.rb → combinations_spec.rb} +1 -1
data/spec/lib/query/indexes_check_spec.rb +25 -0
data/spec/lib/query/indexes_spec.rb +5 -1
data/spec/lib/query/token_spec.rb +18 -20
data/spec/lib/query/tokens_spec.rb +14 -65
data/spec/lib/search_spec.rb +36 -37
data/spec/lib/tasks/try_spec.rb +51 -0
data/spec/lib/{tokenizers/base_spec.rb → tokenizer_spec.rb} +15 -44
metadata +64 -81
data/lib/picky/backend/base.rb +0 -121
data/lib/picky/backend/files.rb +0 -28
data/lib/picky/backend/redis.rb +0 -44
data/lib/picky/indexed/bundle/base.rb +0 -47
data/lib/picky/indexed/bundle/memory.rb +0 -88
data/lib/picky/indexed/bundle/redis.rb +0 -91
data/lib/picky/indexes/index.rb +0 -328
data/lib/picky/indexes/index_indexed.rb +0 -35
data/lib/picky/indexes/index_indexing.rb +0 -165
data/lib/picky/indexes/memory.rb +0 -20
data/lib/picky/indexes/redis.rb +0 -20
data/lib/picky/indexing/bundle/base.rb +0 -242
data/lib/picky/indexing/bundle/memory.rb +0 -26
data/lib/picky/indexing/bundle/redis.rb +0 -26
data/lib/picky/query/combinations/base.rb +0 -74
data/lib/picky/query/combinations/memory.rb +0 -52
data/lib/picky/query/combinations/redis.rb +0 -90
data/lib/picky/query.rb +0 -6
data/lib/picky/tokenizers/base.rb +0 -231
data/lib/picky/tokenizers/index.rb +0 -34
data/lib/picky/tokenizers/query.rb +0 -61
data/spec/lib/backend/files_spec.rb +0 -189
data/spec/lib/backend/redis/list_hash_spec.rb +0 -40
data/spec/lib/backend/redis/string_hash_spec.rb +0 -47
data/spec/lib/backend/redis_spec.rb +0 -170
data/spec/lib/indexed/bundle/redis_spec.rb +0 -41
data/spec/lib/indexes/redis_spec.rb +0 -15
data/spec/lib/indexing/bundle/base_spec.rb +0 -38
data/spec/lib/indexing/bundle/memory_spec.rb +0 -287
data/spec/lib/indexing/bundle/redis_spec.rb +0 -283
data/spec/lib/query/combinations/memory_spec.rb +0 -158
data/spec/lib/query/combinations/redis_spec.rb +0 -172
data/spec/lib/tokenizers/index_spec.rb +0 -69
data/spec/lib/tokenizers/query_spec.rb +0 -121

data/lib/picky/indexing/bundle/base.rb DELETED Viewed

@@ -1,242 +0,0 @@
-module Picky
-  module Indexing # :nodoc:all
-    # A Bundle is a number of indexes
-    # per [index, category] combination.
-    #
-    # At most, there are three indexes:
-    # * *core* index (always used)
-    # * *weights* index (always used)
-    # * *similarity* index (used with similarity)
-    #
-    # In Picky, indexing is separated from the index
-    # handling itself through a parallel structure.
-    #
-    # Both use methods provided by this base class, but
-    # have very different goals:
-    #
-    # * *Indexing*::*Bundle* is just concerned with creating index files
-    #   and providing helper functions to e.g. check the indexes.
-    #
-    # * *Index*::*Bundle* is concerned with loading these index files into
-    #   memory and looking up search data as fast as possible.
-    #
-    module Bundle
-      # This is the indexing bundle.
-      #
-      # It does all menial tasks that have nothing to do
-      # with the actual index running etc.
-      #
-      class Base < Picky::Bundle
-        attr_reader :backend,
-                    :prepared
-        attr_accessor :partial_strategy,
-                      :weights_strategy
-        def initialize name, category, weights_strategy, partial_strategy, similarity_strategy, options = {}
-          super name, category, similarity_strategy, options
-          @weights_strategy = weights_strategy
-          @partial_strategy = partial_strategy
-          @key_format       = options[:key_format]
-          @prepared         = Backend::File::Text.new category.prepared_index_path
-        end
-        # Sets up a piece of the index for the given token.
-        #
-        def initialize_inverted_index_for token
-          self.inverted[token] ||= []
-        end
-        # Generation
-        #
-        # This method
-        # * Loads the base index from the "prepared..." file.
-        # * Generates derived indexes.
-        # * Dumps all the indexes into files.
-        #
-        def generate_caches_from_source
-          load_from_prepared_index_file
-          generate_caches_from_memory
-        end
-        # Generates derived indexes from the index and dumps.
-        #
-        # Note: assumes that there is something in the index
-        #
-        def generate_caches_from_memory
-          cache_from_memory_generation_message
-          generate_derived
-        end
-        def cache_from_memory_generation_message
-          timed_exclaim %Q{"#{identifier}": Caching from intermediate in-memory index.}
-        end
-        # Generates the weights and similarity from the main index.
-        #
-        def generate_derived
-          generate_weights
-          generate_similarity
-        end
-        # Load the data from the db.
-        #
-        def load_from_prepared_index_file
-          load_from_prepared_index_generation_message
-          clear
-          retrieve
-        end
-        def load_from_prepared_index_generation_message
-          timed_exclaim %Q{"#{identifier}": Loading prepared data into memory.}
-        end
-        # Retrieves the prepared index data into the index.
-        #
-        # This is in preparation for generating
-        # derived indexes (like weights, similarity)
-        # and later dumping the optimized index.
-        #
-        # TODO Move this out to the category?
-        #
-        def retrieve
-          format = category.key_format || :to_i # Optimization.
-          prepared.retrieve do |id, token|
-            initialize_inverted_index_for token
-            self.inverted[token] << id.send(format)
-          end
-        end
-        # Generates a new index (writes its index) using the
-        # partial caching strategy of this bundle.
-        #
-        def generate_partial
-          generator = Generators::PartialGenerator.new self.inverted
-          self.inverted = generator.generate self.partial_strategy
-        end
-        # Generate a partial index from the given exact inverted index.
-        #
-        def generate_partial_from exact_inverted_index
-          timed_exclaim %Q{"#{identifier}": Generating partial index for index.}
-          self.inverted = exact_inverted_index
-          self.generate_partial
-          self
-        end
-        # Generates a new weights index (writes its index) using the
-        # given weight caching strategy.
-        #
-        def generate_weights
-          generator = Generators::WeightsGenerator.new self.inverted
-          self.weights = generator.generate self.weights_strategy
-        end
-        # Generates a new similarity index (writes its index) using the
-        # given similarity caching strategy.
-        #
-        def generate_similarity
-          generator = Generators::SimilarityGenerator.new self.inverted
-          self.similarity = generator.generate self.similarity_strategy
-        end
-        # Saves the indexes in a dump file.
-        #
-        def dump
-          timed_exclaim %Q{"#{identifier}": Dumping data.}
-          dump_inverted
-          dump_similarity
-          dump_weights
-          dump_configuration
-        end
-        # Dumps the core index.
-        #
-        def dump_inverted
-          # timed_exclaim %Q{"#{identifier}": Dumping inverted index.}
-          backend.dump_inverted self.inverted
-        end
-        # Dumps the weights index.
-        #
-        def dump_weights
-          # timed_exclaim %Q{"#{identifier}": Dumping index weights.}
-          backend.dump_weights self.weights
-        end
-        # Dumps the similarity index.
-        #
-        def dump_similarity
-          # timed_exclaim %Q{"#{identifier}": Dumping similarity index.}
-          backend.dump_similarity self.similarity
-        end
-        # Dumps the configuration.
-        #
-        def dump_configuration
-          # timed_exclaim %Q{"#{identifier}": Dumping configuration.}
-          backend.dump_configuration self.configuration
-        end
-        # Alerts the user if an index is missing.
-        #
-        def raise_unless_cache_exists
-          raise_unless_index_exists
-          raise_unless_similarity_exists
-        end
-        # Alerts the user if one of the necessary indexes
-        # (core, weights) is missing.
-        #
-        def raise_unless_index_exists
-          if partial_strategy.saved?
-            warn_if_index_small
-            raise_unless_index_ok
-          end
-        end
-        # Alerts the user if the similarity
-        # index is missing (given that it's used).
-        #
-        def raise_unless_similarity_exists
-          if similarity_strategy.saved?
-            warn_if_similarity_small
-            raise_unless_similarity_ok
-          end
-        end
-        # Outputs a warning for the given cache.
-        #
-        def warn_cache_small what
-          warn "Warning: #{what} cache for #{identifier} smaller than 16 bytes."
-        end
-        # Raises an appropriate error message for the given cache.
-        #
-        def raise_cache_missing what
-          raise "Error: The #{what} cache for #{identifier} is missing."
-        end
-        # Warns the user if the similarity index is small.
-        #
-        def warn_if_similarity_small
-          warn_cache_small :similarity if backend.similarity_cache_small?
-        end
-        # Alerts the user if the similarity index is not there.
-        #
-        def raise_unless_similarity_ok
-          raise_cache_missing :similarity unless backend.similarity_cache_ok?
-        end
-        # Warns the user if the core or weights indexes are small.
-        #
-        def warn_if_index_small
-          warn_cache_small :inverted if backend.inverted_cache_small?
-          warn_cache_small :weights  if backend.weights_cache_small?
-        end
-        # Alerts the user if the core or weights indexes are not there.
-        #
-        def raise_unless_index_ok
-          raise_cache_missing :inverted unless backend.inverted_cache_ok?
-          raise_cache_missing :weights  unless backend.weights_cache_ok?
-        end
-      end
-    end
-  end
-end

data/lib/picky/indexing/bundle/memory.rb DELETED Viewed

@@ -1,26 +0,0 @@
-module Picky
-  # encoding: utf-8
-  #
-  module Indexing # :nodoc:all
-    module Bundle
-      # The memory version dumps its generated indexes to disk
-      # (mostly JSON) to load them into memory on startup.
-      #
-      class Memory < Base
-        def initialize name, category, *args
-          super name, category, *args
-          @backend = Backend::Files.new self
-        end
-      end
-    end
-  end
-end

data/lib/picky/indexing/bundle/redis.rb DELETED Viewed

@@ -1,26 +0,0 @@
-module Picky
-  # encoding: utf-8
-  #
-  module Indexing # :nodoc:all
-    module Bundle
-      # The Redis version dumps its generated indexes to
-      # the Redis backend.
-      #
-      class Redis < Base
-        def initialize name, category, *args
-          super name, category, *args
-          @backend = Backend::Redis.new self
-        end
-      end
-    end
-  end
-end

data/lib/picky/query/combinations/base.rb DELETED Viewed

@@ -1,74 +0,0 @@
-module Picky
-  module Query
-    # Combinations are a number of Combination-s.
-    #
-    # They are the core of an allocation.
-    # An allocation consists of a number of combinations.
-    #
-    module Combinations # :nodoc:all
-      # Base Combinations contain methods for calculating score and ids.
-      #
-      class Base
-        attr_reader :combinations
-        delegate :empty?, :to => :@combinations
-        def initialize combinations = []
-          @combinations = combinations
-        end
-        def hash
-          @combinations.hash
-        end
-        # Uses user specific weights to calculate a score for the combinations.
-        #
-        def calculate_score weights
-          total_score + weighted_score(weights)
-        end
-        def total_score
-          @combinations.sum &:weight
-        end
-        def weighted_score weights
-          weights.score_for @combinations
-        end
-        # Filters the tokens and identifiers such that only identifiers
-        # that are passed in, remain, including their tokens.
-        #
-        # Note: This method is not totally independent of the calculate_ids one.
-        #       Since identifiers are only nullified, we need to not include the
-        #       ids that have an associated identifier that is nil.
-        #
-        def keep identifiers = []
-          @combinations.reject! { |combination| !combination.in?(identifiers) }
-        end
-        # Filters the tokens and identifiers such that identifiers
-        # that are passed in, are removed, including their tokens.
-        #
-        # Note: This method is not totally independent of the calculate_ids one.
-        #       Since identifiers are only nullified, we need to not include the
-        #       ids that have an associated identifier that is nil.
-        #
-        def remove identifiers = []
-          @combinations.reject! { |combination| combination.in?(identifiers) }
-        end
-        #
-        #
-        def to_result
-          @combinations.map &:to_result
-        end
-      end
-    end
-  end
-end

data/lib/picky/query/combinations/memory.rb DELETED Viewed

@@ -1,52 +0,0 @@
-module Picky
-  module Query
-    # Combinations are a number of Combination-s.
-    #
-    # They are the core of an allocation.
-    # An allocation consists of a number of combinations.
-    #
-    module Combinations # :nodoc:all
-      # Memory Combinations contain specific methods for
-      # calculating score and ids in memory.
-      #
-      class Memory < Base
-        # Returns the result ids for the allocation.
-        #
-        # Sorts the ids by size and & through them in the following order (sizes):
-        # 0. [100_000, 400, 30, 2]
-        # 1. [2, 30, 400, 100_000]
-        # 2. (100_000 & (400 & (30 & 2))) # => result
-        #
-        # Note: Uses a C-optimized intersection routine (in performant.c)
-        #       for speed and memory efficiency.
-        #
-        # Note: In the memory based version we ignore the (amount) needed hint.
-        #       We cannot use the information to speed up the algorithm, unfortunately.
-        #
-        def ids _, _
-          return [] if @combinations.empty?
-          # Get the ids for each combination.
-          #
-          id_arrays = @combinations.inject([]) do |total, combination|
-            total << combination.ids
-          end
-          # Call the optimized C algorithm.
-          #
-          # Note: It orders the passed arrays by size.
-          #
-          Performant::Array.memory_efficient_intersect id_arrays
-        end
-      end
-    end
-  end
-end

data/lib/picky/query/combinations/redis.rb DELETED Viewed

@@ -1,90 +0,0 @@
-module Picky
-  module Query
-    # Combinations are a number of Combination-s.
-    #
-    # They are the core of an allocation.
-    # An allocation consists of a number of combinations.
-    #
-    module Combinations # :nodoc:all
-      # Redis Combinations contain specific methods for
-      # calculating score and ids in Redis.
-      #
-      class Redis < Base
-        # Connect to the backend.
-        #
-        # TODO Use specific Picky Redis wrapper.
-        #
-        def self.redis
-          @redis ||= ::Redis.new :db => 15
-        end
-        attr_reader :redis
-        #
-        #
-        def initialize combinations
-          super combinations
-          @redis = self.class.redis
-        end
-        # Returns the result ids for the allocation.
-        #
-        def ids amount, offset
-          return [] if @combinations.empty?
-          identifiers = @combinations.inject([]) do |identifiers, combination|
-            identifiers << "#{combination.identifier}"
-          end
-          result_id = generate_intermediate_result_id
-          # Intersect and store.
-          #
-          redis.zinterstore result_id, identifiers
-          # Get the stored result.
-          #
-          results = redis.zrange result_id, offset, (offset + amount)
-          # Delete the stored result as it was only for temporary purposes.
-          #
-          # Note: I could also not delete it, but that would not be clean at all.
-          #
-          redis.del result_id
-          results
-        end
-        # Generate a multiple host/process safe result id.
-        #
-        # Note: Generated when this class loads.
-        #
-        require 'socket'
-        def self.extract_host
-          @host ||= Socket.gethostname
-        end
-        def host
-          self.class.extract_host
-        end
-        extract_host
-        def pid
-          @pid ||= Process.pid
-        end
-        # Use the host and pid (generated lazily in child processes) for the result.
-        #
-        def generate_intermediate_result_id
-          :"#{host}:#{pid}:picky:result"
-        end
-      end
-    end
-  end
-end

data/lib/picky/query.rb DELETED Viewed

@@ -1,6 +0,0 @@
-module Picky
-  module Query # :nodoc:all
-  end
-end