RubyGems - picky - Versions diffs - 3.6.16 → 4.0.0pre1 - Mend

picky 3.6.16 → 4.0.0pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

data/lib/picky/application.rb +1 -1
data/lib/picky/backends/backend.rb +2 -0
data/lib/picky/backends/memory.rb +14 -7
data/lib/picky/backends/{memory → prepared}/text.rb +10 -4
data/lib/picky/backends/redis/directly_manipulable.rb +3 -5
data/lib/picky/backends/redis/list.rb +5 -1
data/lib/picky/backends/sqlite/basic.rb +4 -2
data/lib/picky/bundle.rb +6 -7
data/lib/picky/bundle_indexed.rb +2 -2
data/lib/picky/bundle_realtime.rb +8 -7
data/lib/picky/categories.rb +0 -1
data/lib/picky/categories_indexing.rb +14 -0
data/lib/picky/category.rb +3 -5
data/lib/picky/category_indexed.rb +2 -5
data/lib/picky/category_indexing.rb +28 -16
data/lib/picky/constants.rb +3 -1
data/lib/picky/frontend_adapters/rack.rb +2 -2
data/lib/picky/generators/similarity/phonetic.rb +6 -14
data/lib/picky/generators/strategy.rb +1 -1
data/lib/picky/generators/weights/runtime.rb +2 -2
data/lib/picky/helpers/indexing.rb +20 -0
data/lib/picky/index.rb +7 -10
data/lib/picky/index_indexed.rb +1 -8
data/lib/picky/index_indexing.rb +44 -42
data/lib/picky/indexers/base.rb +5 -6
data/lib/picky/indexers/parallel.rb +35 -32
data/lib/picky/indexers/serial.rb +38 -15
data/lib/picky/indexes_indexed.rb +0 -7
data/lib/picky/indexes_indexing.rb +16 -19
data/lib/picky/loader.rb +6 -4
data/lib/picky/query/allocation.rb +7 -2
data/lib/picky/query/combination.rb +1 -1
data/lib/picky/query/indexes.rb +1 -1
data/lib/picky/query/indexes_check.rb +12 -14
data/lib/picky/query/token.rb +33 -15
data/lib/picky/results/exact_first.rb +53 -0
data/lib/picky/scheduler.rb +43 -0
data/lib/picky/search.rb +0 -2
data/lib/picky/sources/csv.rb +2 -3
data/lib/picky/sources/db.rb +4 -3
data/lib/picky/sources/mongo.rb +1 -1
data/lib/picky/tokenizer.rb +0 -4
data/lib/picky/wrappers/bundle/location.rb +1 -1
data/lib/picky.rb +2 -2
data/lib/tasks/index.rake +13 -14
data/spec/functional/backends/file_spec.rb +2 -4
data/spec/functional/backends/memory_spec.rb +2 -2
data/spec/functional/backends/redis_spec.rb +1 -1
data/spec/functional/exact_first_spec.rb +24 -4
data/spec/functional/realtime_spec.rb +7 -3
data/spec/lib/application_spec.rb +30 -30
data/spec/lib/backends/backend_spec.rb +25 -27
data/spec/lib/backends/{memory → prepared}/text_spec.rb +1 -1
data/spec/lib/category_indexing_spec.rb +1 -1
data/spec/lib/extensions/symbol_spec.rb +1 -1
data/spec/lib/generators/similarity/phonetic_spec.rb +46 -0
data/spec/lib/index_indexed_spec.rb +5 -5
data/spec/lib/index_indexing_spec.rb +13 -12
data/spec/lib/index_spec.rb +8 -8
data/spec/lib/indexers/base_spec.rb +5 -6
data/spec/lib/indexers/parallel_spec.rb +10 -10
data/spec/lib/indexes_indexed_spec.rb +1 -7
data/spec/lib/indexes_indexing_spec.rb +10 -5
data/spec/lib/query/indexes_check_spec.rb +44 -15
data/spec/lib/query/indexes_spec.rb +11 -11
data/spec/lib/query/token_spec.rb +10 -0
data/spec/lib/{indexed/wrappers → results}/exact_first_spec.rb +18 -21
data/spec/lib/scheduler_spec.rb +92 -0
metadata +45 -34
data/lib/picky/cores.rb +0 -127
data/lib/picky/tokenizers/location.rb +0 -53
data/lib/picky/wrappers/category/exact_first.rb +0 -94
data/spec/lib/cores_spec.rb +0 -185

data/lib/picky/index_indexing.rb CHANGED Viewed

@@ -4,30 +4,14 @@ module Picky
   #
   class Index
+    include Helpers::Indexing
     # Delegators for indexing.
     #
     delegate :cache,
              :clear,
-             :prepare,
              :to => :categories
-    # Calling index on an index will call index
-    # on every category.
-    #
-    # Decides whether to use a parallel indexer or whether to
-    # delegate to each category to index themselves.
-    #
-    def index
-      if source.respond_to?(:each)
-        check_source_empty
-        index_in_parallel
-      else
-        with_data_snapshot do
-          categories.index
-        end
-      end
-    end
     # Define an index tokenizer on the index.
     #
     # Parameters are the exact same as for indexing.
@@ -39,7 +23,35 @@ module Picky
         options && Tokenizer.new(options)
       end
     end
-    alias define_indexing indexing
+    #
+    #
+    def index scheduler = Scheduler.new
+      timed_indexing scheduler do
+        prepare scheduler
+        scheduler.finish
+        cache scheduler
+        scheduler.finish
+      end
+    end
+    # Calling prepare on an index will call prepare
+    # on every category.
+    #
+    # Decides whether to use a parallel indexer or whether to
+    # delegate to each category to prepare themselves.
+    #
+    def prepare scheduler = Scheduler.new
+      if source.respond_to?(:each)
+        check_source_empty
+        prepare_in_parallel scheduler
+      else
+        with_data_snapshot do
+          categories.prepare scheduler
+        end
+      end
+    end
     # Check if the given enumerable source is empty.
     #
@@ -50,6 +62,15 @@ module Picky
       warn %Q{\n\033[1mWarning\033[m, source for index "#{name}" is empty: #{source} (responds true to empty?).\n} if source.respond_to?(:empty?) && source.empty?
     end
+    # Indexes the categories in parallel.
+    #
+    # Only use where the category does have a #each source defined.
+    #
+    def prepare_in_parallel scheduler
+      indexer = Indexers::Parallel.new self
+      indexer.prepare categories, scheduler
+    end
     # Note: Duplicated in category_indexing.rb.
     #
     # Take a data snapshot if the source offers it.
@@ -64,15 +85,6 @@ module Picky
       end
     end
-    # Indexes the categories in parallel.
-    #
-    # Only use where the category does have a #each source defined.
-    #
-    def index_in_parallel
-      indexer = Indexers::Parallel.new self
-      indexer.index categories
-    end
     # Returns the installed tokenizer or the default.
     #
     def tokenizer
@@ -87,7 +99,7 @@ module Picky
     #
     def source some_source = nil, &block
       some_source ||= block
-      some_source ? define_source(some_source) : (@source && extract_source)
+      some_source ? (check_source(some_source); @source = some_source) : (@source && extract_source)
     end
     # Extract the actual source if it is wrapped in a time
     # capsule, i.e. a block/lambda.
@@ -97,10 +109,6 @@ module Picky
     def extract_source
       @source = @source.respond_to?(:call) ? @source.call : @source
     end
-    def define_source source
-      check_source source
-      @source = source
-    end
     def check_source source # :nodoc:
       raise ArgumentError.new(<<-SOURCE
@@ -119,21 +127,15 @@ SOURCE
     #
     # Parameter is a method name to use on the key (e.g. :to_i, :to_s, :strip).
     #
-    def key_format format = nil
-      format ? define_key_format(format) : @key_format
-    end
-    def define_key_format key_format
-      @key_format = key_format
+    def key_format key_format = nil
+      key_format ? (@key_format = key_format) : @key_format
     end
     # Define what to do after indexing.
     # (Only used in the Sources::DB)
     #
     def after_indexing after_indexing = nil
-      after_indexing ? define_after_indexing(after_indexing) : @after_indexing
-    end
-    def define_after_indexing after_indexing
-      @after_indexing = after_indexing
+      after_indexing ? (@after_indexing = after_indexing) : @after_indexing
     end
   end

data/lib/picky/indexers/base.rb CHANGED Viewed

@@ -19,21 +19,20 @@ module Picky
       # Starts the indexing process.
       #
-      def index categories
+      def prepare categories, scheduler = Scheduler.new
         check_source
         categories.empty
-        process categories do |file|
-          notify_finished file
+        process categories, scheduler do |prepared_file|
+          notify_finished prepared_file
         end
-        categories.cache
       end
       def check_source # :nodoc:
         raise "Trying to index without a source for #{@index_or_category.name}." unless source
       end
-      def notify_finished file
-        timed_exclaim %Q{"#{@index_or_category.identifier}": Tokenized -> #{file.path.gsub("#{PICKY_ROOT}/", '')}.}
+      def notify_finished prepared_file
+        timed_exclaim %Q{  "#{@index_or_category.identifier}": Tokenized -> #{prepared_file.path.gsub("#{PICKY_ROOT}/", '')}.}
       end
     end

data/lib/picky/indexers/parallel.rb CHANGED Viewed

@@ -15,62 +15,65 @@ module Picky
       # Parameters:
       #  * categories: An Enumerable of Category-s.
       #
-      def process categories
-        comma   = ?,
-        newline = ?\n
+      def process categories, scheduler = Scheduler.new
         # Prepare a combined object - array.
         #
         combined = categories.map do |category|
-          [category, [], category.prepared_index_file, (category.tokenizer || tokenizer)]
+          [category, category.prepared_index_file, [], (category.tokenizer || tokenizer)]
         end
-        # Index.
-        #
-        # TODO Extract into flush_every(100_000) do
-        #
-        i = 0
         # Explicitly reset the source to avoid caching trouble.
         #
         source.reset if source.respond_to?(:reset)
         # Go through each object in the source.
         #
+        objects = []
         source.each do |object|
-          id = object.id
-          # This needs to be rewritten.
+          # Accumulate objects.
           #
-          # Is it a good idea that not the tokenizer has control over when he gets the next text?
+          objects << object
+          next if objects.size < 10_000
+          # THINK Is it a good idea that not the tokenizer has
+          # control over when he gets the next text?
           #
-          combined.each do |category, cache, _, tokenizer|
-            tokens, _ = tokenizer.tokenize object.send(category.from) # Note: Originals not needed.
-            tokens.each do |token_text|
-              next unless token_text
-              cache << id << comma << token_text << newline
-            end
+          combined.each do |category, file, cache, tokenizer|
+            index_flush objects, file, category, cache, tokenizer
           end
-          if i >= 100_000
-            flush combined
-            i = 0
-          end
-          i += 1
+          objects.clear
         end
-        flush combined
-        combined.each do |_, _, file, _|
+        # Close all files.
+        #
+        combined.each do |category, file, cache, tokenizer|
+          index_flush objects, file, category, cache, tokenizer
           yield file
           file.close
         end
       end
-      # Flush the combined array into the file.
-      #
-      def flush combined # :nodoc:
-        combined.each do |_, cache, file, _|
-          file.write(cache.join) && cache.clear
+      def index_flush objects, file, category, cache, tokenizer
+        comma   = ?,
+        newline = ?\n
+        objects.each do |object|
+          tokens, _ = tokenizer.tokenize object.send(category.from) # Note: Originals not needed.
+          tokens.each do |token_text|
+            next unless token_text
+            cache << object.id << comma << token_text << newline
+          end
         end
+        flush file, cache
+      end
+      def flush file, cache
+        file.write(cache.join) && cache.clear
       end
     end

data/lib/picky/indexers/serial.rb CHANGED Viewed

@@ -16,33 +16,56 @@ module Picky
       # Parameters:
       #  * categories: An enumerable of Category-s.
       #
-      def process categories
-        comma   = ?,
-        newline = ?\n
+      def process categories, scheduler = Scheduler.new
         categories.each do |category|
-          tokenizer = category.tokenizer
           category.prepared_index_file do |file|
+            datas = []
             result = []
+            tokenizer = category.tokenizer
+            source.harvest(category) do |*data|
+              # Accumulate data.
+              #
+              datas << data
+              next if datas.size < 10_000
+              # Opening the file inside the scheduler to
+              # have it automagically closed.
+              #
+              index_flush datas, file, result, tokenizer
+              datas.clear
-            source.harvest(category) do |indexed_id, text|
-              tokens, _ = tokenizer.tokenize text # Note: Originals not needed.
-              tokens.each do |token_text|
-                next unless token_text
-                result << indexed_id << comma << token_text << newline
-              end
-              file.write(result.join) && result.clear if result.size > 100_000
             end
-            yield file
+            index_flush datas, file, result, tokenizer
-            file.write result.join
+            yield file
           end
+        end
+      end
+      def index_flush datas, file, cache, tokenizer
+        comma   = ?,
+        newline = ?\n
+        datas.each do |indexed_id, text|
+          tokens, _ = tokenizer.tokenize text # Note: Originals not needed.
+          tokens.each do |token_text|
+            next unless token_text
+            cache << indexed_id << comma << token_text << newline
+          end
         end
+        flush file, cache
+      end
+      def flush prepared_file, cache
+        prepared_file.write(cache.join) && cache.clear
       end
     end

data/lib/picky/indexes_indexed.rb CHANGED Viewed

@@ -8,15 +8,8 @@ module Picky
                       :analyze
     each_delegate :load,
-                  :reload,
                   :to => :indexes
-    # TODO Remove in 4.0.
-    #
-    def self.reload
-      self.instance.reload
-    end
   end
 end

data/lib/picky/indexes_indexing.rb CHANGED Viewed

@@ -4,35 +4,32 @@ module Picky
   #
   class Indexes
-    instance_delegate :index,
-                      :clear,
-                      :index_for_tests,
+    extend Helpers::Indexing
+    instance_delegate :clear,
                       :tokenizer
     each_delegate :clear,
                   :to => :indexes
-    # Runs the indexers in parallel (prepare + cache).
     #
-    def index randomly = true
-      # Run in parallel.
-      #
-      timed_exclaim "Indexing using #{Cores.max_processors} processors, in #{randomly ? 'random' : 'given'} order."
-      # Run indexing/caching forked.
-      #
-      Cores.forked self.indexes, { randomly: randomly }, &:index
-      timed_exclaim "Indexing finished."
+    #
+    def self.index scheduler = Scheduler.new
+      timed_indexing scheduler do
+        instance.index scheduler
+      end
     end
-    # For integration testing – indexes for the tests
-    # without forking and shouting ;)
     #
-    # TODO Rename to #index_without_forking, or just #index.
     #
-    def index_for_tests
-      indexes.each(&:index)
+    def index scheduler = Scheduler.new
+      indexes.each { |index| index.prepare scheduler }
+      scheduler.finish
+      timed_exclaim "Tokenizing finished, generating data for indexes from tokenized data."
+      indexes.each { |index| index.cache scheduler }
+      scheduler.finish
     end
     #

data/lib/picky/loader.rb CHANGED Viewed

@@ -64,6 +64,7 @@ module Picky
       # Requiring Helpers
       #
       load_relative 'helpers/measuring'
+      load_relative 'helpers/indexing'
       # Calculations.
       #
@@ -112,9 +113,10 @@ module Picky
       load_relative 'backends/helpers/file'
       load_relative 'backends/backend'
+      load_relative 'backends/prepared/text'
       load_relative 'backends/memory'
       load_relative 'backends/memory/basic'
-      load_relative 'backends/memory/text'
       load_relative 'backends/memory/marshal'
       load_relative 'backends/memory/json'
@@ -147,7 +149,6 @@ module Picky
       # Wrappers.
       #
       load_relative 'wrappers/category/location'
-      load_relative 'wrappers/category/exact_first'
       load_relative 'wrappers/bundle/delegators'
       load_relative 'wrappers/bundle/wrapper'
@@ -237,6 +238,7 @@ module Picky
       # Results.
       #
       load_relative 'results'
+      load_relative 'results/exact_first'
       # Search.
       #
@@ -259,9 +261,9 @@ module Picky
       #
       load_relative 'application'
-      # Load tools. Load in specific case?
+      # Load tools. Load specifically?
       #
-      load_relative 'cores'
+      load_relative 'scheduler'
       # Load migration notices.
       #

data/lib/picky/query/allocation.rb CHANGED Viewed

@@ -16,8 +16,11 @@ module Picky
       #
       def initialize index, combinations
         @combinations      = combinations
-        @result_identifier = index.result_identifier # TODO Make cleverer.
-        @backend           = index.backend           # TODO Make cleverer. Use inverted?
+        # Could this be rewritten?
+        #
+        @result_identifier = index.result_identifier
+        @backend           = index.backend
       end
       def hash
@@ -49,6 +52,8 @@ module Picky
       # This starts the searching process.
       #
+      # Returns the calculated ids (from the offset).
+      #
       def process! amount, offset
         ids    = calculate_ids amount, offset
         @count = ids.size                         # cache the count before throwing away the ids

data/lib/picky/query/combination.rb CHANGED Viewed

@@ -51,7 +51,7 @@ module Picky
       # Note: Required for uniq!
       #
-      # TODO Ok with category or is the bundle needed?
+      # THINK Ok with category or is the bundle needed?
       #
       def hash
         [token, category].hash

data/lib/picky/query/indexes.rb CHANGED Viewed

@@ -22,7 +22,7 @@ module Picky
       # Note: We cannot mix memory and redis indexes just yet.
       #
       def initialize *indexes
-        IndexesCheck.check_backend_types indexes
+        IndexesCheck.check_backends indexes
         @indexes = indexes

data/lib/picky/query/indexes_check.rb CHANGED Viewed

@@ -2,8 +2,6 @@ module Picky
   module Query
-    # TODO Remove.
-    #
     class IndexesCheck
       class << self
@@ -14,16 +12,16 @@ module Picky
         # Currently it isn't possible using Memory and Redis etc.
         # indexes in the same query index group.
         #
-        # Picky will raise a Query::Indexes::DifferentTypesError.
+        # Picky will raise a Query::Indexes::DifferentBackendsError.
         #
-        def check_backend_types index_definitions_ary # :nodoc:
-          backend_types = index_definitions_ary.map(&:backend).map(&:class)
-          backend_types.uniq!
-          raise_different backend_types if backend_types.size > 1
-          backend_types
+        def check_backends indexes # :nodoc:
+          backends = indexes.map &:backend
+          backends.uniq! &:class
+          raise_different backends if backends.size > 1
+          backends
         end
-        def raise_different backend_types # :nodoc:
-          raise DifferentTypesError.new(backend_types)
+        def raise_different backends # :nodoc:
+          raise DifferentBackendsError.new(backends)
         end
       end
@@ -33,12 +31,12 @@ module Picky
     # Currently it isn't possible using Memory and Redis etc.
     # indexes in the same query index group.
     #
-    class DifferentTypesError < StandardError # :nodoc:all
-      def initialize types
-        @types = types
+    class DifferentBackendsError < StandardError # :nodoc:all
+      def initialize backends
+        @backends = backends
       end
       def to_s
-        "Currently it isn't possible to mix Indexes with backends #{@types.join(" and ")} in the same Search instance."
+        "Currently it isn't possible to mix Indexes with backends #{@backends.join(" and ")} in the same Search instance."
       end
     end

data/lib/picky/query/token.rb CHANGED Viewed

@@ -6,7 +6,8 @@ module Picky
     #
     # It remembers the original form, and a normalized form.
     #
-    # It also knows whether it needs to look for similarity (bla~), or whether it is a partial (bla*).
+    # It also knows whether it needs to look for similarity (bla~),
+    # or whether it is a partial (bla*).
     #
     class Token # :nodoc:all
@@ -17,7 +18,8 @@ module Picky
       # Normal initializer.
       #
-      # Note: Use this if you do not want a normalized token.
+      # Note:
+      # Use this if you do not want a normalized token.
       #
       def initialize text, original = nil
         @text     = text
@@ -26,21 +28,25 @@ module Picky
       # Returns a qualified and normalized token.
       #
-      # Note: Use this in the search engine if you need a qualified
-      #       and normalized token. I.e. one prepared for a search.
+      # Note:
+      # Use this in the search engine if you need a qualified
+      # and normalized token. I.e. one prepared for a search.
       #
       def self.processed text, original = nil
         new(text, original).process
       end
-      def process # TODO Move this into the processed method and let the token have more params?
-        qualify # TODO Should this operate on the original?
-        partialize # TODO Should this operate on the original?
-        similarize # TODO Should this operate on the original?
-        remove_illegals # TODO Remove?
+      def process
+        qualify
+        partialize
+        similarize
+        remove_illegals
         self
       end
+      # Symbolizes this token's text.
       #
+      # Note:
+      # Call externally when Picky operates in Symbols mode.
       #
       def symbolize!
         @text = @text.to_sym
@@ -48,7 +54,10 @@ module Picky
       # Translates this token's qualifiers into actual categories.
       #
-      # Note: If this is not done, there is no mapping.
+      # Note:
+      # If this is not done, there is no mapping.
+      #
+      # THINK Can this be improved somehow?
       #
       def categorize mapper
         @user_defined_categories = @qualifiers && @qualifiers.map do |qualifier|
@@ -63,13 +72,22 @@ module Picky
       def partial= partial
         @partial = partial if @partial.nil?
       end
+      # A token is partial? only if it is not similar
+      # and is partial.
+      #
+      # It can't be similar and partial at the same time.
+      #
       def partial?
         !@similar && @partial
       end
-      # If the text ends with *, partialize it. If with ", don't.
+      # If the text ends with *, partialize it. If with ",
+      # non-partialize it.
       #
-      # The latter wins. So "hello*" will not be partially searched.
+      # The last one wins.
+      # So "hello*" will not be partially searched.
+      # So "hello"* will be partially searched.
       #
       @@no_partial = /\"\Z/
       @@partial    = /\*\Z/
@@ -97,7 +115,7 @@ module Picky
       #
       @@illegals = /["*~]/
       def remove_illegals
-        @text.gsub! @@illegals, '' unless @text.blank?
+        @text.gsub! @@illegals, EMPTY_STRING unless @text.blank?
       end
       # Returns an array of possible combinations.
@@ -140,9 +158,9 @@ module Picky
       @@split_qualifier_text = ':'
       @@split_qualifiers     = ','
       def qualify
-        @qualifiers, @text = (@text || '').split(@@split_qualifier_text, 2)
+        @qualifiers, @text = (@text || EMPTY_STRING).split(@@split_qualifier_text, 2)
         @qualifiers, @text = if @text.blank?
-          [nil, (@qualifiers || '')]
+          [nil, (@qualifiers || EMPTY_STRING)]
         else
           [@qualifiers.split(@@split_qualifiers), @text]
         end