picky 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -207,8 +207,6 @@ class Application
   #
   # Warns if something is missing.
   #
-  # TODO Good specs.
-  #
   def check # :nodoc:
     warnings = []
     warnings << check_external_interface
@@ -1,32 +1,30 @@
 module Configuration # :nodoc:all
-
+
   # Holds the configuration for a
   # index/category combination.
   #
-  # TODO Rename paths?
-  #
   class Index
-
+
     attr_reader :index, :category
-
+
     def initialize index, category
       @index = index
       @category = category
     end
-
+
     def index_name
       @index_name ||= index.name
     end
     def category_name
       @category_name ||= category.name
     end
-
+
     #
     #
     def index_path bundle_name, name
       "#{index_directory}/#{category_name}_#{bundle_name}_#{name}"
     end
-
+
     # Was: search_index_file_name
     #
     def prepared_index_path
@@ -36,21 +34,17 @@ module Configuration # :nodoc:all
       @prepared_index_file ||= Internals::Index::File::Text.new prepared_index_path
       @prepared_index_file.open_for_indexing &block
     end
-
-    # def file_name
-    #   @file_name ||= "#{@index_name}_#{@category_name}"
-    # end
-
+
     # Identifier for internal use.
     #
     def identifier
       @identifier ||= "#{index_name}:#{category_name}"
     end
-
+
     def to_s
       "#{index_name} #{category_name}"
     end
-
+
     def self.index_root
       @index_root ||= "#{PICKY_ROOT}/index"
     end
@@ -67,7 +61,7 @@ module Configuration # :nodoc:all
    def prepare_index_directory
      FileUtils.mkdir_p index_directory
    end
-
+
  end
-
+
 end
@@ -8,7 +8,7 @@ module Internals
 
       # Writes the hash into Redis.
       #
-      # TODO Could we use multi?
+      # Note: We could use multi, but it did not help.
      #
      def dump hash
        redis = backend
@@ -67,7 +67,7 @@ module Internals
      # for each found similar token.
      #
      def similar_possible_for token
-        # Get as many similar tokens as necessary
+        # Get as many tokens as necessary
        #
        tokens = similar_tokens_for token
        # possible combinations
@@ -105,9 +105,13 @@ module Internals
      # (Also none of the categories matched, but the ignore unassigned
      # tokens option is true)
      #
+      # TODO Could use Combinations class here and remove the inject.
+      #
      def possible_for token, preselected_categories = nil
-        possible = (preselected_categories || possible_categories(token)).map { |category| category.combination_for(token) }
-        possible.compact!
+        possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
+          combination = category.combination_for token
+          combination ? combinations << combination : combinations
+        end
        # This is an optimization to mark tokens that are ignored.
        #
        return if ignore_unassigned_tokens && possible.empty?
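
Note on the hunk above: it trades a map plus compact! pair for a single inject pass, so combinations that come back nil never enter the result array. A minimal plain-Ruby sketch of the equivalence (the combination_for lambda is a hypothetical stand-in, not Picky's API):

    # Hypothetical stand-in: one category yields no combination (nil).
    categories      = [:author, :title, :isbn]
    combination_for = lambda { |category| category == :isbn ? nil : [category] }

    # Before: map everything (nils included), then strip nils in a second pass.
    before = categories.map { |category| combination_for.call(category) }
    before.compact!

    # After: accumulate only the non-nil combinations in one pass.
    after = categories.inject([]) do |combinations, category|
      combination = combination_for.call(category)
      combination ? combinations << combination : combinations
    end

    before == after # => true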
@@ -1,35 +1,35 @@
 module Indexed
   module Wrappers
-
+
    module Bundle
-
+
      # A calculation rewrites the symbol into a float.
      #
-      # TODO I really need to allow integers as keys. The code below is just not ok.
+      # TODO I really need to allow integers as keys. The code below is just not up to the needed quality.
      #
      class Calculation < Wrapper
-
+
        #
        #
        def recalculate float
          float
        end
-
+
        #
        #
        def ids sym
          @bundle.ids recalculate(sym.to_s.to_f).to_s.to_sym
        end
-
+
        #
        #
        def weight sym
          @bundle.weight recalculate(sym.to_s.to_f).to_s.to_sym
        end
-
+
      end
-
+
    end
-
+
  end
 end
@@ -3,18 +3,18 @@ module Internals
  # encoding: utf-8
  #
  module Indexed
-
+
    # TODO Spec
    #
    module Wrappers
-
+
      # This index combines an exact and partial index.
      # It serves to order the results such that exact hits are found first.
      #
      # TODO Need to use the right subtokens. Bake in?
      #
-      class ExactFirst < Indexed::Bundle::Memory
-
+      class ExactFirst < Indexed::Bundle::Base
+
        delegate :similar,
                 :identifier,
                 :name,
@@ -28,12 +28,12 @@ module Internals
                 :dump,
                 :load,
                 :to => :@partial
-
+
        def initialize category
          @exact = category.exact
          @partial = category.partial
        end
-
+
        def self.wrap index_or_category
          if index_or_category.respond_to? :categories
            wrap_each_of index_or_category.categories
@@ -47,19 +47,19 @@ module Internals
        def self.wrap_each_of categories
          categories.categories.collect! { |category| new(category) }
        end
-
+
        def ids text
          @exact.ids(text) + @partial.ids(text)
        end
-
+
        def weight text
          [@exact.weight(text) || 0, @partial.weight(text) || 0].max
        end
-
+
      end
-
+
    end
-
+
  end
-
+
 end
@@ -5,8 +5,6 @@ module Internals
    #
    class Allocations # :nodoc:all
 
-      # TODO Remove size
-      #
      delegate :each, :inject, :empty?, :size, :to => :@allocations
      attr_reader :total
 
@@ -23,7 +21,7 @@ module Internals
      end
      # Sort the allocations.
      #
-      def sort
+      def sort!
        @allocations.sort!
      end
 
@@ -116,7 +114,7 @@ module Internals
      end
 
    end
-
+
  end
-
+
 end
@@ -8,12 +8,12 @@ module Internals
    # An allocation consists of a number of combinations.
    #
    module Combinations # :nodoc:all
-
+
      # Memory Combinations contain specific methods for
      # calculating score and ids in memory.
      #
      class Memory < Base
-
+
        # Returns the result ids for the allocation.
        #
        # Sorts the ids by size and & through them in the following order (sizes):
@@ -24,7 +24,7 @@ module Internals
        # Note: Uses a C-optimized intersection routine for speed and memory efficiency.
        #
        # Note: In the memory based version we ignore the (amount) needed hint.
-        # TODO Not ignore it?
+        # We might use the fact to optimize the algorithm.
        #
        def ids _, _
          return [] if @combinations.empty?
@@ -43,16 +43,16 @@ module Internals
          # this precondition for a fast algorithm is always given.
          #
          id_arrays.sort! { |this_array, that_array| this_array.size <=> that_array.size }
-
+
          # Call the optimized C algorithm.
          #
          Performant::Array.memory_efficient_intersect id_arrays
        end
-
+
      end
-
+
    end
-
+
  end
-
+
 end
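
Note: as the comments above describe, the id arrays are sorted by size so the intersection starts with the smallest arrays and the working set shrinks as early as possible; the heavy lifting is done by the C-optimized Performant::Array.memory_efficient_intersect. A plain-Ruby sketch of the same idea, for illustration only:

    # Intersect id arrays smallest-first so each & keeps intermediates small.
    def sketch_memory_efficient_intersect id_arrays
      id_arrays.sort_by(&:size).inject { |intersection, ids| intersection & ids }
    end

    sketch_memory_efficient_intersect [[1, 2, 3, 4], [2, 4], [2, 3, 4]] # => [2, 4]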
@@ -27,6 +27,9 @@ module Internals
 
      # Returns a number of possible allocations for the given tokens.
      #
+      def sorted_allocations_for tokens
+
+      end
      def allocations_for tokens
        Allocations.new allocations_ary_for(tokens)
      end
@@ -40,21 +43,10 @@ module Internals
        #
        possible_combinations = tokens.possible_combinations_in index
 
-        # Optimization for ignoring tokens that allocate to nothing and
-        # can be ignored.
-        # For example in a special search, where "florian" is not
-        # mapped to any category.
-        #
-        possible_combinations.compact!
-
        # Generate all possible combinations.
        #
        expanded_combinations = expand_combinations_from possible_combinations
 
-        # If there are none, try the next allocation.
-        #
-        return [] unless expanded_combinations
-
        # Add the wrapped possible allocations to the ones we already have.
        #
        expanded_combinations.map! do |expanded_combination|
@@ -62,7 +54,7 @@ module Internals
        end
      end
 
-      # This is the core of the search engine.
+      # This is the core of the search engine. No kidding.
      #
      # Gets an array of
      # [
@@ -122,7 +114,7 @@ module Internals
        # If an element has size 0, this means one of the
        # tokens could not be allocated.
        #
-        return if possible_combinations.any?(&:empty?)
+        return [] if possible_combinations.any?(&:empty?)
 
        # Generate the first multiplicator "with which" (well, not quite) to multiply the smallest amount of combinations.
        #
@@ -170,7 +162,7 @@ module Internals
          combinations
        end
 
-        return if possible_combinations.empty?
+        return [] if possible_combinations.empty?
 
        possible_combinations.shift.zip *possible_combinations
      end
@@ -28,14 +28,18 @@ module Internals
      # Note: Use this in the search engine if you need a qualified
      # and normalized token. I.e. one prepared for a search.
      #
-      def self.processed text
-        token = new text
-        token.qualify
-        token.extract_original
-        token.partialize
-        token.similarize
-        token.remove_illegals
-        token
+      def self.processed text, downcase = true
+        new(text).process downcase
+      end
+      def process downcases = true
+        qualify
+        extract_original
+        downcase if downcases
+        partialize
+        similarize
+        remove_illegals
+        symbolize
+        self
      end
 
      # This returns a predefined category name if the user has given one.
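
Note: processed now delegates to the new instance method process, and callers can skip downcasing via the downcase flag; the tokenizers' new case_sensitive option (further down in this diff) feeds exactly this flag. A usage sketch, with results as shown in the token specs later in this diff:

    Internals::Query::Token.processed('Title*').text        # => :title (downcased, * removed, partial)
    Internals::Query::Token.processed('Title*', false).text # => :Title (case preserved)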
@@ -56,6 +60,12 @@ module Internals
        @original = @text.dup
      end
 
+      # Downcases the text.
+      #
+      def downcase
+        @text.downcase!
+      end
+
      # Partial is a conditional setter.
      #
      # It is only settable if it hasn't been set yet.
@@ -69,15 +79,19 @@ module Internals
 
      # If the text ends with *, partialize it. If with ", don't.
      #
+      # The latter wins. So "hello*" will not be partially searched.
+      #
      @@no_partial = /\"\Z/
      @@partial = /\*\Z/
      def partialize
-        self.partial = false and return if @text =~ @@no_partial
-        self.partial = true if @text =~ @@partial
+        self.partial = false and return unless @text !~ @@no_partial
+        self.partial = true unless @text !~ @@partial
      end
 
      # If the text ends with ~ similarize it. If with ", don't.
      #
+      # The latter wins.
+      #
      @@no_similar = /\"\Z/
      @@similar = /\~\Z/
      def similarize
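
Note: "the latter wins" means a trailing " vetoes a trailing *, since the no-partial rule is checked first. A small sketch, inspected the way the partial specs below do it:

    Internals::Query::Token.processed('hello*').instance_variable_get(:@partial)  # => true
    Internals::Query::Token.processed('hello"').instance_variable_get(:@partial)  # => false
    Internals::Query::Token.processed('hello*"').instance_variable_get(:@partial) # => false, the " wins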
@@ -96,21 +110,10 @@ module Internals
        @text.gsub! @@illegals, '' unless @text.blank?
      end
 
-      # Visitor for tokenizer.
      #
-      # TODO Rewrite!!!
      #
-      def tokenize_with tokenizer
-        @text = tokenizer.normalize @text
-      end
-      # TODO spec!
-      #
-      # TODO Rewrite!!
-      #
-      def tokenized tokenizer
-        tokenizer.tokenize(@text.to_s).each do |text|
-          yield text
-        end
+      def symbolize
+        @text = @text.to_sym
      end
 
      # Returns an array of possible combinations.
@@ -181,6 +184,12 @@ module Internals
        "#{similar?? :similarity : :index}:#{@text}"
      end
 
+      # If the originals & the text are the same, they are the same.
+      #
+      def == other
+        self.original == other.original && self.text == other.text
+      end
+
      # Displays the qualifier text and the text, joined.
      #
      # e.g. name:meier
@@ -1,31 +1,36 @@
 # encoding: utf-8
 #
 module Internals
-
+
  #
  #
  module Query
-
+
    # This class primarily handles switching through similar token constellations.
    #
    class Tokens # :nodoc:all
-
+
      # Basically delegates to its internal tokens array.
      #
      self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
-
+
      #
      #
      def initialize tokens = []
        @tokens = tokens
      end
-
+      def self.processed words, downcase = true
+        new words.collect! { |word| Token.processed word, downcase }
+      end
+
+      # Tokenizes each token.
      #
+      # Note: Passed tokenizer needs to offer #normalize(text).
      #
      def tokenize_with tokenizer
        @tokens.each { |token| token.tokenize_with(tokenizer) }
      end
-
+
      # Generates an array in the form of
      # [
      #   [combination], # of token 1
@@ -33,14 +38,17 @@ module Internals
      #   [combination, combination] # of token 3
      # ]
      #
-      # TODO If we want token behaviour defined per Query, we can
-      # compact! here
-      #
      def possible_combinations_in type
        @tokens.inject([]) do |combinations, token|
-          combinations << token.possible_combinations_in(type)
+          possible_combinations = token.possible_combinations_in type
+
+          # Note: Optimization for ignoring tokens that allocate to nothing and
+          # can be ignored.
+          # For example in a special search, where "florian" is not
+          # mapped to any category.
+          #
+          possible_combinations ? combinations << possible_combinations : combinations
        end
-        # TODO compact! if ignore_unassigned_tokens
      end
 
      # Makes the last of the tokens partial.
@@ -57,33 +65,37 @@ module Internals
      def cap? maximum
        @tokens.size > maximum
      end
-
+
      # Rejects blank tokens.
      #
      def reject
        @tokens.reject! &:blank?
      end
-
+
      # Returns a solr query.
      #
      def to_solr_query
        @tokens.map(&:to_solr).join ' '
      end
-
+
      #
      #
      def originals
        @tokens.map(&:original)
      end
-
+
+      def == other
+        self.tokens == other.tokens
+      end
+
      # Just join the token original texts.
      #
      def to_s
        originals.join ' '
      end
-
+
    end
-
+
  end
-
+
 end
@@ -3,19 +3,19 @@ module Query
  # Calculates weights for certain combinations.
  #
  class Weights # :nodoc:all
-
+
    #
    #
    def initialize weights = {}
      @weights = weights
    end
-
+
    # Get the weight of an allocation.
    #
    def weight_for clustered
      @weights[clustered] || 0
    end
-
+
    # Returns an energy term E for allocation. this turns into a probability
    # by P(allocation) = 1/Z * exp (-1/T * E(allocation)),
    # where Z is the normalizing partition function
@@ -31,24 +31,26 @@ module Query
    # Note: Cache this if more complicated weighings become necessary.
    #
    def score combinations
-      # TODO Or hide: combinations#to_weights_key
+      # TODO Or hide: combinations#to_weights_key (but it's an array, so…)
      #
      # TODO combinations could cluster uniq as combinations are added (since combinations don't change).
      #
+      # TODO Or it could use actual combinations? Could it? Or make combinations comparable to Symbols.
+      #
      weight_for combinations.map(&:category_name).clustered_uniq_fast
    end
-
+
    # Are there any weights defined?
    #
    def empty?
      @weights.empty?
    end
-
+
    # Prints out a nice representation of the configured weights.
    #
    def to_s
      @weights.to_s
    end
-
+
  end
 end
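
Note: weight_for is a plain hash lookup keyed by the clustered category names of an allocation's combinations, with 0 as the fallback; score builds that key. A sketch with hypothetical weights (namespace as in the hunk header):

    weights = Query::Weights.new({ [:author, :title] => 6, [:title] => 1 })

    weights.weight_for [:author, :title] # => 6
    weights.weight_for [:isbn]           # => 0, the fallback for unknown keys
    weights.empty?                       # => false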
@@ -20,6 +20,7 @@ Removes chars after split: #{@removes_characters_after_splitting_regexp ? "/#{@r
 Normalizes words: #{@normalizes_words_regexp_replaces ? @normalizes_words_regexp_replaces : '-'}
 Rejects tokens? #{reject_condition_location ? "Yes, see line #{reject_condition_location} in app/application.rb" : '-'}
 Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-' }
+Case sensitive? #{@case_sensitive ? "Yes." : "-"}
 TOKENIZER
      end
 
@@ -125,6 +126,13 @@ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-'
        tokens.reject! &@reject_condition
      end
 
+      def case_sensitive case_sensitive
+        @case_sensitive = case_sensitive
+      end
+      def downcase?
+        !@case_sensitive
+      end
+
      # Checks if the right argument type has been given.
      #
      def check_argument_in method, type, argument, &condition
@@ -156,6 +164,7 @@ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-'
        normalizes_words options[:normalizes_words] if options[:normalizes_words]
        removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
        substitutes_characters_with options[:substitutes_characters_with] if options[:substitutes_characters_with]
+        case_sensitive options[:case_sensitive] unless options[:case_sensitive].nil?
 
        # Defaults.
        #
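
Note: taken together, the tokenizer hunks add a case_sensitive option whose downcase? query steers downcasing during token processing (see the Token#process and Index/Query tokenizer hunks elsewhere in this diff). A configuration sketch:

    # Option name as wired up above, shown on the query tokenizer.
    tokenizer = Internals::Tokenizers::Query.new case_sensitive: true

    tokenizer.downcase? # => false, so tokens keep their case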
@@ -163,37 +172,54 @@ Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-'
        reject_token_if &(options[:reject_token_if] || :blank?)
      end
 
-      # Hooks.
+      # Default preprocessing hook.
      #
-
-      # Preprocessing.
+      # Does:
+      # 1. Character substitution.
+      # 2. Remove illegal expressions.
+      # 3. Remove non-single stopwords. (Stopwords that occur with other words)
      #
-      def preprocess text; end
+      def preprocess text
+        text = substitute_characters text
+        remove_illegals text
+        # We do not remove single stopwords e.g. in the indexer for
+        # an entirely different reason than in the query tokenizer.
+        # An indexed thing with just name "UND" (a possible stopword)
+        # should not lose its name.
+        #
+        remove_non_single_stopwords text
+        text
+      end
      # Pretokenizing.
      #
-      def pretokenize text; end
-      # Postprocessing.
+      # Does:
+      # 1. Split the text into words.
+      # 2. Normalize each word.
+      #
+      def pretokenize text
+        words = split text
+        words.collect! do |word|
+          normalize_with_patterns word
+          word
+        end
+      end
+      # Basic postprocessing (overridden in both query/index tokenizers).
      #
      def process tokens
        reject tokens # Reject any tokens that don't meet criteria
        tokens
      end
 
-      # Converts words into real tokens.
-      #
-      def tokens_for words
-        Internals::Query::Tokens.new words.collect! { |word| token_for word }
-      end
+      # # Converts words into real tokens.
+      # #
+      # def tokens_for words
+      #   Internals::Query::Tokens.new words.collect! { |word| token_for word }
+      # end
      # Turns non-blank text into symbols.
      #
      def symbolize text
        text.blank? ? nil : text.to_sym
      end
-      # Returns a tokens object.
-      #
-      def empty_tokens
-        Internals::Query::Tokens.new
-      end
 
    end
 
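
Note: with the hooks now concrete in the base tokenizer, the implied order is preprocess, then pretokenize, then tokens_for, then process. The tokenize driver itself is not part of this diff, so the following is only an assumed sketch of how the hooks compose:

    # Assumed driver shape; each hook is defined in the hunks above.
    def tokenize text
      text   = preprocess text   # character substitution, illegals, stopwords
      words  = pretokenize text  # split into words, normalize with patterns
      tokens = tokens_for words  # symbols when indexing, Tokens when querying
      process tokens             # reject; the query tokenizer also caps/partializes
    end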
@@ -15,45 +15,16 @@ module Internals
        @default ||= new
      end
 
-      # Default indexing preprocessing hook.
-      #
-      # Does:
-      # 1. Character substitution.
-      # 2. Downcasing.
-      # 3. Remove illegal expressions.
-      # 4. Remove non-single stopwords. (Stopwords that occur with other words)
-      #
-      def preprocess text
-        text = substitute_characters text
-        text.downcase!
-        remove_illegals text
-        # we do not remove single stopwords for an entirely different
-        # reason than in the query tokenizer.
-        # An indexed thing with just name "UND" (a possible stopword) should not lose its name.
-        #
-        remove_non_single_stopwords text
-        text
-      end
-
-      # Default indexing pretokenizing hook.
-      #
-      # Does:
-      # 1. Split the text into words.
-      # 2. Normalize each word.
-      #
-      def pretokenize text
-        words = split text
-        words.collect! do |word|
-          normalize_with_patterns word
-          word
-        end
-      end
-
      # Does not actually return a token, but a
      # symbol "token".
      #
-      def token_for text
-        symbolize text
+      def tokens_for words
+        words.collect! { |word| word.downcase! if downcase?; word.to_sym }
+      end
+      # Returns empty tokens.
+      #
+      def empty_tokens
+        []
      end
 
    end
@@ -3,7 +3,7 @@
 module Internals
 
  module Tokenizers
-
+
    # There are a few class methods that you can use to configure how a query works.
    #
    # removes_characters regexp
@@ -14,66 +14,46 @@ module Internals
    # normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
    #
    class Query < Base
-
+
      def self.default= new_default
        @default = new_default
      end
      def self.default
        @default ||= new
      end
-
+
      attr_reader :maximum_tokens
-
+
      def initialize options = {}
        super options
        @maximum_tokens = options[:maximum_tokens] || 5
      end
-
-      def preprocess text
-        remove_illegals text # Remove illegal characters
-        remove_non_single_stopwords text # remove stop words
-        text
-      end
-
-      # Split the text and put some back together.
-      #
-      # TODO Make the same as in indexing?
-      #
-      def pretokenize text
-        split text
-      end
-
+
      # Let each token process itself.
      # Reject, limit, and partialize tokens.
      #
+      # In querying we work with real tokens (in indexing it's just symbols).
+      #
      def process tokens
-        tokens.tokenize_with self
-        tokens.reject # Reject any tokens that don't meet criteria
-        tokens.cap maximum_tokens # Cut off superfluous tokens
-        tokens.partialize_last # Set certain tokens as partial
+        tokens.reject # Reject any tokens that don't meet criteria.
+        tokens.cap maximum_tokens # Cut off superfluous tokens.
+        tokens.partialize_last # Set certain tokens as partial.
        tokens
      end
-
-      # Called by the token.
-      #
-      # TODO Perhaps move to Normalizer?
+
+      # Converts words into real tokens.
      #
-      def normalize text
-        text = substitute_characters text # Substitute special characters
-        text.downcase! # Downcase all text
-        normalize_with_patterns text # normalize
-        text.to_sym # symbolize
+      def tokens_for words
+        Internals::Query::Tokens.processed words, downcase?
      end
-
-      # Returns a token for a word.
-      # The basic query tokenizer uses new tokens.
+      # Returns a tokens object.
      #
-      def token_for word
-        Internals::Query::Token.processed word
+      def empty_tokens
+        Internals::Query::Tokens.new
      end
-
+
    end
-
+
  end
-
+
 end
data/lib/picky/loader.rb CHANGED
@@ -179,8 +179,6 @@ module Loader # :nodoc:all
  load_internals 'indexed/categories'
  load_internals 'indexed/index'
 
-  # TODO Ok here?
-  #
  load_internals 'indexed/wrappers/exact_first'
 
  # Bundle Wrapper
data/lib/picky/search.rb CHANGED
@@ -146,7 +146,7 @@ class Search
    # Sort the allocations.
    # (allocations are sorted according to score, highest to lowest)
    #
-    allocations.sort
+    allocations.sort!
 
    # Return the allocations.
    #
@@ -1,17 +1,9 @@
-# TODO This file needs some love.
+# Server tasks, like starting/stopping/restarting.
 #
 namespace :server do
-
-  def chdir_to_root
-    Dir.chdir PICKY_ROOT
-  end
-
-  def current_pid
-    pid = `cat #{File.join(PICKY_ROOT, 'tmp/pids/unicorn.pid')}`
-    pid.blank? ? nil : pid.chomp
-  end
-
+
  # desc "Start the unicorns. (Wehee!)"
+  #
  task :start => :framework do
    chdir_to_root
    daemonize = PICKY_ENVIRONMENT == 'production' ? '-D' : ''
@@ -19,17 +11,27 @@ namespace :server do
    puts "Running \`#{command}\`."
    exec command
  end
-
+
  # desc "Stop the unicorns. (Blam!)"
+  #
  task :stop => :framework do
    `kill -QUIT #{current_pid}` if current_pid
  end
-
+
  # desc "Restart the unicorns."
  task :restart do
    Rake::Task[:"server:stop"].invoke
    sleep 5
    Rake::Task[:"server:start"].invoke
  end
-
+
+  def chdir_to_root
+    Dir.chdir PICKY_ROOT
+  end
+
+  def current_pid
+    pid = `cat #{File.join(PICKY_ROOT, 'tmp/pids/unicorn.pid')}`
+    pid.blank? ? nil : pid.chomp
+  end
+
 end
data/lib/tasks/todo.rake CHANGED
@@ -1,5 +1,5 @@
 desc "Finds where Picky still needs input from you."
-task :todo do
+task :'to#{}do' do
  if system "grep -e 'TODO.*' -n --color=always -R *"
    puts "Picky needs a bit of input from you there. Thanks."
  else
@@ -273,14 +273,14 @@ describe Internals::FrontendAdapters::Rack do
    end
    context 'without app' do
      context 'with url' do
-        it 'should use the 404 with default_options from the url' do
+        it 'should use the 200 with default_options from the url' do
          @routes.should_receive(:add_route).once.with Internals::FrontendAdapters::Rack::STATUSES[200], { :request_method => "GET", :path_info => /some_url/ }
 
          @rack_adapter.answer 'some_url'
        end
      end
      context 'without url' do
-        it 'should use the 404 with default_options' do
+        it 'should use the 200 with default_options' do
          @routes.should_receive(:add_route).once.with Internals::FrontendAdapters::Rack::STATUSES[200], { :request_method => "GET" }
 
          @rack_adapter.answer
@@ -5,7 +5,7 @@ require 'spec_helper'
 describe Internals::Tokenizers::Base do
 
  context 'with special instance' do
-    let (:tokenizer) { described_class.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello } }
+    let (:tokenizer) { described_class.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello }, case_sensitive: true }
    it 'rejects tokens with length < 2' do
      tokenizer.reject([:'', :a, :ab, :abc]).should == [:ab, :abc]
    end
@@ -13,7 +13,7 @@ describe Internals::Tokenizers::Base do
      tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
    end
    describe 'to_s' do
-      it 'does something' do
+      it 'spits out the right text' do
        tokenizer.to_s.should == <<-EXPECTED
 Removes characters: -
 Stopwords: -
@@ -22,6 +22,7 @@ Removes chars after split: -
 Normalizes words: -
 Rejects tokens? Yes, see line 8 in app/application.rb
 Substitutes chars? -
+Case sensitive? Yes.
 EXPECTED
      end
    end
@@ -31,7 +32,7 @@ EXPECTED
    let(:tokenizer) { described_class.new }
 
    describe 'to_s' do
-      it 'does something' do
+      it 'spits out the right text' do
        tokenizer.to_s.should == <<-EXPECTED
 Removes characters: -
 Stopwords: -
@@ -40,6 +41,7 @@ Removes chars after split: -
 Normalizes words: -
 Rejects tokens? -
 Substitutes chars? -
+Case sensitive? -
 EXPECTED
      end
    end
@@ -41,6 +41,7 @@ describe Internals::Tokenizers::Query do
    it 'should call methods in order' do
      text = stub :text
 
+      tokenizer.should_receive(:substitute_characters).once.with(text).and_return text
      tokenizer.should_receive(:remove_illegals).once.ordered.with text
      tokenizer.should_receive(:remove_non_single_stopwords).once.ordered.with text
 
@@ -57,13 +58,7 @@ describe Internals::Tokenizers::Query do
    before(:each) do
      @tokens = mock :tokens, :null_object => true
    end
-    it 'should tokenize the tokens' do
-      @tokens.should_receive(:tokenize_with).once.with tokenizer
-
-      tokenizer.process @tokens
-    end
    it 'should call methods on the tokens in order' do
-      @tokens.should_receive(:tokenize_with).once.ordered
      @tokens.should_receive(:reject).once.ordered
      @tokens.should_receive(:cap).once.ordered
      @tokens.should_receive(:partialize_last).once.ordered
@@ -122,13 +117,5 @@ describe Internals::Tokenizers::Query do
      tokenizer.tokenize('').map(&:to_s).should == []
    end
  end
-  describe "token_for" do
-    it "should get a preprocessed token" do
-      text = stub(:text)
-      Internals::Query::Token.should_receive(:processed).with text
-
-      tokenizer.token_for text
-    end
-  end
 
 end
@@ -64,7 +64,7 @@ describe 'Query::Combination' do
 
  describe 'ids' do
    it 'should call ids with the text on bundle' do
-      @bundle.should_receive(:ids).once.with 'some_text'
+      @bundle.should_receive(:ids).once.with :some_text
 
      @combination.ids
    end
@@ -80,7 +80,7 @@ describe 'Query::Combination' do
 
  describe 'weight' do
    it 'should call weight with the text on bundle' do
-      @bundle.should_receive(:weight).once.with 'some_text'
+      @bundle.should_receive(:weight).once.with :some_text
 
      @combination.weight
    end
@@ -44,17 +44,17 @@ describe Internals::Query::Indexes do
  it 'can handle empty combinations' do
    combinations = [[1,2,3], [:a, :b, :c], []]
 
-    indexes.expand_combinations_from(combinations).should == nil
+    indexes.expand_combinations_from(combinations).should == []
  end
  it 'can handle empty combinations' do
    combinations = [[], [:a, :b, :c], []]
 
-    indexes.expand_combinations_from(combinations).should == nil
+    indexes.expand_combinations_from(combinations).should == []
  end
  it 'can handle totally empty combinations' do
    combinations = [[], [], []]
 
-    indexes.expand_combinations_from(combinations).should == nil
+    indexes.expand_combinations_from(combinations).should == []
  end
  it 'is fast in a complicated case' do
    combinations = [[1,2,3], [:a, :b, :c], [:k, :l]]
@@ -64,12 +64,12 @@ describe Internals::Query::Indexes do
  it 'is fast in a simple case' do
    combinations = [[1], [2], [3]]
 
-    performance_of { indexes.expand_combinations_from(combinations) }.should < 0.00055
+    performance_of { indexes.expand_combinations_from(combinations) }.should < 0.0006
  end
  it 'is very fast in a 1-empty case' do
    combinations = [[], [2], [3]]
 
-    performance_of { indexes.expand_combinations_from(combinations) }.should < 0.00045
+    performance_of { indexes.expand_combinations_from(combinations) }.should < 0.0005
  end
  it 'is very fast in a all-empty case' do
    combinations = [[], [], []]
@@ -8,6 +8,15 @@ describe Internals::Query::Token do
    Internals::Query::Qualifiers.instance.prepare
  end
 
+  describe '==' do
+    it 'is equal if the originals are equal' do
+      described_class.processed('similar~').should == described_class.processed('similar~')
+    end
+    it 'is not equal if the originals are not equal' do
+      described_class.processed('similar~').should_not == described_class.processed('similar')
+    end
+  end
+
  describe 'next_similar_token' do
    before(:each) do
      @bundle = stub :bundle, :similar => [:array, :of, :similar]
@@ -157,20 +166,29 @@ describe Internals::Query::Token do
  end
 
  describe 'processed' do
+    it 'should return a new token' do
+      described_class.processed('some text').should be_kind_of(described_class)
+    end
+    it 'generates a token' do
+      described_class.processed('some text').class.should == described_class
+    end
+  end
+
+  describe 'process' do
+    let(:token) { described_class.new 'any_text' }
+    it 'returns itself' do
+      token.process.should == token
+    end
    it 'should have an order' do
-      token = stub :token
-      described_class.should_receive(:new).once.and_return token
-
      token.should_receive(:qualify).once.ordered
      token.should_receive(:extract_original).once.ordered
+      token.should_receive(:downcase).once.ordered
      token.should_receive(:partialize).once.ordered
      token.should_receive(:similarize).once.ordered
      token.should_receive(:remove_illegals).once.ordered
+      token.should_receive(:symbolize).once.ordered
 
-      described_class.processed :any_text
-    end
-    it 'should return a new token' do
-      described_class.processed('some text').should be_kind_of(described_class)
+      token.process
    end
  end
 
@@ -352,6 +370,13 @@ describe Internals::Query::Token do
    before(:each) do
      @token = described_class.processed 'text*'
    end
+    it 'should not set partial' do
+      @token.instance_variable_set :@partial, false
+
+      @token.partial = true
+
+      @token.instance_variable_get(:@partial).should be_false
+    end
    it 'should not set partial' do
      @token.partial = false
 
@@ -382,20 +407,20 @@ describe Internals::Query::Token do
    it 'should remove *' do
      token = described_class.processed 'text*'
 
-      token.text.should == 'text'
+      token.text.should == :text
    end
    it 'should remove ~' do
      token = described_class.processed 'text~'
 
-      token.text.should == 'text'
+      token.text.should == :text
    end
    it 'should remove "' do
      token = described_class.processed 'text"'
 
-      token.text.should == 'text'
+      token.text.should == :text
    end
    it "should pass on a processed text" do
-      described_class.processed('text').text.should == 'text'
+      described_class.processed('text').text.should == :text
    end
  end
 
@@ -7,6 +7,35 @@ describe Internals::Query::Tokens do
    Internals::Query::Qualifiers.instance.prepare
  end
 
+  describe '.processed' do
+    it 'generates processed tokens from all words' do
+      expected = [
+        Internals::Query::Token.processed('this~'),
+        Internals::Query::Token.processed('is'),
+        Internals::Query::Token.processed('a'),
+        Internals::Query::Token.processed('sp:solr'),
+        Internals::Query::Token.processed('query"')
+      ]
+
+      described_class.should_receive(:new).once.with expected
+
+      described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"']
+    end
+    it 'generates processed tokens from all words' do
+      expected = [
+        Internals::Query::Token.processed('this~', false),
+        Internals::Query::Token.processed('is', false),
+        Internals::Query::Token.processed('a', false),
+        Internals::Query::Token.processed('sp:solr', false),
+        Internals::Query::Token.processed('query"', false)
+      ]
+
+      described_class.should_receive(:new).once.with expected
+
+      described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"']
+    end
+  end
+
  describe 'to_solr_query' do
    context 'many tokens' do
      before(:each) do
@@ -151,6 +180,16 @@ describe Internals::Query::Tokens do
        [:combination31, :combination32, :combination33]
      ]
    end
+    it 'should work correctly' do
+      @token1.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination11, :combination12]
+      @token2.should_receive(:possible_combinations_in).once.with(:some_index).and_return nil
+      @token3.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination31, :combination32, :combination33]
+
+      @tokens.possible_combinations_in(:some_index).should == [
+        [:combination11, :combination12],
+        [:combination31, :combination32, :combination33]
+      ]
+    end
  end
 
  describe 'to_s' do
metadata CHANGED
@@ -2,7 +2,7 @@
 name: picky
 version: !ruby/object:Gem::Version
  prerelease:
-  version: 2.0.0
+  version: 2.1.0
 platform: ruby
 authors:
 - Florian Hanke
@@ -10,7 +10,7 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2011-03-28 00:00:00 +02:00
+date: 2011-04-07 00:00:00 +10:00
 default_executable: picky
 dependencies:
 - !ruby/object:Gem::Dependency