RubyGems - picky - Versions diffs - 2.0.0 → 2.1.0 - Mend

picky 2.0.0 → 2.1.0

Files changed (27)

data/lib/picky/application.rb +0 -2
data/lib/picky/internals/configuration/index.rb +11 -17
data/lib/picky/internals/index/redis/string_hash.rb +1 -1
data/lib/picky/internals/indexed/categories.rb +7 -3
data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +9 -9
data/lib/picky/internals/indexed/wrappers/exact_first.rb +12 -12
data/lib/picky/internals/query/allocations.rb +3 -5
data/lib/picky/internals/query/combinations/memory.rb +8 -8
data/lib/picky/internals/query/indexes.rb +6 -14
data/lib/picky/internals/query/token.rb +32 -23
data/lib/picky/internals/query/tokens.rb +30 -18
data/lib/picky/internals/query/weights.rb +9 -7
data/lib/picky/internals/tokenizers/base.rb +42 -16
data/lib/picky/internals/tokenizers/index.rb +7 -36
data/lib/picky/internals/tokenizers/query.rb +20 -40
data/lib/picky/loader.rb +0 -2
data/lib/picky/search.rb +1 -1
data/lib/tasks/server.rake +16 -14
data/lib/tasks/todo.rake +1 -1
data/spec/lib/internals/frontend_adapters/rack_spec.rb +2 -2
data/spec/lib/internals/tokenizers/base_spec.rb +5 -3
data/spec/lib/internals/tokenizers/query_spec.rb +1 -14
data/spec/lib/query/combination_spec.rb +2 -2
data/spec/lib/query/indexes_spec.rb +5 -5
data/spec/lib/query/token_spec.rb +36 -11
data/spec/lib/query/tokens_spec.rb +39 -0
metadata +2 -2

data/lib/picky/application.rb CHANGED

@@ -207,8 +207,6 @@ class Application
     #
     # Warns if something is missing.
     #
-    # TODO Good specs.
-    #
     def check # :nodoc:
       warnings = []
       warnings << check_external_interface

data/lib/picky/internals/configuration/index.rb CHANGED

@@ -1,32 +1,30 @@
 module Configuration # :nodoc:all
   # Holds the configuration for a
   # index/category combination.
   #
-  # TODO Rename paths?
-  #
   class Index
     attr_reader :index, :category
     def initialize index, category
       @index       = index
       @category    = category
     end
     def index_name
       @index_name ||= index.name
     end
     def category_name
       @category_name ||= category.name
     end
     #
     #
     def index_path bundle_name, name
       "#{index_directory}/#{category_name}_#{bundle_name}_#{name}"
     end
     # Was: search_index_file_name
     #
     def prepared_index_path
@@ -36,21 +34,17 @@ module Configuration # :nodoc:all
       @prepared_index_file ||= Internals::Index::File::Text.new prepared_index_path
       @prepared_index_file.open_for_indexing &block
     end
-    # def file_name
-    #   @file_name ||= "#{@index_name}_#{@category_name}"
-    # end
     # Identifier for internal use.
     #
     def identifier
       @identifier ||= "#{index_name}:#{category_name}"
     end
     def to_s
       "#{index_name} #{category_name}"
     end
     def self.index_root
       @index_root ||= "#{PICKY_ROOT}/index"
     end
@@ -67,7 +61,7 @@ module Configuration # :nodoc:all
     def prepare_index_directory
       FileUtils.mkdir_p index_directory
     end
   end
 end

data/lib/picky/internals/index/redis/string_hash.rb CHANGED

@@ -8,7 +8,7 @@ module Internals
         # Writes the hash into Redis.
         #
-        # TODO Could we use multi?
+        # Note: We could use multi, but it did not help.
         #
         def dump hash
           redis = backend

data/lib/picky/internals/indexed/categories.rb CHANGED

@@ -67,7 +67,7 @@ module Internals
       # for each found similar token.
       #
       def similar_possible_for token
-        # Get as many similar tokens as necessary
+        # Get as many tokens as necessary
         #
         tokens = similar_tokens_for token
         # possible combinations
@@ -105,9 +105,13 @@ module Internals
       #       (Also none of the categories matched, but the ignore unassigned
       #       tokens option is true)
       #
+      # TODO Could use Combinations class here and remove the inject.
+      #
       def possible_for token, preselected_categories = nil
-        possible = (preselected_categories || possible_categories(token)).map { |category| category.combination_for(token) }
-        possible.compact!
+        possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
+          combination = category.combination_for token
+          combination ? combinations << combination : combinations
+        end
         # This is an optimization to mark tokens that are ignored.
         #
         return if ignore_unassigned_tokens && possible.empty?

data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb CHANGED

@@ -1,35 +1,35 @@
 module Indexed
   module Wrappers
     module Bundle
       # A calculation rewrites the symbol into a float.
       #
-      # TODO I really need to allow integers as keys. The code below is just not ok.
+      # TODO I really need to allow integers as keys. The code below is just not up to the needed quality.
       #
       class Calculation < Wrapper
         #
         #
         def recalculate float
           float
         end
         #
         #
         def ids sym
           @bundle.ids recalculate(sym.to_s.to_f).to_s.to_sym
         end
         #
         #
         def weight sym
           @bundle.weight recalculate(sym.to_s.to_f).to_s.to_sym
         end
       end
     end
   end
 end

data/lib/picky/internals/indexed/wrappers/exact_first.rb CHANGED

@@ -3,18 +3,18 @@ module Internals
   # encoding: utf-8
   #
   module Indexed
     # TODO Spec
     #
     module Wrappers
       # This index combines an exact and partial index.
       # It serves to order the results such that exact hits are found first.
       #
       # TODO Need to use the right subtokens. Bake in?
       #
-      class ExactFirst < Indexed::Bundle::Memory
+      class ExactFirst < Indexed::Bundle::Base
         delegate :similar,
                  :identifier,
                  :name,
@@ -28,12 +28,12 @@ module Internals
                  :dump,
                  :load,
                  :to => :@partial
         def initialize category
           @exact   = category.exact
           @partial = category.partial
         end
         def self.wrap index_or_category
           if index_or_category.respond_to? :categories
             wrap_each_of index_or_category.categories
@@ -47,19 +47,19 @@ module Internals
         def self.wrap_each_of categories
           categories.categories.collect! { |category| new(category) }
         end
         def ids text
           @exact.ids(text) + @partial.ids(text)
         end
         def weight text
           [@exact.weight(text) || 0, @partial.weight(text) || 0].max
         end
       end
     end
   end
 end

data/lib/picky/internals/query/allocations.rb CHANGED

@@ -5,8 +5,6 @@ module Internals
     #
     class Allocations # :nodoc:all
-      # TODO Remove size
-      #
       delegate :each, :inject, :empty?, :size, :to => :@allocations
       attr_reader :total
@@ -23,7 +21,7 @@ module Internals
       end
       # Sort the allocations.
       #
-      def sort
+      def sort!
         @allocations.sort!
       end
@@ -116,7 +114,7 @@ module Internals
       end
     end
   end
 end

data/lib/picky/internals/query/combinations/memory.rb CHANGED

@@ -8,12 +8,12 @@ module Internals
     # An allocation consists of a number of combinations.
     #
     module Combinations # :nodoc:all
       # Memory Combinations contain specific methods for
       # calculating score and ids in memory.
       #
       class Memory < Base
         # Returns the result ids for the allocation.
         #
         # Sorts the ids by size and & through them in the following order (sizes):
@@ -24,7 +24,7 @@ module Internals
         # Note: Uses a C-optimized intersection routine for speed and memory efficiency.
         #
         # Note: In the memory based version we ignore the (amount) needed hint.
-        # TODO Not ignore it?
+        #       We might use the fact to optimize the algorithm.
         #
         def ids _, _
           return [] if @combinations.empty?
@@ -43,16 +43,16 @@ module Internals
           #      this precondition for a fast algorithm is always given.
           #
           id_arrays.sort! { |this_array, that_array| this_array.size <=> that_array.size }
           # Call the optimized C algorithm.
           #
           Performant::Array.memory_efficient_intersect id_arrays
         end
       end
     end
   end
 end

data/lib/picky/internals/query/indexes.rb CHANGED

@@ -27,6 +27,9 @@ module Internals
       # Returns a number of possible allocations for the given tokens.
       #
+      def sorted_allocations_for tokens
+      end
       def allocations_for tokens
         Allocations.new allocations_ary_for(tokens)
       end
@@ -40,21 +43,10 @@ module Internals
         #
         possible_combinations = tokens.possible_combinations_in index
-        # Optimization for ignoring tokens that allocate to nothing and
-        # can be ignored.
-        # For example in a special search, where "florian" is not
-        # mapped to any category.
-        #
-        possible_combinations.compact!
         # Generate all possible combinations.
         #
         expanded_combinations = expand_combinations_from possible_combinations
-        # If there are none, try the next allocation.
-        #
-        return [] unless expanded_combinations
         # Add the wrapped possible allocations to the ones we already have.
         #
         expanded_combinations.map! do |expanded_combination|
@@ -62,7 +54,7 @@ module Internals
         end
       end
-      # This is the core of the search engine.
+      # This is the core of the search engine. No kidding.
       #
       # Gets an array of
       # [
@@ -122,7 +114,7 @@ module Internals
         # If an element has size 0, this means one of the
         # tokens could not be allocated.
         #
-        return if possible_combinations.any?(&:empty?)
+        return [] if possible_combinations.any?(&:empty?)
         # Generate the first multiplicator "with which" (well, not quite) to multiply the smallest amount of combinations.
         #
@@ -170,7 +162,7 @@ module Internals
           combinations
         end
-        return if possible_combinations.empty?
+        return [] if possible_combinations.empty?
         possible_combinations.shift.zip *possible_combinations
       end

data/lib/picky/internals/query/token.rb CHANGED

@@ -28,14 +28,18 @@ module Internals
       # Note: Use this in the search engine if you need a qualified
       #       and normalized token. I.e. one prepared for a search.
       #
-      def self.processed text
-        token = new text
-        token.qualify
-        token.extract_original
-        token.partialize
-        token.similarize
-        token.remove_illegals
-        token
+      def self.processed text, downcase = true
+        new(text).process downcase
+      end
+      def process downcases = true
+        qualify
+        extract_original
+        downcase if downcases
+        partialize
+        similarize
+        remove_illegals
+        symbolize
+        self
       end
       # This returns a predefined category name if the user has given one.
@@ -56,6 +60,12 @@ module Internals
         @original = @text.dup
       end
+      # Downcases the text.
+      #
+      def downcase
+        @text.downcase!
+      end
       # Partial is a conditional setter.
       #
       # It is only settable if it hasn't been set yet.
@@ -69,15 +79,19 @@ module Internals
       # If the text ends with *, partialize it. If with ", don't.
       #
+      # The latter wins. So "hello*" will not be partially searched.
+      #
       @@no_partial = /\"\Z/
       @@partial    = /\*\Z/
       def partialize
-        self.partial = false and return if @text =~ @@no_partial
-        self.partial = true if @text =~ @@partial
+        self.partial = false and return unless @text !~ @@no_partial
+        self.partial = true unless @text !~ @@partial
       end
       # If the text ends with ~ similarize it. If with ", don't.
       #
+      # The latter wins.
+      #
       @@no_similar = /\"\Z/
       @@similar    = /\~\Z/
       def similarize
@@ -96,21 +110,10 @@ module Internals
         @text.gsub! @@illegals, '' unless @text.blank?
       end
-      # Visitor for tokenizer.
       #
-      # TODO Rewrite!!!
       #
-      def tokenize_with tokenizer
-        @text = tokenizer.normalize @text
-      end
-      # TODO spec!
-      #
-      # TODO Rewrite!!
-      #
-      def tokenized tokenizer
-        tokenizer.tokenize(@text.to_s).each do |text|
-          yield text
-        end
+      def symbolize
+        @text = @text.to_sym
       end
       # Returns an array of possible combinations.
@@ -181,6 +184,12 @@ module Internals
         "#{similar?? :similarity : :index}:#{@text}"
       end
+      # If the originals & the text are the same, they are the same.
+      #
+      def == other
+        self.original == other.original && self.text == other.text
+      end
       # Displays the qualifier text and the text, joined.
       #
       # e.g. name:meier

data/lib/picky/internals/query/tokens.rb CHANGED

@@ -1,31 +1,36 @@
 # encoding: utf-8
 #
 module Internals
   #
   #
   module Query
     # This class primarily handles switching through similar token constellations.
     #
     class Tokens # :nodoc:all
       # Basically delegates to its internal tokens array.
       #
       self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
       #
       #
       def initialize tokens = []
         @tokens = tokens
       end
+      def self.processed words, downcase = true
+        new words.collect! { |word| Token.processed word, downcase }
+      end
+      # Tokenizes each token.
       #
+      # Note: Passed tokenizer needs to offer #normalize(text).
       #
       def tokenize_with tokenizer
         @tokens.each { |token| token.tokenize_with(tokenizer) }
       end
       # Generates an array in the form of
       # [
       #  [combination],                           # of token 1
@@ -33,14 +38,17 @@ module Internals
       #  [combination, combination]               # of token 3
       # ]
       #
-      # TODO If we want token behaviour defined per Query, we can
-      #      compact! here
-      #
       def possible_combinations_in type
         @tokens.inject([]) do |combinations, token|
-          combinations << token.possible_combinations_in(type)
+          possible_combinations = token.possible_combinations_in type
+          # Note: Optimization for ignoring tokens that allocate to nothing and
+          # can be ignored.
+          # For example in a special search, where "florian" is not
+          # mapped to any category.
+          #
+          possible_combinations ? combinations << possible_combinations : combinations
         end
-        # TODO compact! if ignore_unassigned_tokens
       end
       # Makes the last of the tokens partial.
@@ -57,33 +65,37 @@ module Internals
       def cap? maximum
         @tokens.size > maximum
       end
       # Rejects blank tokens.
       #
       def reject
         @tokens.reject! &:blank?
       end
       # Returns a solr query.
       #
       def to_solr_query
         @tokens.map(&:to_solr).join ' '
       end
       #
       #
       def originals
         @tokens.map(&:original)
       end
+      def == other
+        self.tokens == other.tokens
+      end
       # Just join the token original texts.
       #
       def to_s
         originals.join ' '
       end
     end
   end
 end

data/lib/picky/internals/query/weights.rb CHANGED

@@ -3,19 +3,19 @@ module Query
   # Calculates weights for certain combinations.
   #
   class Weights # :nodoc:all
     #
     #
     def initialize weights = {}
       @weights = weights
     end
     # Get the weight of an allocation.
     #
     def weight_for clustered
       @weights[clustered] || 0
     end
     # Returns an energy term E for allocation. this turns into a probability
     # by P(allocation) = 1/Z * exp (-1/T * E(allocation)),
     # where Z is the normalizing partition function
@@ -31,24 +31,26 @@ module Query
     # Note: Cache this if more complicated weighings become necessary.
     #
     def score combinations
-      # TODO Or hide: combinations#to_weights_key
+      # TODO Or hide: combinations#to_weights_key (but it's an array, so…)
       #
       # TODO combinations could cluster uniq as combinations are added (since combinations don't change).
       #
+      # TODO Or it could use actual combinations? Could it? Or make combinations comparable to Symbols.
+      #
       weight_for combinations.map(&:category_name).clustered_uniq_fast
     end
     # Are there any weights defined?
     #
     def empty?
       @weights.empty?
     end
     # Prints out a nice representation of the configured weights.
     #
     def to_s
       @weights.to_s
     end
   end
 end

data/lib/picky/internals/tokenizers/base.rb CHANGED

@@ -20,6 +20,7 @@ Removes chars after split: #{@removes_characters_after_splitting_regexp ? "/#{@r
 Normalizes words:          #{@normalizes_words_regexp_replaces ? @normalizes_words_regexp_replaces : '-'}
 Rejects tokens?            #{reject_condition_location ? "Yes, see line #{reject_condition_location} in app/application.rb" : '-'}
 Substitutes chars?         #{@substituter ? "Yes, using #{@substituter}." : '-' }
+Case sensitive?            #{@case_sensitive ? "Yes." : "-"}
         TOKENIZER
       end
@@ -125,6 +126,13 @@ Substitutes chars?         #{@substituter ? "Yes, using #{@substituter}." : '-'
         tokens.reject! &@reject_condition
       end
+      def case_sensitive case_sensitive
+        @case_sensitive = case_sensitive
+      end
+      def downcase?
+        !@case_sensitive
+      end
       # Checks if the right argument type has been given.
       #
       def check_argument_in method, type, argument, &condition
@@ -156,6 +164,7 @@ Substitutes chars?         #{@substituter ? "Yes, using #{@substituter}." : '-'
         normalizes_words options[:normalizes_words]                                     if options[:normalizes_words]
         removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
         substitutes_characters_with options[:substitutes_characters_with]               if options[:substitutes_characters_with]
+        case_sensitive options[:case_sensitive]                                         unless options[:case_sensitive].nil?
         # Defaults.
         #
@@ -163,37 +172,54 @@ Substitutes chars?         #{@substituter ? "Yes, using #{@substituter}." : '-'
         reject_token_if &(options[:reject_token_if] || :blank?)
       end
-      # Hooks.
+      # Default preprocessing hook.
       #
-      # Preprocessing.
+      # Does:
+      # 1. Character substitution.
+      # 2. Remove illegal expressions.
+      # 3. Remove non-single stopwords. (Stopwords that occur with other words)
       #
-      def preprocess text; end
+      def preprocess text
+        text = substitute_characters text
+        remove_illegals text
+        # We do not remove single stopwords e.g. in the indexer for
+        # an entirely different reason than in the query tokenizer.
+        # An indexed thing with just name "UND" (a possible stopword)
+        # should not lose its name.
+        #
+        remove_non_single_stopwords text
+        text
+      end
       # Pretokenizing.
       #
-      def pretokenize text; end
-      # Postprocessing.
+      # Does:
+      # 1. Split the text into words.
+      # 2. Normalize each word.
+      #
+      def pretokenize text
+        words = split text
+        words.collect! do |word|
+          normalize_with_patterns word
+          word
+        end
+      end
+      # Basic postprocessing (overridden in both query/index tokenizers).
       #
       def process tokens
         reject tokens # Reject any tokens that don't meet criteria
         tokens
       end
-      # Converts words into real tokens.
-      #
-      def tokens_for words
-        Internals::Query::Tokens.new words.collect! { |word| token_for word }
-      end
+      # # Converts words into real tokens.
+      # #
+      # def tokens_for words
+      #   Internals::Query::Tokens.new words.collect! { |word| token_for word }
+      # end
       # Turns non-blank text into symbols.
       #
       def symbolize text
         text.blank? ? nil : text.to_sym
       end
-      # Returns a tokens object.
-      #
-      def empty_tokens
-        Internals::Query::Tokens.new
-      end
     end

data/lib/picky/internals/tokenizers/index.rb CHANGED

@@ -15,45 +15,16 @@ module Internals
         @default ||= new
       end
-      # Default indexing preprocessing hook.
-      #
-      # Does:
-      # 1. Character substitution.
-      # 2. Downcasing.
-      # 3. Remove illegal expressions.
-      # 4. Remove non-single stopwords. (Stopwords that occur with other words)
-      #
-      def preprocess text
-        text = substitute_characters text
-        text.downcase!
-        remove_illegals text
-        # we do not remove single stopwords for an entirely different
-        # reason than in the query tokenizer.
-        # An indexed thing with just name "UND" (a possible stopword) should not lose its name.
-        #
-        remove_non_single_stopwords text
-        text
-      end
-      # Default indexing pretokenizing hook.
-      #
-      # Does:
-      # 1. Split the text into words.
-      # 2. Normalize each word.
-      #
-      def pretokenize text
-        words = split text
-        words.collect! do |word|
-          normalize_with_patterns word
-          word
-        end
-      end
       # Does not actually return a token, but a
       # symbol "token".
       #
-      def token_for text
-        symbolize text
+      def tokens_for words
+        words.collect! { |word| word.downcase! if downcase?; word.to_sym }
+      end
+      # Returns empty tokens.
+      #
+      def empty_tokens
+        []
       end
     end

data/lib/picky/internals/tokenizers/query.rb CHANGED

@@ -3,7 +3,7 @@
 module Internals
   module Tokenizers
     # There are a few class methods that you can use to configure how a query works.
     #
     # removes_characters regexp
@@ -14,66 +14,46 @@ module Internals
     # normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
     #
     class Query < Base
       def self.default= new_default
         @default = new_default
       end
       def self.default
         @default ||= new
       end
       attr_reader :maximum_tokens
       def initialize options = {}
         super options
         @maximum_tokens = options[:maximum_tokens] || 5
       end
-      def preprocess text
-        remove_illegals text             # Remove illegal characters
-        remove_non_single_stopwords text # remove stop words
-        text
-      end
-      # Split the text and put some back together.
-      #
-      # TODO Make the same as in indexing?
-      #
-      def pretokenize text
-        split text
-      end
       # Let each token process itself.
       # Reject, limit, and partialize tokens.
       #
+      # In querying we work with real tokens (in indexing it's just symbols).
+      #
       def process tokens
-        tokens.tokenize_with self
-        tokens.reject              # Reject any tokens that don't meet criteria
-        tokens.cap maximum_tokens  # Cut off superfluous tokens
-        tokens.partialize_last     # Set certain tokens as partial
+        tokens.reject                # Reject any tokens that don't meet criteria.
+        tokens.cap maximum_tokens    # Cut off superfluous tokens.
+        tokens.partialize_last       # Set certain tokens as partial.
         tokens
       end
-      # Called by the token.
-      #
-      # TODO Perhaps move to Normalizer?
+      # Converts words into real tokens.
       #
-      def normalize text
-        text = substitute_characters text # Substitute special characters
-        text.downcase!                    # Downcase all text
-        normalize_with_patterns text      # normalize
-        text.to_sym                       # symbolize
+      def tokens_for words
+        Internals::Query::Tokens.processed words, downcase?
       end
-      # Returns a token for a word.
-      # The basic query tokenizer uses new tokens.
+      # Returns a tokens object.
       #
-      def token_for word
-        Internals::Query::Token.processed word
+      def empty_tokens
+        Internals::Query::Tokens.new
       end
     end
   end
 end

data/lib/picky/loader.rb CHANGED

@@ -179,8 +179,6 @@ module Loader # :nodoc:all
     load_internals 'indexed/categories'
     load_internals 'indexed/index'
-    # TODO Ok here?
-    #
     load_internals 'indexed/wrappers/exact_first'
     # Bundle Wrapper

data/lib/picky/search.rb CHANGED

@@ -146,7 +146,7 @@ class Search
     # Sort the allocations.
     # (allocations are sorted according to score, highest to lowest)
     #
-    allocations.sort
+    allocations.sort!
     # Return the allocations.
     #

data/lib/tasks/server.rake CHANGED

@@ -1,17 +1,9 @@
-# TODO This file needs some love.
+# Server tasks, like starting/stopping/restarting.
 #
 namespace :server do
-  def chdir_to_root
-    Dir.chdir PICKY_ROOT
-  end
-  def current_pid
-    pid = `cat #{File.join(PICKY_ROOT, 'tmp/pids/unicorn.pid')}`
-    pid.blank? ? nil : pid.chomp
-  end
   # desc "Start the unicorns. (Wehee!)"
+  #
   task :start => :framework do
     chdir_to_root
     daemonize = PICKY_ENVIRONMENT == 'production' ? '-D' : ''
@@ -19,17 +11,27 @@ namespace :server do
     puts "Running \`#{command}\`."
     exec command
   end
   # desc "Stop the unicorns. (Blam!)"
+  #
   task :stop => :framework do
     `kill -QUIT #{current_pid}` if current_pid
   end
   # desc "Restart the unicorns."
   task :restart do
     Rake::Task[:"server:stop"].invoke
     sleep 5
     Rake::Task[:"server:start"].invoke
   end
+  def chdir_to_root
+    Dir.chdir PICKY_ROOT
+  end
+  def current_pid
+    pid = `cat #{File.join(PICKY_ROOT, 'tmp/pids/unicorn.pid')}`
+    pid.blank? ? nil : pid.chomp
+  end
 end

data/lib/tasks/todo.rake CHANGED

@@ -1,5 +1,5 @@
 desc "Finds where Picky still needs input from you."
-task :todo do
+task :'to#{}do' do
   if system "grep -e 'TODO.*' -n --color=always -R *"
     puts "Picky needs a bit of input from you there. Thanks."
   else

data/spec/lib/internals/frontend_adapters/rack_spec.rb CHANGED

@@ -273,14 +273,14 @@ describe Internals::FrontendAdapters::Rack do
       end
       context 'without app' do
         context 'with url' do
-          it 'should use the 404 with default_options from the url' do
+          it 'should use the 200 with default_options from the url' do
             @routes.should_receive(:add_route).once.with Internals::FrontendAdapters::Rack::STATUSES[200], { :request_method => "GET", :path_info => /some_url/ }
             @rack_adapter.answer 'some_url'
           end
         end
         context 'without url' do
-          it 'should use the 404 with default_options' do
+          it 'should use the 200 with default_options' do
             @routes.should_receive(:add_route).once.with Internals::FrontendAdapters::Rack::STATUSES[200], { :request_method => "GET" }
             @rack_adapter.answer

data/spec/lib/internals/tokenizers/base_spec.rb CHANGED

@@ -5,7 +5,7 @@ require 'spec_helper'
 describe Internals::Tokenizers::Base do
   context 'with special instance' do
-    let (:tokenizer) { described_class.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello } }
+    let (:tokenizer) { described_class.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello }, case_sensitive: true }
     it 'rejects tokens with length < 2' do
       tokenizer.reject([:'', :a, :ab, :abc]).should == [:ab, :abc]
     end
@@ -13,7 +13,7 @@ describe Internals::Tokenizers::Base do
       tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
     end
     describe 'to_s' do
-      it 'does something' do
+      it 'spits out the right text' do
         tokenizer.to_s.should == <<-EXPECTED
 Removes characters:        -
 Stopwords:                 -
@@ -22,6 +22,7 @@ Removes chars after split: -
 Normalizes words:          -
 Rejects tokens?            Yes, see line 8 in app/application.rb
 Substitutes chars?         -
+Case sensitive?            Yes.
 EXPECTED
       end
     end
@@ -31,7 +32,7 @@ EXPECTED
     let(:tokenizer) { described_class.new }
         describe 'to_s' do
-          it 'does something' do
+          it 'spits out the right text' do
             tokenizer.to_s.should == <<-EXPECTED
 Removes characters:        -
 Stopwords:                 -
@@ -40,6 +41,7 @@ Removes chars after split: -
 Normalizes words:          -
 Rejects tokens?            -
 Substitutes chars?         -
+Case sensitive?            -
 EXPECTED
           end
         end

data/spec/lib/internals/tokenizers/query_spec.rb CHANGED

@@ -41,6 +41,7 @@ describe Internals::Tokenizers::Query do
     it 'should call methods in order' do
       text = stub :text
+      tokenizer.should_receive(:substitute_characters).once.with(text).and_return text
       tokenizer.should_receive(:remove_illegals).once.ordered.with text
       tokenizer.should_receive(:remove_non_single_stopwords).once.ordered.with text
@@ -57,13 +58,7 @@ describe Internals::Tokenizers::Query do
     before(:each) do
       @tokens = mock :tokens, :null_object => true
     end
-    it 'should tokenize the tokens' do
-      @tokens.should_receive(:tokenize_with).once.with tokenizer
-      tokenizer.process @tokens
-    end
     it 'should call methods on the tokens in order' do
-      @tokens.should_receive(:tokenize_with).once.ordered
       @tokens.should_receive(:reject).once.ordered
       @tokens.should_receive(:cap).once.ordered
       @tokens.should_receive(:partialize_last).once.ordered
@@ -122,13 +117,5 @@ describe Internals::Tokenizers::Query do
       tokenizer.tokenize('').map(&:to_s).should == []
     end
   end
-  describe "token_for" do
-    it "should get a preprocessed token" do
-      text = stub(:text)
-      Internals::Query::Token.should_receive(:processed).with text
-      tokenizer.token_for text
-    end
-  end
 end

data/spec/lib/query/combination_spec.rb CHANGED

@@ -64,7 +64,7 @@ describe 'Query::Combination' do
   describe 'ids' do
     it 'should call ids with the text on bundle' do
-      @bundle.should_receive(:ids).once.with 'some_text'
+      @bundle.should_receive(:ids).once.with :some_text
       @combination.ids
     end
@@ -80,7 +80,7 @@ describe 'Query::Combination' do
   describe 'weight' do
     it 'should call weight with the text on bundle' do
-      @bundle.should_receive(:weight).once.with 'some_text'
+      @bundle.should_receive(:weight).once.with :some_text
       @combination.weight
     end

data/spec/lib/query/indexes_spec.rb CHANGED

@@ -44,17 +44,17 @@ describe Internals::Query::Indexes do
     it 'can handle empty combinations' do
       combinations = [[1,2,3], [:a, :b, :c], []]
-      indexes.expand_combinations_from(combinations).should == nil
+      indexes.expand_combinations_from(combinations).should == []
     end
     it 'can handle empty combinations' do
       combinations = [[], [:a, :b, :c], []]
-      indexes.expand_combinations_from(combinations).should == nil
+      indexes.expand_combinations_from(combinations).should == []
     end
     it 'can handle totally empty combinations' do
       combinations = [[], [], []]
-      indexes.expand_combinations_from(combinations).should == nil
+      indexes.expand_combinations_from(combinations).should == []
     end
     it 'is fast in a complicated case' do
       combinations = [[1,2,3], [:a, :b, :c], [:k, :l]]
@@ -64,12 +64,12 @@ describe Internals::Query::Indexes do
     it 'is fast in a simple case' do
       combinations = [[1], [2], [3]]
-      performance_of { indexes.expand_combinations_from(combinations) }.should < 0.00055
+      performance_of { indexes.expand_combinations_from(combinations) }.should < 0.0006
     end
     it 'is very fast in a 1-empty case' do
       combinations = [[], [2], [3]]
-      performance_of { indexes.expand_combinations_from(combinations) }.should < 0.00045
+      performance_of { indexes.expand_combinations_from(combinations) }.should < 0.0005
     end
     it 'is very fast in a all-empty case' do
       combinations = [[], [], []]

data/spec/lib/query/token_spec.rb CHANGED Viewed

@@ -8,6 +8,15 @@ describe Internals::Query::Token do
     Internals::Query::Qualifiers.instance.prepare
   end
+  describe '==' do
+    it 'is equal if the originals are equal' do
+      described_class.processed('similar~').should == described_class.processed('similar~')
+    end
+    it 'is not equal if the originals are not equal' do
+      described_class.processed('similar~').should_not == described_class.processed('similar')
+    end
+  end
   describe 'next_similar_token' do
     before(:each) do
       @bundle   = stub :bundle, :similar => [:array, :of, :similar]
@@ -157,20 +166,29 @@ describe Internals::Query::Token do
   end
   describe 'processed' do
+    it 'should return a new token' do
+      described_class.processed('some text').should be_kind_of(described_class)
+    end
+    it 'generates a token' do
+      described_class.processed('some text').class.should == described_class
+    end
+  end
+  describe 'process' do
+    let(:token) { described_class.new 'any_text' }
+    it 'returns itself' do
+      token.process.should == token
+    end
     it 'should have an order' do
-      token = stub :token
-      described_class.should_receive(:new).once.and_return token
       token.should_receive(:qualify).once.ordered
       token.should_receive(:extract_original).once.ordered
+      token.should_receive(:downcase).once.ordered
       token.should_receive(:partialize).once.ordered
       token.should_receive(:similarize).once.ordered
       token.should_receive(:remove_illegals).once.ordered
+      token.should_receive(:symbolize).once.ordered
-      described_class.processed :any_text
-    end
-    it 'should return a new token' do
-      described_class.processed('some text').should be_kind_of(described_class)
+      token.process
     end
   end
@@ -352,6 +370,13 @@ describe Internals::Query::Token do
       before(:each) do
         @token = described_class.processed 'text*'
       end
+      it 'should not set partial' do
+        @token.instance_variable_set :@partial, false
+        @token.partial = true
+        @token.instance_variable_get(:@partial).should be_false
+      end
       it 'should not set partial' do
         @token.partial = false
@@ -382,20 +407,20 @@ describe Internals::Query::Token do
     it 'should remove *' do
       token = described_class.processed 'text*'
-      token.text.should == 'text'
+      token.text.should == :text
     end
     it 'should remove ~' do
       token = described_class.processed 'text~'
-      token.text.should == 'text'
+      token.text.should == :text
     end
     it 'should remove "' do
       token = described_class.processed 'text"'
-      token.text.should == 'text'
+      token.text.should == :text
     end
     it "should pass on a processed text" do
-      described_class.processed('text').text.should == 'text'
+      described_class.processed('text').text.should == :text
     end
   end

data/spec/lib/query/tokens_spec.rb CHANGED Viewed

@@ -7,6 +7,35 @@ describe Internals::Query::Tokens do
     Internals::Query::Qualifiers.instance.prepare
   end
+  describe '.processed' do
+    it 'generates processed tokens from all words' do
+      expected = [
+        Internals::Query::Token.processed('this~'),
+        Internals::Query::Token.processed('is'),
+        Internals::Query::Token.processed('a'),
+        Internals::Query::Token.processed('sp:solr'),
+        Internals::Query::Token.processed('query"')
+      ]
+      described_class.should_receive(:new).once.with expected
+      described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"']
+    end
+    it 'generates processed tokens from all words' do
+      expected = [
+        Internals::Query::Token.processed('this~', false),
+        Internals::Query::Token.processed('is', false),
+        Internals::Query::Token.processed('a', false),
+        Internals::Query::Token.processed('sp:solr', false),
+        Internals::Query::Token.processed('query"', false)
+      ]
+      described_class.should_receive(:new).once.with expected
+      described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"']
+    end
+  end
   describe 'to_solr_query' do
     context 'many tokens' do
       before(:each) do
@@ -151,6 +180,16 @@ describe Internals::Query::Tokens do
         [:combination31, :combination32, :combination33]
       ]
     end
+    it 'should work correctly' do
+      @token1.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination11, :combination12]
+      @token2.should_receive(:possible_combinations_in).once.with(:some_index).and_return nil
+      @token3.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination31, :combination32, :combination33]
+      @tokens.possible_combinations_in(:some_index).should == [
+        [:combination11, :combination12],
+        [:combination31, :combination32, :combination33]
+      ]
+    end
   end
   describe 'to_s' do

metadata CHANGED Viewed

@@ -2,7 +2,7 @@
 name: picky
 version: !ruby/object:Gem::Version
   prerelease:
-  version: 2.0.0
+  version: 2.1.0
 platform: ruby
 authors:
 - Florian Hanke
@@ -10,7 +10,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-03-28 00:00:00 +02:00
+date: 2011-04-07 00:00:00 +10:00
 default_executable: picky
 dependencies:
 - !ruby/object:Gem::Dependency