RubyGems - picky - Versions diffs - 1.2.3 → 1.2.4 - Mend

picky 1.2.3 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

data/lib/picky/application.rb +1 -1
data/lib/picky/cli.rb +6 -5
data/lib/picky/extensions/array.rb +0 -2
data/lib/picky/indexed/categories.rb +1 -3
data/lib/picky/indexed/category.rb +1 -1
data/lib/picky/indexed/index.rb +3 -1
data/lib/picky/indexed/wrappers/exact_first.rb +1 -1
data/lib/picky/indexes_api.rb +1 -1
data/lib/picky/indexing/indexes.rb +1 -1
data/lib/picky/loader.rb +0 -2
data/lib/picky/query/allocations.rb +0 -1
data/lib/picky/signals.rb +1 -4
data/lib/picky/tokenizers/base.rb +13 -6
data/lib/picky/tokenizers/index.rb +0 -13
data/lib/picky/tokenizers/query.rb +1 -1
data/spec/lib/application_spec.rb +9 -2
data/spec/lib/cli_spec.rb +20 -0
data/spec/lib/configuration/index_spec.rb +5 -5
data/spec/lib/index/files_spec.rb +4 -4
data/spec/lib/indexed/bundle_spec.rb +4 -4
data/spec/lib/indexed/index_spec.rb +19 -2
data/spec/lib/indexing/indexes_spec.rb +36 -0
data/spec/lib/sources/db_spec.rb +1 -1
data/spec/lib/tokenizers/base_spec.rb +164 -138
metadata +5 -9
data/lib/picky/helpers/cache.rb +0 -25
data/lib/picky/helpers/gc.rb +0 -13
data/spec/lib/helpers/cache_spec.rb +0 -35
data/spec/lib/helpers/gc_spec.rb +0 -71

data/lib/picky/application.rb CHANGED Viewed

@@ -179,7 +179,7 @@ class Application
     # * source: The source the data comes from. See Sources::Base. # TODO Sources (all).
     #
     # Options:
-    # * result_identifier: # TODO Rename.
+    # * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
     #
     def index name, source, options = {}
       IndexAPI.new name, source, options

data/lib/picky/cli.rb CHANGED Viewed

@@ -9,10 +9,13 @@ module Picky
     # Note: By default, help is displayed. I.e. when no command is given.
     #
     def execute selector = nil, *args
-      executor_class, *params = selector && @@mapping[selector.to_sym] || Help
+      executor_class, *params = executor_class_for selector
       executor = executor_class.new
       executor.execute selector, args, params
     end
+    def executor_class_for selector = nil
+      selector && @@mapping[selector.to_sym] || [Help]
+    end
     class Base
       def usage name, params
@@ -63,12 +66,10 @@ module Picky
     # Maps commands to the other gem's command.
     #
-    # TODO Add optional params.
-    #
     @@mapping = {
-      :generate => [Generate, 'thing_to_generate: e.g. "unicorn_server"', :parameters],
+      :generate => [Generate, 'sinatra_client | unicorn_server | empty_unicorn_server', 'app_directory_name (optional)'],
       :help     => [Help],
-      :stats    => [Statistics, 'logfile_to_use: e.g. log/search.log', 'port (optional)']
+      :stats    => [Statistics, 'logfile, e.g. log/search.log', 'port (optional)']
     }
     def self.mapping
       @@mapping

data/lib/picky/extensions/array.rb CHANGED Viewed

@@ -35,8 +35,6 @@ class Array # :nodoc:all
   def sort_by_levenshtein! from
     from = from.to_s
     sort! do |this, that|
-      # TODO Cache for speed?
-      #
       Text::Levenshtein.distance(this.to_s, from) <=> Text::Levenshtein.distance(that.to_s, from)
     end
   end

data/lib/picky/indexed/categories.rb CHANGED Viewed

@@ -60,13 +60,11 @@ module Indexed
     # Returns possible Combinations for the token.
     #
-    # The categories param is an optimization.
+    # The preselected_categories param is an optimization.
     #
     # TODO Return [RemovedCategory(token, nil)]
     #      If the search is ...
     #
-    # TODO Make categories also a collection class.
-    #
     # TODO Return [] if not ok, nil if needs to be removed?
     #      Somehow unnice, but…
     #

data/lib/picky/indexed/category.rb CHANGED Viewed

@@ -65,7 +65,7 @@ module Indexed
     # Returns the right index bundle for this token.
     #
     def bundle_for token
-      token.partial? ? partial : exact
+      token.partial?? partial : exact
     end
     # The partial strategy defines whether to really use the partial index.

data/lib/picky/indexed/index.rb CHANGED Viewed

@@ -18,7 +18,7 @@ module Indexed
       @categories = Categories.new ignore_unassigned_tokens: ignore_unassigned_tokens
     end
-    # TODO Spec. Doc.
+    # TODO Doc.
     #
     def define_category category_name, options = {}
       new_category = Category.new category_name, self, options
@@ -26,7 +26,9 @@ module Indexed
       new_category
     end
+    # Return the possible combinations for this token.
     #
+    # A combination is a tuple <token, index_bundle>.
     #
     def possible_combinations token
       categories.possible_combinations_for token

data/lib/picky/indexed/wrappers/exact_first.rb CHANGED Viewed

@@ -2,7 +2,7 @@
 #
 module Indexed
-  # FIXME and spec
+  # TODO Spec
   #
   module Wrappers

data/lib/picky/indexes_api.rb CHANGED Viewed

@@ -29,7 +29,7 @@ class IndexesAPI # :nodoc:all
     self.index_mapping[index.name] = index
     @indexing.register index.indexing
-    @indexed.register  index.indexed # TODO Even necessary?
+    @indexed.register  index.indexed
   end
   def [] name

data/lib/picky/indexing/indexes.rb CHANGED Viewed

@@ -17,7 +17,7 @@ module Indexing
       clear
     end
-    # TODO Spec.
+    # TODO Doc.
     #
     def clear
       @indexes = []

data/lib/picky/loader.rb CHANGED Viewed

@@ -100,8 +100,6 @@ module Loader # :nodoc:all
     # Requiring Helpers
     #
-    load_relative 'helpers/gc'
-    load_relative 'helpers/cache'
     load_relative 'helpers/measuring'
     # Character Substituters

data/lib/picky/query/allocations.rb CHANGED Viewed

@@ -63,7 +63,6 @@ module Query
     # But still TODO try for a faster one.
     #
     def random_ids amount = 1
-      # TODO can there be no @allocations???
       return [] if @allocations.empty?
       ids = @allocations.first.ids
       indexes = Array.new(ids.size) { |id| id }.sort_by { rand }

data/lib/picky/signals.rb CHANGED Viewed

@@ -1,11 +1,8 @@
 # TODO Cleanup and move to project_prototype.
 #
 # Signal.trap 'USR1' do
-#   Indexed.reload
+#   Indexes.reload
 # end
 # Signal.trap 'USR2' do
 #   Loader.reload
-# end
-# Signal.trap 'INT' do
-#   exit!
 # end

data/lib/picky/tokenizers/base.rb CHANGED Viewed

@@ -82,6 +82,17 @@ module Tokenizers # :nodoc:all
       substituter?? substituter.substitute(text) : text
     end
+    # Reject tokens after tokenizing based on the given criteria.
+    #
+    # Note: Currently only for indexing. TODO Redesign and write for both!
+    #
+    def reject_token_if &condition
+      @reject_condition = condition
+    end
+    def reject tokens
+      tokens.reject! &@reject_condition
+    end
     # Returns a number of tokens, generated from the given text.
     #
@@ -111,6 +122,7 @@ module Tokenizers # :nodoc:all
       # Defaults.
       #
       splits_text_on options[:splits_text_on] || /\s/
+      reject_token_if &(options[:reject_token_if] || :blank?)
     end
     # Hooks.
@@ -125,15 +137,10 @@ module Tokenizers # :nodoc:all
     # Postprocessing.
     #
     def process tokens
-      reject tokens    # Reject any tokens that don't meet criteria
+      reject tokens # Reject any tokens that don't meet criteria
       tokens
     end
-    # Rejects blank tokens.
-    #
-    def reject tokens
-      tokens.reject! &:blank?
-    end
     # Converts words into real tokens.
     #
     def tokens_for words

data/lib/picky/tokenizers/index.rb CHANGED Viewed

@@ -39,8 +39,6 @@ module Tokenizers
     # 1. Split the text into words.
     # 2. Normalize each word.
     #
-    # TODO Rename into wordize? Or somesuch?
-    #
     def pretokenize text
       words = split text
       words.collect! do |word|
@@ -56,16 +54,5 @@ module Tokenizers
       symbolize text
     end
-    # Rejects tokens if they are too short (or blank).
-    #
-    # Override in subclasses to redefine behaviour.
-    #
-    # TODO TODO TODO Make parametrizable! reject { |token| }
-    #
-    def reject tokens
-      tokens.reject! &:blank?
-      # tokens.reject! { |token| token.to_s.size < 2 }
-    end
   end
 end

data/lib/picky/tokenizers/query.rb CHANGED Viewed

@@ -57,7 +57,7 @@ module Tokenizers
     # TODO Perhaps move to Normalizer?
     #
     def normalize text
-      text = substitute_characters text # Substitute special characters TODO Move to subclass
+      text = substitute_characters text # Substitute special characters
       text.downcase!                    # Downcase all text
       normalize_with_patterns text      # normalize
       text.to_sym                       # symbolize

data/spec/lib/application_spec.rb CHANGED Viewed

@@ -24,13 +24,15 @@ describe Application do
     end
     it "should run ok" do
       lambda {
-        # TODO Add all possible cases.
+        # Here we just test if the API can be called ok.
         #
         class TestApplication < Application
           default_indexing removes_characters:                 /[^a-zA-Z0-9\s\/\-\"\&\.]/,
                            stopwords:                          /\b(and|the|of|it|in|for)\b/,
                            splits_text_on:                     /[\s\/\-\"\&\.]/,
-                           removes_characters_after_splitting: /[\.]/
+                           removes_characters_after_splitting: /[\.]/,
+                           normalizes_words:                   [[/\$(\w+)/i, '\1 dollars']],
+                           reject_token_if:                    lambda { |token| token.blank? || token == :amistad }
           default_querying removes_characters: /[^a-zA-Z0-9äöü\s\/\-\,\&\"\~\*\:]/,
                            stopwords:          /\b(and|the|of|it|in|for)\b/,
@@ -48,6 +50,11 @@ describe Application do
           books_index.define_category :isbn,
                                       partial: Partial::None.new # Partially searching on an ISBN makes not much sense.
+          geo_index = index :geo, Sources::CSV.new(:location, :north, :east, file: 'data/ch.csv', col_sep: ',')
+          geo_index.define_category :location
+          geo_index.define_ranged_category(:north1, 1, precision: 3, from: :north)
+                   .define_ranged_category(:east1,  1, precision: 3, from: :east)
           full = Query::Full.new books_index
           live = Query::Live.new books_index

data/spec/lib/cli_spec.rb CHANGED Viewed

@@ -10,6 +10,26 @@ require File.expand_path '../../../lib/picky/cli', __FILE__
 #
 describe Picky::CLI do
+  describe 'instance' do
+    before(:each) do
+      @cli = Picky::CLI.new
+    end
+    describe 'executor_class_for' do
+      it 'returns Help by default' do
+        @cli.executor_class_for.should == [Picky::CLI::Help]
+      end
+      it 'returns Generator for generate' do
+        @cli.executor_class_for(:generate).should == [Picky::CLI::Generate, "sinatra_client | unicorn_server | empty_unicorn_server", "app_directory_name (optional)"]
+      end
+      it 'returns Help for help' do
+        @cli.executor_class_for(:help).should == [Picky::CLI::Help]
+      end
+      it 'returns Statistics for stats' do
+        @cli.executor_class_for(:stats).should == [Picky::CLI::Statistics, "logfile, e.g. log/search.log", "port (optional)"]
+      end
+    end
+  end
   describe Picky::CLI::Base do
     before(:each) do
       @executor = Picky::CLI::Base.new

data/spec/lib/configuration/index_spec.rb CHANGED Viewed

@@ -24,7 +24,7 @@ describe 'Configuration::Index' do
       @config.index_path(:some_bundle, :some_name).should_not equal(@config.index_path(:some_bundle, :some_name))
     end
     it "returns the right thing" do
-      @config.index_path(:some_bundle, :some_name).should == 'some/search/root/index/test/some_index/some_category_some_bundle_some_name'
+      @config.index_path(:some_bundle, :some_name).should == 'spec/test_directory/index/test/some_index/some_category_some_bundle_some_name'
     end
   end
@@ -50,7 +50,7 @@ describe 'Configuration::Index' do
       @config.index_root.should equal(@config.index_root)
     end
     it "returns the right thing" do
-      @config.index_root.should == 'some/search/root/index'
+      @config.index_root.should == 'spec/test_directory/index'
     end
   end
   describe "index_directory" do
@@ -58,7 +58,7 @@ describe 'Configuration::Index' do
       @config.index_directory.should equal(@config.index_directory)
     end
     it "returns the right thing" do
-      @config.index_directory.should == 'some/search/root/index/test/some_index'
+      @config.index_directory.should == 'spec/test_directory/index/test/some_index'
     end
   end
   describe "prepared_index_path" do
@@ -66,12 +66,12 @@ describe 'Configuration::Index' do
       @config.prepared_index_path.should equal(@config.prepared_index_path)
     end
     it "returns the right thing" do
-      @config.prepared_index_path.should == 'some/search/root/index/test/some_index/prepared_some_category_index'
+      @config.prepared_index_path.should == 'spec/test_directory/index/test/some_index/prepared_some_category_index'
     end
   end
   describe "prepare_index_directory" do
     it "calls the right thing" do
-      FileUtils.should_receive(:mkdir_p).once.with 'some/search/root/index/test/some_index'
+      FileUtils.should_receive(:mkdir_p).once.with 'spec/test_directory/index/test/some_index'
       @config.prepare_index_directory
     end

data/spec/lib/index/files_spec.rb CHANGED Viewed

@@ -64,7 +64,7 @@ describe Index::Files do
       it "uses the right file" do
         Yajl::Parser.stub! :parse
-        File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_index.json', 'r'
+        File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_index.json', 'r'
         @files.load_index
       end
@@ -73,7 +73,7 @@ describe Index::Files do
       it "uses the right file" do
         Yajl::Parser.stub! :parse
-        File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_weights.json', 'r'
+        File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_weights.json', 'r'
         @files.load_weights
       end
@@ -82,7 +82,7 @@ describe Index::Files do
       it "uses the right file" do
         Marshal.stub! :load
-        File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_similarity.dump', 'r:binary'
+        File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_similarity.dump', 'r:binary'
         @files.load_similarity
       end
@@ -91,7 +91,7 @@ describe Index::Files do
       it "uses the right file" do
         Yajl::Parser.stub! :parse
-        File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_configuration.json', 'r'
+        File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_configuration.json', 'r'
         @files.load_configuration
       end

data/spec/lib/indexed/bundle_spec.rb CHANGED Viewed

@@ -60,7 +60,7 @@ describe Indexed::Bundle do
       it "uses the right file" do
         Yajl::Parser.stub! :parse
-        File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_index.json', 'r'
+        File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_index.json', 'r'
         @bundle.load_index
       end
@@ -69,7 +69,7 @@ describe Indexed::Bundle do
       it "uses the right file" do
         Yajl::Parser.stub! :parse
-        File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_weights.json', 'r'
+        File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_weights.json', 'r'
         @bundle.load_weights
       end
@@ -78,7 +78,7 @@ describe Indexed::Bundle do
       it "uses the right file" do
         Marshal.stub! :load
-        File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_similarity.dump', 'r:binary'
+        File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_similarity.dump', 'r:binary'
         @bundle.load_similarity
       end
@@ -87,7 +87,7 @@ describe Indexed::Bundle do
       it "uses the right file" do
         Yajl::Parser.stub! :parse
-        File.should_receive(:open).once.with 'some/search/root/index/test/some_index/some_category_some_name_configuration.json', 'r'
+        File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_configuration.json', 'r'
         @bundle.load_configuration
       end

data/spec/lib/indexed/index_spec.rb CHANGED Viewed

@@ -2,7 +2,24 @@ require 'spec_helper'
 describe Indexed::Index do
-  context "with categories" do
+  context 'without stubbed categories' do
+    before(:each) do
+      @index = Indexed::Index.new :some_index_name
+    end
+    describe 'define_category' do
+      it 'adds a new category to the categories' do
+        @index.define_category :some_category_name
+        @index.categories.categories.size.should == 1
+      end
+      it 'returns the new category' do
+        @index.define_category(:some_category_name).should be_kind_of(Indexed::Category)
+      end
+    end
+  end
+  context "with stubbed categories" do
     before(:each) do
       @categories = stub :categories
@@ -12,7 +29,7 @@ describe Indexed::Index do
       @index.stub! :categories => @categories
     end
     describe "load_from_cache" do
       it "delegates to each category" do
         @categories.should_receive(:load_from_cache).once.with

data/spec/lib/indexing/indexes_spec.rb ADDED Viewed

@@ -0,0 +1,36 @@
+require 'spec_helper'
+describe Indexing::Indexes do
+  before(:each) do
+    @indexes = Indexing::Indexes.new
+  end
+  describe 'indexes' do
+    it 'exists' do
+      lambda { @indexes.indexes }.should_not raise_error
+    end
+    it 'is empty by default' do
+      @indexes.indexes.should be_empty
+    end
+  end
+  describe 'clear' do
+    it 'clears the indexes' do
+      @indexes.register :some_index
+      @indexes.clear
+      @indexes.indexes.should == []
+    end
+  end
+  describe 'register' do
+    it 'adds the given index to the indexes' do
+      @indexes.register :some_index
+      @indexes.indexes.should == [:some_index]
+    end
+  end
+end

data/spec/lib/sources/db_spec.rb CHANGED Viewed

@@ -53,7 +53,7 @@ describe Sources::DB do
     end
     context "with file" do
       it "opens the config file relative to root" do
-        File.should_receive(:open).once.with 'some/search/root/app/bla.yml'
+        File.should_receive(:open).once.with 'spec/test_directory/app/bla.yml'
         @source.configure :file => 'app/bla.yml'
       end

data/spec/lib/tokenizers/base_spec.rb CHANGED Viewed

@@ -3,167 +3,193 @@
 require 'spec_helper'
 describe Tokenizers::Base do
-  before(:each) do
-    @tokenizer = Tokenizers::Base.new
-  end
-  describe "substitute(s)_characters*" do
-    it "doesn't substitute if there is no substituter" do
-      @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzäöü'
-    end
-    it "uses the substituter to replace characters" do
-      @tokenizer.substitutes_characters_with CharacterSubstituters::WestEuropean.new
-      @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
+  context 'with special instance' do
+    before(:each) do
+      @tokenizer = Tokenizers::Base.new reject_token_if: lambda { |token| token.to_s.length < 2 || token == :hello }
     end
-    it "uses the european substituter as default" do
-      @tokenizer.substitutes_characters_with
-      @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
-    end
-  end
-  describe "removes_characters_after_splitting" do
-    context "without removes_characters_after_splitting called" do
-      it "has remove_after_normalizing_illegals" do
-        lambda { @tokenizer.remove_after_normalizing_illegals('any') }.should_not raise_error
-      end
-      it 'should define a remove_after_normalizing_illegals normalize_with_patterns does nothing' do
-        unchanging = stub :unchanging
-        @tokenizer.remove_after_normalizing_illegals unchanging
-      end
+    it 'rejects tokens with length < 2' do
+      @tokenizer.reject([:'', :a, :ab, :abc]).should == [:ab, :abc]
     end
-    context "with removes_characters_after_splitting called" do
-      before(:each) do
-        @tokenizer.removes_characters_after_splitting(/[afo]/)
-      end
-      it "has remove_after_normalizing_illegals" do
-        lambda { @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.should_not raise_error
-      end
-      it "removes illegal characters" do
-        @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
-      end
+    it 'rejects tokens that are called :hello' do
+      @tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
     end
   end
-  describe "normalizes_words" do
-    context "without normalizes_words called" do
-      it "has normalize_with_patterns" do
-        lambda { @tokenizer.normalize_with_patterns('any') }.should_not raise_error
-      end
-      it 'should define a method normalize_with_patterns does nothing' do
-        unchanging = stub :unchanging
-        @tokenizer.normalize_with_patterns(unchanging).should == unchanging
-      end
-    end
-    context "with normalizes_words called" do
-      before(:each) do
-        @tokenizer.normalizes_words([
-          [/st\./, 'sankt'],
-          [/stras?s?e?/, 'str']
-        ])
-      end
-      it "has normalize_with_patterns" do
-        lambda { @tokenizer.normalize_with_patterns('a b/c.d') }.should_not raise_error
-      end
-      it "normalizes, but just the first one" do
-        @tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
-      end
+  context 'with normal instance' do
+    before(:each) do
+      @tokenizer = Tokenizers::Base.new
     end
-  end
-  describe "splits_text_on" do
-    context "without splits_text_on called" do
-      it "has split" do
-        lambda { @tokenizer.split('any') }.should_not raise_error
-      end
-      it 'should define a method split that splits by default on \s' do
-        @tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
+    describe 'reject_token_if' do
+      it 'rejects empty tokens by default' do
+        @tokenizer.reject(['a', nil, '', 'b']).should == ['a', 'b']
       end
-      it 'splits text on /\s/ by default' do
-        @tokenizer.split('this is a test').should == ['this', 'is', 'a', 'test']
+      it 'rejects tokens based on the given rejection criteria if set' do
+        @tokenizer.reject_token_if &:nil?
+        @tokenizer.reject(['a', nil, '', 'b']).should == ['a', '', 'b']
       end
     end
-    context "with removes_characters called" do
-      before(:each) do
-        @tokenizer.splits_text_on(/[\s\.\/]/)
+    describe "substitute(s)_characters*" do
+      it "doesn't substitute if there is no substituter" do
+        @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzäöü'
       end
-      it "has split" do
-        lambda { @tokenizer.split('a b/c.d') }.should_not raise_error
+      it "uses the substituter to replace characters" do
+        @tokenizer.substitutes_characters_with CharacterSubstituters::WestEuropean.new
+        @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
       end
-      it "removes illegal characters" do
-        @tokenizer.split('a b/c.d').should == ['a','b','c','d']
+      it "uses the european substituter as default" do
+        @tokenizer.substitutes_characters_with
+        @tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
       end
     end
-  end
-  describe "removes_characters" do
-    context "without removes_characters called" do
-      it "has remove_illegals" do
-        lambda { @tokenizer.remove_illegals('any') }.should_not raise_error
-      end
-      it 'should define a method remove_illegals that does nothing' do
-        unchanging = stub :unchanging
-        @tokenizer.remove_illegals unchanging
+    describe "removes_characters_after_splitting" do
+      context "without removes_characters_after_splitting called" do
+        it "has remove_after_normalizing_illegals" do
+          lambda { @tokenizer.remove_after_normalizing_illegals('any') }.should_not raise_error
+        end
+        it 'should define a remove_after_normalizing_illegals normalize_with_patterns does nothing' do
+          unchanging = stub :unchanging
+          @tokenizer.remove_after_normalizing_illegals unchanging
+        end
+      end
+      context "with removes_characters_after_splitting called" do
+        before(:each) do
+          @tokenizer.removes_characters_after_splitting(/[afo]/)
+        end
+        it "has remove_after_normalizing_illegals" do
+          lambda { @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.should_not raise_error
+        end
+        it "removes illegal characters" do
+          @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
+        end
       end
     end
-    context "with removes_characters called" do
-      before(:each) do
-        @tokenizer.removes_characters(/[afo]/)
-      end
-      it "has remove_illegals" do
-        lambda { @tokenizer.remove_illegals('abcdefghijklmnop') }.should_not raise_error
-      end
-      it "removes illegal characters" do
-        @tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
+    describe "normalizes_words" do
+      context "without normalizes_words called" do
+        it "has normalize_with_patterns" do
+          lambda { @tokenizer.normalize_with_patterns('any') }.should_not raise_error
+        end
+        it 'should define a method normalize_with_patterns does nothing' do
+          unchanging = stub :unchanging
+          @tokenizer.normalize_with_patterns(unchanging).should == unchanging
+        end
+      end
+      context "with normalizes_words called" do
+        before(:each) do
+          @tokenizer.normalizes_words([
+            [/st\./, 'sankt'],
+            [/stras?s?e?/, 'str']
+          ])
+        end
+        it "has normalize_with_patterns" do
+          lambda { @tokenizer.normalize_with_patterns('a b/c.d') }.should_not raise_error
+        end
+        it "normalizes, but just the first one" do
+          @tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
+        end
       end
     end
-  end
-  describe 'stopwords' do
-    context 'without stopwords given' do
-      it 'should define a method remove_stopwords' do
-        lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
-      end
-      it 'should define a method remove_stopwords that does nothing' do
-        @tokenizer.remove_stopwords('from this text').should == 'from this text'
-      end
-      it 'should define a method remove_non_single_stopwords' do
-        lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
+    describe "splits_text_on" do
+      context "without splits_text_on called" do
+        it "has split" do
+          lambda { @tokenizer.split('any') }.should_not raise_error
+        end
+        it 'should define a method split that splits by default on \s' do
+          @tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
+        end
+        it 'splits text on /\s/ by default' do
+          @tokenizer.split('this is a test').should == ['this', 'is', 'a', 'test']
+        end
+      end
+      context "with removes_characters called" do
+        before(:each) do
+          @tokenizer.splits_text_on(/[\s\.\/]/)
+        end
+        it "has split" do
+          lambda { @tokenizer.split('a b/c.d') }.should_not raise_error
+        end
+        it "removes illegal characters" do
+          @tokenizer.split('a b/c.d').should == ['a','b','c','d']
+        end
       end
     end
-    context 'with stopwords given' do
-      before(:each) do
-        @tokenizer.stopwords(/r|e/)
-      end
-      it 'should define a method remove_stopwords' do
-        lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
-      end
-      it 'should define a method stopwords that removes stopwords' do
-        @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
-      end
-      it 'should define a method remove_non_single_stopwords' do
-        lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
-      end
-      it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
-        @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
-      end
-      it 'should define a method remove_non_single_stopwords that does not single stopwords' do
-        @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
+    describe "removes_characters" do
+      context "without removes_characters called" do
+        it "has remove_illegals" do
+          lambda { @tokenizer.remove_illegals('any') }.should_not raise_error
+        end
+        it 'should define a method remove_illegals that does nothing' do
+          unchanging = stub :unchanging
+          @tokenizer.remove_illegals unchanging
+        end
+      end
+      context "with removes_characters called" do
+        before(:each) do
+          @tokenizer.removes_characters(/[afo]/)
+        end
+        it "has remove_illegals" do
+          lambda { @tokenizer.remove_illegals('abcdefghijklmnop') }.should_not raise_error
+        end
+        it "removes illegal characters" do
+          @tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
+        end
       end
     end
-    context 'error case' do
-      before(:each) do
-        @tokenizer.stopwords(/any/)
-      end
-      it 'should not remove non-single stopwords with a star' do
-        @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
-      end
-      it 'should not remove non-single stopwords with a tilde' do
-        @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
+    describe 'stopwords' do
+      context 'without stopwords given' do
+        it 'should define a method remove_stopwords' do
+          lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
+        end
+        it 'should define a method remove_stopwords that does nothing' do
+          @tokenizer.remove_stopwords('from this text').should == 'from this text'
+        end
+        it 'should define a method remove_non_single_stopwords' do
+          lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
+        end
+      end
+      context 'with stopwords given' do
+        before(:each) do
+          @tokenizer.stopwords(/r|e/)
+        end
+        it 'should define a method remove_stopwords' do
+          lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
+        end
+        it 'should define a method stopwords that removes stopwords' do
+          @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
+        end
+        it 'should define a method remove_non_single_stopwords' do
+          lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
+        end
+        it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
+          @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
+        end
+        it 'should define a method remove_non_single_stopwords that does not single stopwords' do
+          @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
+        end
+      end
+      context 'error case' do
+        before(:each) do
+          @tokenizer.stopwords(/any/)
+        end
+        it 'should not remove non-single stopwords with a star' do
+          @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
+        end
+        it 'should not remove non-single stopwords with a tilde' do
+          @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
+        end
       end
     end
   end
 end

metadata CHANGED Viewed

@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
   segments:
   - 1
   - 2
-  - 3
-  version: 1.2.3
+  - 4
+  version: 1.2.4
 platform: ruby
 authors:
 - Florian Hanke
@@ -14,7 +14,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-12-28 00:00:00 +01:00
+date: 2010-12-30 00:00:00 +01:00
 default_executable: picky
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -73,8 +73,6 @@ files:
 - lib/picky/extensions/module.rb
 - lib/picky/extensions/object.rb
 - lib/picky/extensions/symbol.rb
-- lib/picky/helpers/cache.rb
-- lib/picky/helpers/gc.rb
 - lib/picky/helpers/measuring.rb
 - lib/picky/index/bundle.rb
 - lib/picky/index/file/basic.rb
@@ -169,8 +167,6 @@ files:
 - spec/lib/extensions/module_spec.rb
 - spec/lib/extensions/object_spec.rb
 - spec/lib/extensions/symbol_spec.rb
-- spec/lib/helpers/cache_spec.rb
-- spec/lib/helpers/gc_spec.rb
 - spec/lib/helpers/measuring_spec.rb
 - spec/lib/index/bundle_spec.rb
 - spec/lib/index/file/basic_spec.rb
@@ -191,6 +187,7 @@ files:
 - spec/lib/indexing/bundle_spec.rb
 - spec/lib/indexing/category_spec.rb
 - spec/lib/indexing/index_spec.rb
+- spec/lib/indexing/indexes_spec.rb
 - spec/lib/loader_spec.rb
 - spec/lib/loggers/search_spec.rb
 - spec/lib/query/allocation_spec.rb
@@ -276,8 +273,6 @@ test_files:
 - spec/lib/extensions/module_spec.rb
 - spec/lib/extensions/object_spec.rb
 - spec/lib/extensions/symbol_spec.rb
-- spec/lib/helpers/cache_spec.rb
-- spec/lib/helpers/gc_spec.rb
 - spec/lib/helpers/measuring_spec.rb
 - spec/lib/index/bundle_spec.rb
 - spec/lib/index/file/basic_spec.rb
@@ -298,6 +293,7 @@ test_files:
 - spec/lib/indexing/bundle_spec.rb
 - spec/lib/indexing/category_spec.rb
 - spec/lib/indexing/index_spec.rb
+- spec/lib/indexing/indexes_spec.rb
 - spec/lib/loader_spec.rb
 - spec/lib/loggers/search_spec.rb
 - spec/lib/query/allocation_spec.rb

data/lib/picky/helpers/cache.rb DELETED Viewed

@@ -1,25 +0,0 @@
-# TODO Not used anymore? Remove.
-#
-module Helpers # :nodoc:all
-  module Cache
-    # This is a simple cache.
-    # The store needs to be able to answer to [] and []=.
-    #
-    def cached store, key, &block
-      # Get cached result
-      #
-      results = store[key]
-      return results if results
-      results = lambda(&block).call
-      # Store results
-      #
-      store[key] = results
-      results
-    end
-  end
-end

data/lib/picky/helpers/gc.rb DELETED Viewed

@@ -1,13 +0,0 @@
-# TODO Not used anymore? Remove.
-#
-module Helpers
-  module GC
-    def gc_disabled &block
-      ::GC.disable
-      block.call
-      ::GC.enable
-      ::GC.start
-    end
-    alias disabled gc_disabled
-  end
-end

data/spec/lib/helpers/cache_spec.rb DELETED Viewed

@@ -1,35 +0,0 @@
-require 'spec_helper'
-describe Helpers::Cache do
-  include Helpers::Cache
-  describe "#cached" do
-    attr_reader :store, :key
-    before(:each) do
-      @store = {}
-      @key = 'some key'
-    end
-    describe "not yet cached" do
-      it "should cache" do
-        store.should_receive(:[]=).once.with(@key, 'value')
-        cached @store, @key do
-          'value'
-        end
-      end
-    end
-    describe "already cached" do
-      before(:each) do
-        cached @store, @key do
-          'value'
-        end
-      end
-      it "should not cache" do
-        store.should_receive(:[]=).never
-        cached @store, @key do
-          'value'
-        end
-      end
-    end
-  end
-end

data/spec/lib/helpers/gc_spec.rb DELETED Viewed

@@ -1,71 +0,0 @@
-require 'spec_helper'
-describe Helpers::GC do
-  include Helpers::GC
-  before(:each) do
-    ::GC.stub!(:disable)
-    ::GC.stub!(:enable)
-    ::GC.stub!(:start)
-  end
-  describe "block calling" do
-    it 'should call the block' do
-      inside_block = mock :inside
-      inside_block.should_receive(:call).once
-      disabled do
-        inside_block.call
-      end
-    end
-    it 'should call the block' do
-      inside_block = mock :inside
-      inside_block.should_receive(:call).once
-      gc_disabled do
-        inside_block.call
-      end
-    end
-  end
-  describe "gc calls" do
-    after(:each) do
-      disabled {}
-    end
-    it 'should disable the garbage collector' do
-      ::GC.should_receive(:disable)
-    end
-    it 'should enable the garbage collector' do
-      ::GC.should_receive(:enable)
-    end
-    it 'should start the garbage collector' do
-      ::GC.should_receive(:start)
-    end
-    it 'should disable the gc, call the block, enable the gc and start the gc' do
-      ::GC.should_receive(:disable).ordered
-      ::GC.should_receive(:enable).ordered
-      ::GC.should_receive(:start).ordered
-    end
-  end
-  describe "gc calls" do
-    after(:each) do
-      gc_disabled {}
-    end
-    it 'should disable the garbage collector' do
-      ::GC.should_receive(:disable)
-    end
-    it 'should enable the garbage collector' do
-      ::GC.should_receive(:enable)
-    end
-    it 'should start the garbage collector' do
-      ::GC.should_receive(:start)
-    end
-    it 'should disable the gc, call the block, enable the gc and start the gc' do
-      ::GC.should_receive(:disable).ordered
-      ::GC.should_receive(:enable).ordered
-      ::GC.should_receive(:start).ordered
-    end
-  end
-end