RubyGems - picky - Versions diffs - 0.0.9 → 0.1.0 - Mend

picky 0.0.9 → 0.1.0

Files changed (41) hide show

data/lib/picky/application.rb +38 -37
data/lib/picky/cacher/partial/default.rb +1 -3
data/lib/picky/cacher/partial/subtoken.rb +44 -18
data/lib/picky/configuration/field.rb +6 -2
data/lib/picky/configuration/indexes.rb +16 -7
data/lib/picky/configuration/queries.rb +3 -13
data/lib/picky/extensions/symbol.rb +19 -4
data/lib/picky/generator.rb +9 -0
data/lib/picky/helpers/measuring.rb +3 -3
data/lib/picky/index/bundle.rb +5 -4
data/lib/picky/index/category.rb +14 -7
data/lib/picky/index/combined.rb +6 -1
data/lib/picky/indexers/no_source_specified_error.rb +2 -0
data/lib/picky/indexes.rb +3 -9
data/lib/picky/query/allocation.rb +1 -1
data/lib/picky/query/allocations.rb +2 -2
data/lib/picky/rack/harakiri.rb +10 -8
data/lib/picky/routing.rb +19 -21
data/lib/picky/solr/schema_generator.rb +4 -4
data/lib/picky/sources/base.rb +16 -4
data/lib/picky/sources/csv.rb +3 -0
data/lib/picky/sources/db.rb +30 -22
data/lib/picky/tokenizers/base.rb +7 -5
data/lib/picky/tokenizers/index.rb +5 -5
data/lib/picky/tokenizers/query.rb +9 -9
data/prototype_project/app/application.rb +36 -29
data/prototype_project/app/db.yml +1 -1
data/prototype_project/config.ru +3 -2
data/spec/ext/performant_spec.rb +2 -2
data/spec/lib/application_spec.rb +54 -8
data/spec/lib/cacher/partial/default_spec.rb +15 -0
data/spec/lib/cacher/partial/subtoken_spec.rb +54 -2
data/spec/lib/extensions/symbol_spec.rb +124 -30
data/spec/lib/index/bundle_partial_generation_speed_spec.rb +1 -1
data/spec/lib/query/allocations_spec.rb +5 -5
data/spec/lib/query/combinations_spec.rb +3 -3
data/spec/lib/rack/harakiri_spec.rb +29 -0
data/spec/lib/routing_spec.rb +22 -98
data/spec/lib/tokenizers/index_spec.rb +1 -1
data/spec/specific/speed_spec.rb +4 -5
metadata +7 -3

data/prototype_project/app/db.yml CHANGED Viewed

@@ -6,5 +6,5 @@ adapter: mysql
 host: localhost
 username: root
 password:
-database: table_with_search_data
+database: books_database # Needs to contain the data queried by the DB source in app/application.rb.
 encoding: utf8

data/prototype_project/config.ru CHANGED Viewed

@@ -29,8 +29,9 @@ Indexes.load_from_cache
     Rack::Harakiri.after = 50
 use Rack::Harakiri
-# Start the application and start accepting requests.
+# Finalize the application and start accepting requests.
 #
-# Note: Needs to be the same name as in app/application.rb.
+# Note: Needs to be the same constant name as in app/application.rb.
 #
+    PickySearch.finalize
 run PickySearch

data/spec/ext/performant_spec.rb CHANGED Viewed

@@ -47,7 +47,7 @@ describe Performant::Array do
       # brute force
       Benchmark.realtime do
         Performant::Array.memory_efficient_intersect(arys.sort_by(&:size))
-      end.should <= 0.001
+      end.should < 0.001
     end
     it "should be optimal for 2 small arrays of 50/10_000" do
       arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
@@ -57,7 +57,7 @@ describe Performant::Array do
         arys.inject(arys.shift.dup) do |total, ary|
           total & arys
         end
-      end.should <= 0.0015
+      end.should < 0.0015
     end
   end

data/spec/lib/application_spec.rb CHANGED Viewed

@@ -4,6 +4,52 @@ require 'spec_helper'
 describe Application do
+  describe "integration" do
+    it "should run ok" do
+      lambda {
+        # TODO Add all possible cases.
+        #
+        class TestApplication < Application
+          indexing.removes_characters(/[^a-zA-Z0-9\s\/\-\"\&\.]/)
+          indexing.contracts_expressions(/mr\.\s*|mister\s*/i, 'mr ')
+          indexing.stopwords(/\b(and|the|of|it|in|for)\b/)
+          indexing.splits_text_on(/[\s\/\-\"\&\.]/)
+          indexing.removes_characters_after_splitting(/[\.]/)
+          books_index = index Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml'),
+                              field(:title, :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title words are indexed.
+                              field(:author),
+                              field(:isbn,  :partial => Partial::None.new) # Partially searching on an ISBN does not make much sense.
+          # Note that Picky needs the following characters to
+          # pass through, as they are control characters: *"~:
+          #
+          querying.removes_characters(/[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/)
+          querying.stopwords(/\b(and|the|of|it|in|for)\b/)
+          querying.splits_text_on(/[\s\/\-\,\&]+/)
+          querying.normalizes_words([
+            [/Deoxyribonucleic Acid/i, 'DNA']
+          ])
+          querying.maximum_tokens 5
+          full = Query::Full.new books_index
+          live = Query::Live.new books_index
+          route %r{^/books/full} => full
+          route %r{^/books/live} => live
+        end
+      }.should_not raise_error
+    end
+  end
+  describe 'delegation' do
+    it "should delegate route" do
+      Application.routing.should_receive(:route).once.with :path => :query
+      Application.route :path => :query
+    end
+  end
   describe 'routing' do
     it 'should be there' do
       lambda { Application.routing }.should_not raise_error
@@ -31,30 +77,30 @@ describe Application do
   describe "indexes" do
   end
-  describe "indexes_configuration" do
+  describe "indexing" do
     it 'should be there' do
-      lambda { Application.indexes_configuration }.should_not raise_error
+      lambda { Application.indexing }.should_not raise_error
     end
     it "should return a new Routing instance" do
-      Application.indexes_configuration.should be_kind_of(Configuration::Indexes)
+      Application.indexing.should be_kind_of(Configuration::Indexes)
     end
     it "should cache the instance" do
-      Application.indexes_configuration.should == Application.indexes_configuration
+      Application.indexing.should == Application.indexing
     end
   end
   describe "queries" do
   end
-  describe "queries_configuration" do
+  describe "querying" do
     it 'should be there' do
-      lambda { Application.queries_configuration }.should_not raise_error
+      lambda { Application.querying }.should_not raise_error
     end
     it "should return a new Routing instance" do
-      Application.queries_configuration.should be_kind_of(Configuration::Queries)
+      Application.querying.should be_kind_of(Configuration::Queries)
     end
     it "should cache the instance" do
-      Application.queries_configuration.should == Application.queries_configuration
+      Application.querying.should == Application.querying
     end
   end

data/spec/lib/cacher/partial/default_spec.rb ADDED Viewed

@@ -0,0 +1,15 @@
+require 'spec_helper'
+describe Cacher::Partial::Default do
+  it "should be a subtoken" do
+    Cacher::Partial::Default.should be_kind_of(Cacher::Partial::Subtoken)
+  end
+  it "should be a the right down to" do
+    Cacher::Partial::Default.down_to.should == 1
+  end
+  it "should be a the right starting at" do
+    Cacher::Partial::Default.starting_at.should == -1
+  end
+end

data/spec/lib/cacher/partial/subtoken_spec.rb CHANGED Viewed

@@ -27,9 +27,35 @@ describe Cacher::Partial::Subtoken do
           :fla => [2]
         }
       end
+      it "should be fast" do
+        Benchmark.realtime { @cacher.generate_from( :florian => [1], :flavia => [2] ) }.should < 0.0001
+      end
+      it "should handle duplicate ids" do
+        @cacher.generate_from( :flo => [1], :fla => [1] ).should == {
+          :flo => [1],
+          :fl => [1],
+          :f => [1],
+          :fla => [1]
+        }
+      end
     end
   end
   context 'down_to set' do
+    describe 'negative down_to' do
+      before(:each) do
+        @cacher = Cacher::Partial::Subtoken.new :down_to => -2
+      end
+      it 'should generate the right index' do
+        @cacher.generate_from( :florian => [1], :flavia => [2] ).should == {
+          :florian => [1],
+          :floria => [1],
+          :flori => [1],
+          :flavia => [2],
+          :flavi => [2],
+          :flav => [2]
+        }
+      end
+    end
     context "large down_to" do
       before(:each) do
         @cacher = Cacher::Partial::Subtoken.new :down_to => 10
@@ -50,7 +76,7 @@ describe Cacher::Partial::Subtoken do
       end
       describe 'starting_at' do
         it 'should return the right value' do
-          @cacher.starting_at.should == 0
+          @cacher.starting_at.should == -1
         end
       end
       describe 'down_to' do
@@ -71,10 +97,36 @@ describe Cacher::Partial::Subtoken do
           }
         end
       end
+      describe "a bigger example with disjunct symbols" do
+        before(:each) do
+          abc = ('A'..'Z').to_a + ('a'..'z').to_a
+          @index = {}
+          52.times do |i|
+            @index[abc.join.to_sym] = [i]
+            character = abc.shift
+            abc << character
+          end
+        end
+        it "should be fast" do
+          Benchmark.realtime { @cacher.generate_from(@index) }.should < 0.005
+        end
+      end
+      describe "a bigger example with almost identical symbols" do
+        before(:each) do
+          abc = ('A'..'Z').to_a + ('a'..'z').to_a
+          @index = {}
+          52.times do |i|
+            @index[(abc.join + abc[i].to_s).to_sym] = [i]
+          end
+        end
+        it "should be fast" do
+          Benchmark.realtime { @cacher.generate_from(@index) }.should < 0.003
+        end
+      end
     end
     context 'starting_at -1' do
       before(:each) do
-        @cacher = Cacher::Partial::Subtoken.new :down_to => 4, :starting_at => -1
+        @cacher = Cacher::Partial::Subtoken.new :down_to => 4, :starting_at => -2
       end
       describe 'starting_at' do
         it 'should return the right value' do

data/spec/lib/extensions/symbol_spec.rb CHANGED Viewed

@@ -1,26 +1,120 @@
 require 'spec_helper'
 describe Symbol do
-  before(:each) do
-    GC.disable
-  end
-  after(:each) do
-    GC.enable
-    GC.start
-  end
   context 'performance' do
     include Helpers::Measuring
+    before(:each) do
+      @token = (((0..9).to_a)*10).to_s.to_sym
+      GC.disable
+    end
+    after(:each) do
+      GC.enable
+      GC.start
+    end
+    # Note: They influence each other. each_subtoken is faster though.
+    #
     it 'should be fast' do
-      s = (((0..9).to_a)*10).to_s.to_sym
       timed do
-        s.subtokens
-      end.should <= 0.003 # was 0.0019
+        @token.subtokens
+      end.should < 0.0009
+    end
+    it "should be fast" do
+      timed do
+        @token.each_subtoken do |subtoken| end
+      end.should < 0.0004
     end
   end
+  describe "each_subtoken" do
+    context 'normal symbol' do
+      before(:each) do
+        @sym = :reinke
+      end
+      context 'no downto' do
+        it "should return an array of pieces of the original token, each 1 smaller than the other" do
+          result = []
+          @sym.each_subtoken do |subtoken|
+            result << subtoken
+          end
+          result.should == [:reinke, :reink, :rein, :rei, :re, :r]
+        end
+      end
+      context 'downto is larger than the symbol' do
+        before(:each) do
+          @downto = 8
+        end
+        it "should return an array of pieces of the original token, each 1 smaller than the other" do
+          result = []
+          @sym.each_subtoken(@downto) do |subtoken|
+            result << subtoken
+          end
+          result.should == [:reinke]
+        end
+      end
+      context 'downto is exactly the same as symbol' do
+        before(:each) do
+          @downto = 6
+        end
+        it "should return an array of pieces of the original token, each 1 smaller than the other" do
+          result = []
+          @sym.each_subtoken(@downto) do |subtoken|
+            result << subtoken
+          end
+          result.should == [:reinke]
+        end
+      end
+      context 'downto is smaller than the length of the symbol' do
+        before(:each) do
+          @downto = 4
+        end
+        it "should return an array of pieces of the original token, each 1 smaller than the other" do
+          result = []
+          @sym.each_subtoken(@downto) do |subtoken|
+            result << subtoken
+          end
+          result.should == [:reinke, :reink, :rein]
+        end
+      end
+      context 'downto is 1' do
+        before(:each) do
+          @downto = 1
+        end
+        it "should return an array of pieces of the original token, each 1 smaller than the other" do
+          result = []
+          @sym.each_subtoken(@downto) do |subtoken|
+            result << subtoken
+          end
+          result.should == [:reinke, :reink, :rein, :rei, :re, :r]
+        end
+      end
+      context 'downto is 0' do
+        before(:each) do
+          @downto = 0
+        end
+        it "should return an array of pieces of the original token, each 1 smaller than the other" do
+          result = []
+          @sym.each_subtoken(@downto) do |subtoken|
+            result << subtoken
+          end
+          result.should == [:reinke, :reink, :rein, :rei, :re, :r, :'']
+        end
+      end
+      context 'downto is less than zero' do
+        before(:each) do
+          @downto = -2
+        end
+        it "should return an array of pieces of the original token, each 1 smaller than the other" do
+          result = []
+          @sym.each_subtoken(@downto) do |subtoken|
+            result << subtoken
+          end
+          result.should == [:reinke, :reink, :rein]
+        end
+      end
+    end
+  end
   describe "subtokens" do
     context 'normal symbol' do
       before(:each) do
@@ -63,22 +157,22 @@ describe Symbol do
           @sym.subtokens(@downto).should == [:reinke, :reink, :rein, :rei, :re, :r]
         end
       end
-      # context 'downto is 0' do
-      #   before(:each) do
-      #     @downto = 0
-      #   end
-      #   it "should return an array of pieces of the original token, each 1 smaller than the other" do
-      #     @sym.subtokens(@downto).should == [:reinke, :reink, :rein, :rei, :re, :r]
-      #   end
-      # end
-      # context 'downto is less than zero' do
-      #   before(:each) do
-      #     @downto = -2
-      #   end
-      #   it "should return an array of pieces of the original token, each 1 smaller than the other" do
-      #     @sym.subtokens(@downto).should == [:reinke, :reink, :rein, :rei, :re, :r]
-      #   end
-      # end
+      context 'downto is 0' do
+        before(:each) do
+          @downto = 0
+        end
+        it "should return an array of pieces of the original token, each 1 smaller than the other" do
+          @sym.subtokens(@downto).should == [:reinke, :reink, :rein, :rei, :re, :r, :""]
+        end
+      end
+      context 'downto is less than zero' do
+        before(:each) do
+          @downto = -2
+        end
+        it "should return an array of pieces of the original token, each 1 smaller than the other" do
+          @sym.subtokens(@downto).should == [:reinke, :reink, :rein]
+        end
+      end
     end
   end

data/spec/lib/index/bundle_partial_generation_speed_spec.rb CHANGED Viewed

@@ -39,7 +39,7 @@ describe Index::Bundle do
       it 'should be fast' do
         Benchmark.realtime do
           @full.generate_partial
-        end.should <= 0.2
+        end.should < 0.2
       end
     end
   end

data/spec/lib/query/allocations_spec.rb CHANGED Viewed

@@ -164,7 +164,7 @@ describe Query::Allocations do
     context 'enough ids' do
       before(:each) do
         @allocation1 = stub :allocation1, :ids => [1, 2, 3]
-        @allocation2 = stub :allocation1, :ids => [4, 5, 6, 7]
+        @allocation2 = stub :allocation2, :ids => [4, 5, 6, 7]
         @allocations = Query::Allocations.new [@allocation1, @allocation2]
       end
       it 'should return one random id from the first allocations by default' do
@@ -177,7 +177,7 @@ describe Query::Allocations do
         (1..7).to_a.should include(@allocations.random_ids.first)
       end
       it 'should not contain the same id twice' do
-        100.times do
+        20.times do
           @allocations.random_ids(2).uniq.size.should_not == 1
         end
       end
@@ -185,7 +185,7 @@ describe Query::Allocations do
     context 'just one id' do
       before(:each) do
         @allocation1 = stub :allocation1, :ids => [1]
-        @allocation2 = stub :allocation1, :ids => []
+        @allocation2 = stub :allocation2, :ids => []
         @allocations = Query::Allocations.new [@allocation1, @allocation2]
       end
       it 'should return one random id from its allocations by default' do
@@ -201,7 +201,7 @@ describe Query::Allocations do
     context 'no id' do
       before(:each) do
         @allocation1 = stub :allocation1, :ids => []
-        @allocation2 = stub :allocation1, :ids => []
+        @allocation2 = stub :allocation2, :ids => []
         @allocations = Query::Allocations.new [@allocation1, @allocation2]
       end
       it 'should return one random id from its allocations by default' do
@@ -328,7 +328,7 @@ describe Query::Allocations do
         @allocations.total.should == 110
       end
       it 'should be fast' do
-        Benchmark.realtime { @allocations.process!(20, 0) }.should <= 0.0001
+        Benchmark.realtime { @allocations.process!(20, 0) }.should < 0.0001
       end
     end
   end