RubyGems - picky - Versions diffs - 0.0.0 → 0.0.2 - Mend

picky 0.0.0 → 0.0.2

Files changed (161) hide show

data/bin/picky +14 -0
data/lib/bundling.rb +10 -0
data/lib/constants.rb +9 -0
data/lib/deployment.rb +212 -0
data/lib/picky/application.rb +40 -0
data/lib/picky/cacher/convenience.rb +3 -0
data/lib/picky/cacher/generator.rb +17 -0
data/lib/picky/cacher/partial/default.rb +7 -0
data/lib/picky/cacher/partial/none.rb +19 -0
data/lib/picky/cacher/partial/strategy.rb +7 -0
data/lib/picky/cacher/partial/subtoken.rb +91 -0
data/lib/picky/cacher/partial_generator.rb +15 -0
data/lib/picky/cacher/similarity/default.rb +7 -0
data/lib/picky/cacher/similarity/double_levenshtone.rb +73 -0
data/lib/picky/cacher/similarity/none.rb +25 -0
data/lib/picky/cacher/similarity/strategy.rb +7 -0
data/lib/picky/cacher/similarity_generator.rb +15 -0
data/lib/picky/cacher/weights/default.rb +7 -0
data/lib/picky/cacher/weights/logarithmic.rb +39 -0
data/lib/picky/cacher/weights/strategy.rb +7 -0
data/lib/picky/cacher/weights_generator.rb +15 -0
data/lib/picky/configuration/configuration.rb +13 -0
data/lib/picky/configuration/field.rb +68 -0
data/lib/picky/configuration/indexes.rb +60 -0
data/lib/picky/configuration/queries.rb +32 -0
data/lib/picky/configuration/type.rb +52 -0
data/lib/picky/cores.rb +101 -0
data/lib/picky/db/configuration.rb +23 -0
data/lib/picky/ext/ruby19/extconf.rb +7 -0
data/lib/picky/ext/ruby19/performant.c +339 -0
data/lib/picky/extensions/array.rb +45 -0
data/lib/picky/extensions/hash.rb +11 -0
data/lib/picky/extensions/module.rb +15 -0
data/lib/picky/extensions/symbol.rb +18 -0
data/lib/picky/generator.rb +156 -0
data/lib/picky/helpers/cache.rb +23 -0
data/lib/picky/helpers/gc.rb +11 -0
data/lib/picky/helpers/measuring.rb +45 -0
data/lib/picky/helpers/search.rb +27 -0
data/lib/picky/index/bundle.rb +328 -0
data/lib/picky/index/category.rb +109 -0
data/lib/picky/index/combined.rb +38 -0
data/lib/picky/index/type.rb +30 -0
data/lib/picky/indexers/base.rb +77 -0
data/lib/picky/indexers/default.rb +3 -0
data/lib/picky/indexers/field.rb +13 -0
data/lib/picky/indexers/no_source_specified_error.rb +5 -0
data/lib/picky/indexers/solr.rb +60 -0
data/lib/picky/indexes.rb +180 -0
data/lib/picky/initializers/ext.rb +6 -0
data/lib/picky/initializers/mysql.rb +22 -0
data/lib/picky/loader.rb +287 -0
data/lib/picky/loggers/search.rb +19 -0
data/lib/picky/performant/array.rb +23 -0
data/lib/picky/query/allocation.rb +82 -0
data/lib/picky/query/allocations.rb +131 -0
data/lib/picky/query/base.rb +124 -0
data/lib/picky/query/combination.rb +69 -0
data/lib/picky/query/combinations.rb +106 -0
data/lib/picky/query/combinator.rb +92 -0
data/lib/picky/query/full.rb +15 -0
data/lib/picky/query/live.rb +22 -0
data/lib/picky/query/qualifiers.rb +73 -0
data/lib/picky/query/solr.rb +77 -0
data/lib/picky/query/token.rb +215 -0
data/lib/picky/query/tokens.rb +102 -0
data/lib/picky/query/weigher.rb +159 -0
data/lib/picky/query/weights.rb +55 -0
data/lib/picky/rack/harakiri.rb +37 -0
data/lib/picky/results/base.rb +103 -0
data/lib/picky/results/full.rb +19 -0
data/lib/picky/results/live.rb +19 -0
data/lib/picky/routing.rb +165 -0
data/lib/picky/signals.rb +11 -0
data/lib/picky/solr/schema_generator.rb +73 -0
data/lib/picky/sources/base.rb +19 -0
data/lib/picky/sources/csv.rb +30 -0
data/lib/picky/sources/db.rb +77 -0
data/lib/picky/tokenizers/base.rb +130 -0
data/lib/picky/tokenizers/default.rb +3 -0
data/lib/picky/tokenizers/index.rb +73 -0
data/lib/picky/tokenizers/query.rb +70 -0
data/lib/picky/umlaut_substituter.rb +21 -0
data/lib/picky-tasks.rb +6 -0
data/lib/picky.rb +18 -0
data/lib/tasks/application.rake +5 -0
data/lib/tasks/cache.rake +53 -0
data/lib/tasks/framework.rake +4 -0
data/lib/tasks/index.rake +29 -0
data/lib/tasks/server.rake +48 -0
data/lib/tasks/shortcuts.rake +13 -0
data/lib/tasks/solr.rake +36 -0
data/lib/tasks/spec.rake +11 -0
data/lib/tasks/statistics.rake +13 -0
data/lib/tasks/try.rake +29 -0
data/prototype_project/Gemfile +23 -0
data/prototype_project/Rakefile +1 -0
data/prototype_project/app/README +6 -0
data/prototype_project/app/application.rb +50 -0
data/prototype_project/app/application.ru +29 -0
data/prototype_project/app/db.yml +10 -0
data/prototype_project/app/logging.rb +20 -0
data/prototype_project/app/unicorn.ru +10 -0
data/prototype_project/log/README +1 -0
data/prototype_project/script/console +34 -0
data/prototype_project/tmp/README +0 -0
data/prototype_project/tmp/pids/README +0 -0
data/spec/ext/performant_spec.rb +64 -0
data/spec/lib/application_spec.rb +61 -0
data/spec/lib/cacher/partial/subtoken_spec.rb +89 -0
data/spec/lib/cacher/partial_generator_spec.rb +35 -0
data/spec/lib/cacher/similarity/double_levenshtone_spec.rb +60 -0
data/spec/lib/cacher/similarity/none_spec.rb +23 -0
data/spec/lib/cacher/similarity_generator_spec.rb +22 -0
data/spec/lib/cacher/weights/logarithmic_spec.rb +30 -0
data/spec/lib/cacher/weights_generator_spec.rb +21 -0
data/spec/lib/configuration/configuration_spec.rb +38 -0
data/spec/lib/configuration/type_spec.rb +49 -0
data/spec/lib/configuration_spec.rb +8 -0
data/spec/lib/cores_spec.rb +65 -0
data/spec/lib/extensions/array_spec.rb +37 -0
data/spec/lib/extensions/hash_spec.rb +11 -0
data/spec/lib/extensions/module_spec.rb +27 -0
data/spec/lib/extensions/symbol_spec.rb +85 -0
data/spec/lib/generator_spec.rb +135 -0
data/spec/lib/helpers/cache_spec.rb +35 -0
data/spec/lib/helpers/gc_spec.rb +71 -0
data/spec/lib/helpers/measuring_spec.rb +18 -0
data/spec/lib/helpers/search_spec.rb +50 -0
data/spec/lib/index/bundle_partial_generation_speed_spec.rb +47 -0
data/spec/lib/index/bundle_spec.rb +260 -0
data/spec/lib/index/category_spec.rb +203 -0
data/spec/lib/indexers/base_spec.rb +73 -0
data/spec/lib/indexers/field_spec.rb +20 -0
data/spec/lib/loader_spec.rb +48 -0
data/spec/lib/loggers/search_spec.rb +19 -0
data/spec/lib/performant/array_spec.rb +13 -0
data/spec/lib/query/allocation_spec.rb +194 -0
data/spec/lib/query/allocations_spec.rb +336 -0
data/spec/lib/query/base_spec.rb +104 -0
data/spec/lib/query/combination_spec.rb +90 -0
data/spec/lib/query/combinations_spec.rb +83 -0
data/spec/lib/query/combinator_spec.rb +112 -0
data/spec/lib/query/full_spec.rb +22 -0
data/spec/lib/query/live_spec.rb +61 -0
data/spec/lib/query/qualifiers_spec.rb +31 -0
data/spec/lib/query/solr_spec.rb +51 -0
data/spec/lib/query/token_spec.rb +297 -0
data/spec/lib/query/tokens_spec.rb +189 -0
data/spec/lib/query/weights_spec.rb +47 -0
data/spec/lib/results/base_spec.rb +233 -0
data/spec/lib/routing_spec.rb +318 -0
data/spec/lib/solr/schema_generator_spec.rb +42 -0
data/spec/lib/sources/db_spec.rb +91 -0
data/spec/lib/tokenizers/base_spec.rb +61 -0
data/spec/lib/tokenizers/index_spec.rb +51 -0
data/spec/lib/tokenizers/query_spec.rb +105 -0
data/spec/lib/umlaut_substituter_spec.rb +84 -0
data/spec/specific/speed_spec.rb +55 -0
metadata +371 -15
data/README.textile +0 -9

data/spec/lib/tokenizers/base_spec.rb ADDED Viewed

@@ -0,0 +1,61 @@
+# coding: utf-8
+require 'spec_helper'
+describe Tokenizers::Base do
+  before(:each) do
+    @tokenizer = Tokenizers::Base.new
+  end
+  context 'stopwords' do
+    describe '.stopwords' do
+      context 'without stopwords given' do
+        it 'should define a method remove_stopwords' do
+          lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
+        end
+        it 'should define a method remove_stopwords that does nothing' do
+          @tokenizer.remove_stopwords('from this text').should == nil
+        end
+        it 'should not define a method remove_non_single_stopwords' do
+          lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should raise_error(NoMethodError)
+        end
+      end
+      context 'with stopwords given' do
+        before(:each) do
+          class << @tokenizer
+            stopwords(/r|e/)
+          end
+        end
+        it 'should define a method remove_stopwords' do
+          lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
+        end
+        it 'should define a method stopwords that removes stopwords' do
+          @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
+        end
+        it 'should define a method remove_non_single_stopwords' do
+          lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
+        end
+        it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
+          @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
+        end
+        it 'should define a method remove_non_single_stopwords that does not single stopwords' do
+          @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
+        end
+      end
+      context 'error case' do
+        before(:each) do
+          class << @tokenizer
+            stopwords(/any/)
+          end
+        end
+        it 'should not remove non-single stopwords with a star' do
+          @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
+        end
+        it 'should not remove non-single stopwords with a tilde' do
+          @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
+        end
+      end
+    end
+  end
+end

data/spec/lib/tokenizers/index_spec.rb ADDED Viewed

@@ -0,0 +1,51 @@
+# encoding: utf-8
+#
+require 'spec_helper'
+# TODO CLEAN UP.
+#
+describe Tokenizers::Index do
+  before(:each) do
+    @tokenizer = Tokenizers::Index.new
+  end
+  describe "remove_illegal_characters" do
+    it "should not remove ' from a query by default" do
+      @tokenizer.remove_illegals("Lugi's").should == "Lugi's"
+    end
+  end
+  describe "reject!" do
+    it "should reject tokens if blank" do
+      t1 = stub(:token, :to_s => '')
+      t2 = stub(:token, :to_s => 'not blank')
+      t3 = stub(:token, :to_s => '')
+      @tokenizer.reject([t1, t2, t3]).should == [t2]
+    end
+  end
+  describe "tokenize" do
+    describe "normalizing" do
+      def self.it_should_normalize_token(text, expected)
+        it "should handle the #{text} case" do
+          @tokenizer.tokenize(text).to_a.should == [expected].compact
+        end
+      end
+      # defaults
+      it_should_normalize_token 'it_should_not_normalize_by_default', :it_should_not_normalize_by_default
+    end
+    describe "tokenizing" do
+      def self.it_should_tokenize_token(text, expected)
+        it "should handle the #{text} case" do
+          @tokenizer.tokenize(text).to_a.should == expected
+        end
+      end
+      # defaults
+      it_should_tokenize_token "splitting on \\s", [:splitting, :on, :"\\s"]
+      it_should_tokenize_token 'und', [:und]
+    end
+  end
+end

data/spec/lib/tokenizers/query_spec.rb ADDED Viewed

@@ -0,0 +1,105 @@
+# coding: utf-8
+require 'spec_helper'
+describe Tokenizers::Query do # specs for the query-side tokenizer with default settings
+  before(:each) do
+    @tokenizer = Tokenizers::Query.new
+  end
+  describe 'preprocess' do
+    it 'should call methods in order' do
+      text = stub :text
+      @tokenizer.should_receive(:remove_illegals).once.ordered.with text # 1st step
+      @tokenizer.should_receive(:remove_non_single_stopwords).once.ordered.with text # 2nd step
+      @tokenizer.should_receive(:contract).once.ordered # 3rd step, argument not checked
+      @tokenizer.preprocess text
+    end
+    it 'should return the text unchanged by default' do
+      text = "some text"
+      @tokenizer.preprocess(text).should == text
+    end
+  end
+  describe 'process' do
+    before(:each) do
+      @tokens = mock :tokens, :null_object => true # null-object mock: messages without an expectation are accepted
+    end
+    it 'should tokenize the tokens' do
+      @tokens.should_receive(:tokenize_with).once.with @tokenizer
+      @tokenizer.process @tokens
+    end
+    it 'should call methods on the tokens in order' do
+      @tokens.should_receive(:tokenize_with).once.ordered # expected order: tokenize_with, reject, cap, partialize_last
+      @tokens.should_receive(:reject).once.ordered
+      @tokens.should_receive(:cap).once.ordered
+      @tokens.should_receive(:partialize_last).once.ordered
+      @tokenizer.process @tokens
+    end
+    it 'should return the tokens' do
+      @tokenizer.process(@tokens).should == @tokens
+    end
+  end
+  describe 'pretokenize' do
+    def self.it_should_pretokenize text, expected # macro: defines one example per text/expected pair
+      it "should pretokenize #{text} as #{expected}" do
+        @tokenizer.pretokenize(text).should == expected
+      end
+    end
+    it_should_pretokenize 'test miau test', ['test', 'miau', 'test']
+  end
+  describe "tokenizing" do
+    def self.it_should_tokenize_token(text, expected) # macro: compares the text of each resulting token
+      it "should handle the #{text} case" do
+        @tokenizer.tokenize(text).map(&:text).should == expected
+      end
+    end
+    it_should_tokenize_token 'simple tokenizing on \s', [:simple, :tokenizing, :on, :'\s'] # single quotes: \s is a literal backslash followed by s
+  end
+  describe 'normalize_with_patterns' do
+    def self.it_should_pattern_normalize original, expected # macro: defines one example per original/expected pair
+      it "should normalize #{original} with pattern into #{expected}" do
+        @tokenizer.normalize_with_patterns(original).should == expected
+      end
+    end
+    it_should_pattern_normalize 'no pattern normalization', 'no pattern normalization'
+  end
+  describe 'reject' do
+    it 'should reject blank tokens' do
+      @tokenizer.reject(["some token answering to blank?", nil, nil]).should == ["some token answering to blank?"] # the nil entries are the blank tokens here
+    end
+  end
+  describe "last token" do
+    it "should be partial" do
+      @tokenizer.tokenize("First Second Third Last").last.instance_variable_get(:@partial).should be_true # reads the token's @partial ivar directly
+    end
+  end
+  describe ".tokenize" do
+    it "should return an Array of tokens" do
+      @tokenizer.tokenize('test test').to_a.should be_instance_of(Array)
+    end
+    it "should return an empty tokenized query if the query string is blank or empty" do
+      @tokenizer.tokenize('').map(&:to_s).should == [] # only the empty string is exercised
+    end
+  end
+  describe "token_for" do
+    it "should get a preprocessed token" do
+      text = stub(:text)
+      Query::Token.should_receive(:processed).with text # token_for is expected to delegate to Query::Token.processed
+      @tokenizer.token_for text
+    end
+  end
+end

data/spec/lib/umlaut_substituter_spec.rb ADDED Viewed

@@ -0,0 +1,84 @@
+# encoding: utf-8
+require 'spec_helper'
+describe UmlautSubstituter do
+  include UmlautSubstituter
+  # A bit of metaprogramming to help with the myriads of its.
+  #
+  def self.it_should_substitute(special_character, normal_character)
+    it "should substitute #{special_character} with #{normal_character}" do
+      substitute_umlauts(special_character).should == normal_character
+    end
+  end
+  def self.it_should_not_substitute(special_character)
+    it "should not substitute #{special_character}" do
+      substitute_umlauts(special_character).should == special_character
+    end
+  end
+  describe "normal characters" do
+    it_should_not_substitute('abcdefghijklmnopqrstuvwxyz1234567890')
+  end
+  describe "situations" do
+    it_should_substitute 'Peter Müller', 'Peter Mueller'
+    it_should_substitute 'Lüchinger', 'Luechinger'
+    # it_should_substitute 'LÜCHINGER', 'LUECHINGER'
+  end
+  describe "umlauts" do
+    it_should_substitute 'ä', 'ae'
+    it_should_substitute 'Ä', 'Ae'
+    it_should_substitute 'ë', 'e'
+    it_should_substitute 'Ë', 'E'
+    it_should_substitute 'ï', 'i'
+    it_should_substitute 'Ï', 'I'
+    it_should_substitute 'ö', 'oe'
+    it_should_substitute 'Ö', 'Oe'
+    it_should_substitute 'ü', 'ue'
+    it_should_substitute 'Ü', 'Ue'
+  end
+  describe "acute" do
+    it_should_substitute 'é', 'e'
+    it_should_substitute 'É', 'E'
+  end
+  describe "grave" do
+    it_should_substitute 'à', 'a'
+    it_should_substitute 'À', 'A'
+    it_should_substitute 'è', 'e'
+    it_should_substitute 'È', 'E'
+    it_should_substitute 'ì', 'i'
+    it_should_substitute 'ò', 'o'
+  end
+  describe "circonflex" do
+    it_should_substitute 'â', 'a'
+    it_should_substitute 'ê', 'e'
+    it_should_substitute 'Ê', 'E'
+    it_should_substitute 'î', 'i'
+    it_should_substitute 'Î', 'I'
+    it_should_substitute 'ô', 'o'
+    it_should_substitute 'Ô', 'O'
+    it_should_substitute 'û', 'u'
+  end
+  describe "cedilla" do
+    it_should_substitute 'ç', 'c'
+    it_should_substitute 'Ç', 'C'
+  end
+  describe "ligatures" do
+    it_should_substitute 'ß', 'ss'
+    # it_should_substitute 'Æ', 'AE'
+  end
+  describe "norse" do
+    # it_should_substitute 'ø', 'o'
+    it_should_substitute 'å', 'a'
+    it_should_substitute 'Å', 'A'
+  end
+end

data/spec/specific/speed_spec.rb ADDED Viewed

@@ -0,0 +1,55 @@
+require File.dirname(__FILE__) + '/../spec_helper'
+describe "Speccing Ruby for speed" do # micro-benchmarks comparing ways of concatenating id arrays
+  describe "various versions for allocation id concatenating" do
+    before(:each) do
+      @allocs = [:hello, :speed, :test] # keys into @ids, in concatenation order
+      @ids = {
+        :hello => (1..100_000).to_a,
+        :speed => (1..5_000).to_a,
+        :test => (1..1_000).to_a
+      }
+      GC.disable # keep garbage collection out of the timed section
+    end
+    after(:each) do
+      GC.enable
+      GC.start # collect now to minimize the chance of a GC run during the next timed example
+    end
+    describe "+" do
+      it "should be fast" do
+        Benchmark.realtime do # elapsed wall-clock time of the block, in seconds
+          @allocs.inject([]) do |total, alloc|
+            total + @ids[alloc]
+          end
+        end.should <= 0.0025
+      end
+    end
+    describe "map and flatten!(1)" do
+      it "should be fast" do
+        Benchmark.realtime do
+          @allocs.map { |alloc| @ids[alloc] }.flatten!(1) # collect the arrays, then flatten one level in place
+        end.should <= 0.02
+      end
+    end
+    describe "<< and flatten!(1)" do
+      it "should be fast" do
+        Benchmark.realtime do
+          @allocs.inject([]) do |total, alloc|
+            total << @ids[alloc] # appends the whole array as one element
+          end.flatten!(1)
+        end.should <= 0.02
+      end
+    end
+    describe "<< and flatten!" do
+      it "should be fast" do
+        Benchmark.realtime do
+          @allocs.inject([]) do |total, alloc|
+            total << @ids[alloc]
+          end.flatten! # same as the previous variant, but without a depth limit
+        end.should <= 0.02
+      end
+    end
+  end
+end