RubyGems - picky - Versions diffs - 0.3.0 → 0.9.0 - Mend

picky 0.3.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71)

data/lib/picky/application.rb +2 -2
data/lib/picky/cacher/partial/default.rb +1 -1
data/lib/picky/configuration/field.rb +8 -10
data/lib/picky/configuration/indexes.rb +6 -6
data/lib/picky/configuration/queries.rb +4 -3
data/lib/picky/cores.rb +2 -2
data/lib/picky/extensions/array.rb +2 -12
data/lib/picky/generator.rb +27 -4
data/lib/picky/index/bundle.rb +5 -41
data/lib/picky/index/bundle_checker.rb +58 -0
data/lib/picky/index/type.rb +4 -1
data/lib/picky/index/wrappers/exact_first.rb +57 -0
data/lib/picky/indexes.rb +12 -19
data/lib/picky/loader.rb +7 -8
data/lib/picky/query/allocation.rb +1 -1
data/lib/picky/query/combinations.rb +9 -6
data/lib/picky/query/combinator.rb +11 -5
data/lib/picky/rack/harakiri.rb +1 -1
data/lib/picky/results/base.rb +4 -12
data/lib/picky/results/live.rb +0 -6
data/lib/picky/routing.rb +17 -17
data/lib/picky/sources/csv.rb +1 -2
data/lib/picky/sources/db.rb +0 -1
data/lib/picky/sources/delicious.rb +41 -0
data/lib/picky/tokenizers/base.rb +52 -43
data/lib/picky/tokenizers/default/index.rb +7 -0
data/lib/picky/tokenizers/default/query.rb +7 -0
data/lib/picky/tokenizers/index.rb +0 -9
data/lib/picky/tokenizers/query.rb +0 -9
data/lib/tasks/application.rake +1 -1
data/lib/tasks/cache.rake +41 -48
data/lib/tasks/framework.rake +1 -1
data/lib/tasks/index.rake +22 -12
data/lib/tasks/server.rake +3 -3
data/lib/tasks/shortcuts.rake +9 -2
data/lib/tasks/statistics.rake +8 -8
data/lib/tasks/try.rake +4 -2
data/project_prototype/Gemfile +1 -1
data/project_prototype/app/application.rb +7 -3
data/spec/lib/cacher/partial/default_spec.rb +1 -1
data/spec/lib/cacher/partial/none_spec.rb +12 -0
data/spec/lib/cacher/partial/subtoken_spec.rb +29 -1
data/spec/lib/configuration/field_spec.rb +162 -3
data/spec/lib/configuration/indexes_spec.rb +150 -0
data/spec/lib/cores_spec.rb +43 -0
data/spec/lib/extensions/module_spec.rb +27 -16
data/spec/lib/generator_spec.rb +3 -3
data/spec/lib/index/bundle_checker_spec.rb +67 -0
data/spec/lib/index/bundle_spec.rb +0 -50
data/spec/lib/index/type_spec.rb +47 -0
data/spec/lib/index/wrappers/exact_first_spec.rb +95 -0
data/spec/lib/indexers/base_spec.rb +18 -2
data/spec/lib/loader_spec.rb +21 -1
data/spec/lib/query/allocation_spec.rb +25 -0
data/spec/lib/query/base_spec.rb +37 -0
data/spec/lib/query/combination_spec.rb +10 -1
data/spec/lib/query/combinations_spec.rb +82 -3
data/spec/lib/query/combinator_spec.rb +45 -0
data/spec/lib/query/token_spec.rb +24 -0
data/spec/lib/rack/harakiri_spec.rb +28 -0
data/spec/lib/results/base_spec.rb +24 -0
data/spec/lib/results/live_spec.rb +15 -0
data/spec/lib/routing_spec.rb +5 -0
data/spec/lib/sources/db_spec.rb +31 -1
data/spec/lib/sources/delicious_spec.rb +75 -0
data/spec/lib/tokenizers/base_spec.rb +160 -49
data/spec/lib/tokenizers/default/index_spec.rb +11 -0
data/spec/lib/tokenizers/default/query_spec.rb +11 -0
metadata +26 -5
data/lib/picky/index/combined.rb +0 -45
data/lib/picky/tokenizers/default.rb +0 -3

data/spec/lib/tokenizers/base_spec.rb CHANGED Viewed

@@ -6,56 +6,167 @@ describe Tokenizers::Base do
   before(:each) do
     @tokenizer = Tokenizers::Base.new
   end
-  context 'stopwords' do
-    describe '.stopwords' do
-      context 'without stopwords given' do
-        it 'should define a method remove_stopwords' do
-          lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
-        end
-        it 'should define a method remove_stopwords that does nothing' do
-          @tokenizer.remove_stopwords('from this text').should == nil
-        end
-        it 'should not define a method remove_non_single_stopwords' do
-          lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should raise_error(NoMethodError)
-        end
-      end
-      context 'with stopwords given' do
-        before(:each) do
-          class << @tokenizer
-            stopwords(/r|e/)
-          end
-        end
-        it 'should define a method remove_stopwords' do
-          lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
-        end
-        it 'should define a method stopwords that removes stopwords' do
-          @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
-        end
-        it 'should define a method remove_non_single_stopwords' do
-          lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
-        end
-        it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
-          @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
-        end
-        it 'should define a method remove_non_single_stopwords that does not single stopwords' do
-          @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
-        end
-      end
-      context 'error case' do
-        before(:each) do
-          class << @tokenizer
-            stopwords(/any/)
-          end
-        end
-        it 'should not remove non-single stopwords with a star' do
-          @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
-        end
-        it 'should not remove non-single stopwords with a tilde' do
-          @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
-        end
+  describe "removes_characters_after_splitting" do
+    context "without removes_characters_after_splitting called" do
+      it "has remove_after_normalizing_illegals" do
+        lambda { @tokenizer.remove_after_normalizing_illegals('any') }.should_not raise_error
+      end
+      it 'should define a remove_after_normalizing_illegals normalize_with_patterns does nothing' do
+        unchanging = stub :unchanging
+        @tokenizer.remove_after_normalizing_illegals unchanging
+      end
+    end
+    context "with removes_characters_after_splitting called" do
+      before(:each) do
+        @tokenizer.removes_characters_after_splitting(/[afo]/)
+      end
+      it "has remove_after_normalizing_illegals" do
+        lambda { @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop') }.should_not raise_error
+      end
+      it "removes illegal characters" do
+        @tokenizer.remove_after_normalizing_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
+      end
+    end
+  end
+  describe "normalizes_words" do
+    context "without normalizes_words called" do
+      it "has normalize_with_patterns" do
+        lambda { @tokenizer.normalize_with_patterns('any') }.should_not raise_error
+      end
+      it 'should define a method normalize_with_patterns does nothing' do
+        unchanging = stub :unchanging
+        @tokenizer.normalize_with_patterns(unchanging).should == unchanging
+      end
+    end
+    context "with normalizes_words called" do
+      before(:each) do
+        @tokenizer.normalizes_words([
+          [/st\./, 'sankt'],
+          [/stras?s?e?/, 'str']
+        ])
+      end
+      it "has normalize_with_patterns" do
+        lambda { @tokenizer.normalize_with_patterns('a b/c.d') }.should_not raise_error
+      end
+      it "normalizes, but just the first one" do
+        @tokenizer.normalize_with_patterns('st. wegstrasse').should == 'sankt wegstrasse'
+      end
+    end
+  end
+  describe "splits_text_on" do
+    context "without splits_text_on called" do
+      it "has split" do
+        lambda { @tokenizer.split('any') }.should_not raise_error
+      end
+      it 'should define a method split that splits by default on \s' do
+        @tokenizer.split('a b/c.d').should == ['a', 'b/c.d']
+      end
+    end
+    context "with removes_characters called" do
+      before(:each) do
+        @tokenizer.splits_text_on(/[\s\.\/]/)
+      end
+      it "has split" do
+        lambda { @tokenizer.split('a b/c.d') }.should_not raise_error
+      end
+      it "removes illegal characters" do
+        @tokenizer.split('a b/c.d').should == ['a','b','c','d']
+      end
+    end
+  end
+  describe "removes_characters" do
+    context "without removes_characters called" do
+      it "has remove_illegals" do
+        lambda { @tokenizer.remove_illegals('any') }.should_not raise_error
+      end
+      it 'should define a method remove_illegals that does nothing' do
+        unchanging = stub :unchanging
+        @tokenizer.remove_illegals unchanging
+      end
+    end
+    context "with removes_characters called" do
+      before(:each) do
+        @tokenizer.removes_characters(/[afo]/)
+      end
+      it "has remove_illegals" do
+        lambda { @tokenizer.remove_illegals('abcdefghijklmnop') }.should_not raise_error
+      end
+      it "removes illegal characters" do
+        @tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
+      end
+    end
+  end
+  describe 'contracts_expressions' do
+    context 'without contract_expressions called' do
+      it 'should define a method contract' do
+        lambda { @tokenizer.contract('from this text') }.should_not raise_error
+      end
+      it 'should define a method contract that does nothing' do
+        unchanging = stub :unchanging
+        @tokenizer.contract unchanging
+      end
+    end
+    context 'with contracts_expressions called' do
+      before(:each) do
+        @tokenizer.contracts_expressions(/Mister|Mr./, 'mr')
+      end
+      it 'should define a method remove_stopwords' do
+        lambda { @tokenizer.contract('from this text') }.should_not raise_error
+      end
+      it 'should define a method contract that contracts expressions' do
+        @tokenizer.contract('Mister Meyer, Mr. Peter').should == 'mr Meyer, mr Peter'
+      end
+    end
+  end
+  describe 'stopwords' do
+    context 'without stopwords given' do
+      it 'should define a method remove_stopwords' do
+        lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
+      end
+      it 'should define a method remove_stopwords that does nothing' do
+        @tokenizer.remove_stopwords('from this text').should == 'from this text'
+      end
+      it 'should define a method remove_non_single_stopwords' do
+        lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
+      end
+    end
+    context 'with stopwords given' do
+      before(:each) do
+        @tokenizer.stopwords(/r|e/)
+      end
+      it 'should define a method remove_stopwords' do
+        lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
+      end
+      it 'should define a method stopwords that removes stopwords' do
+        @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
+      end
+      it 'should define a method remove_non_single_stopwords' do
+        lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
+      end
+      it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
+        @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
+      end
+      it 'should define a method remove_non_single_stopwords that does not single stopwords' do
+        @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
+      end
+    end
+    context 'error case' do
+      before(:each) do
+        @tokenizer.stopwords(/any/)
+      end
+      it 'should not remove non-single stopwords with a star' do
+        @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
+      end
+      it 'should not remove non-single stopwords with a tilde' do
+        @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
       end
     end
   end
 end

data/spec/lib/tokenizers/default/index_spec.rb ADDED Viewed

@@ -0,0 +1,11 @@
+# encoding: utf-8
+#
+require 'spec_helper'
+describe Tokenizers::Default::Index do
+  it "is an instance of the index tokenizer" do
+    Tokenizers::Default::Index.should be_kind_of(Tokenizers::Index)
+  end
+end

data/spec/lib/tokenizers/default/query_spec.rb ADDED Viewed

@@ -0,0 +1,11 @@
+# encoding: utf-8
+#
+require 'spec_helper'
+describe Tokenizers::Default::Query do
+  it "is an instance of the index tokenizer" do
+    Tokenizers::Default::Query.should be_kind_of(Tokenizers::Query)
+  end
+end

metadata CHANGED Viewed

@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
   prerelease: false
   segments:
   - 0
-  - 3
+  - 9
   - 0
-  version: 0.3.0
+  version: 0.9.0
 platform: ruby
 authors:
 - Florian Hanke
@@ -14,7 +14,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-10-24 00:00:00 +02:00
+date: 2010-10-26 00:00:00 +02:00
 default_executable: picky
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -75,9 +75,10 @@ files:
 - lib/picky/helpers/gc.rb
 - lib/picky/helpers/measuring.rb
 - lib/picky/index/bundle.rb
+- lib/picky/index/bundle_checker.rb
 - lib/picky/index/category.rb
-- lib/picky/index/combined.rb
 - lib/picky/index/type.rb
+- lib/picky/index/wrappers/exact_first.rb
 - lib/picky/indexers/base.rb
 - lib/picky/indexers/default.rb
 - lib/picky/indexers/field.rb
@@ -111,8 +112,10 @@ files:
 - lib/picky/sources/base.rb
 - lib/picky/sources/csv.rb
 - lib/picky/sources/db.rb
+- lib/picky/sources/delicious.rb
 - lib/picky/tokenizers/base.rb
-- lib/picky/tokenizers/default.rb
+- lib/picky/tokenizers/default/index.rb
+- lib/picky/tokenizers/default/query.rb
 - lib/picky/tokenizers/index.rb
 - lib/picky/tokenizers/query.rb
 - lib/picky/umlaut_substituter.rb
@@ -145,6 +148,7 @@ files:
 - spec/ext/performant_spec.rb
 - spec/lib/application_spec.rb
 - spec/lib/cacher/partial/default_spec.rb
+- spec/lib/cacher/partial/none_spec.rb
 - spec/lib/cacher/partial/subtoken_spec.rb
 - spec/lib/cacher/partial_generator_spec.rb
 - spec/lib/cacher/similarity/double_levenshtone_spec.rb
@@ -153,6 +157,7 @@ files:
 - spec/lib/cacher/weights/logarithmic_spec.rb
 - spec/lib/cacher/weights_generator_spec.rb
 - spec/lib/configuration/field_spec.rb
+- spec/lib/configuration/indexes_spec.rb
 - spec/lib/configuration/type_spec.rb
 - spec/lib/cores_spec.rb
 - spec/lib/extensions/array_spec.rb
@@ -164,9 +169,12 @@ files:
 - spec/lib/helpers/cache_spec.rb
 - spec/lib/helpers/gc_spec.rb
 - spec/lib/helpers/measuring_spec.rb
+- spec/lib/index/bundle_checker_spec.rb
 - spec/lib/index/bundle_partial_generation_speed_spec.rb
 - spec/lib/index/bundle_spec.rb
 - spec/lib/index/category_spec.rb
+- spec/lib/index/type_spec.rb
+- spec/lib/index/wrappers/exact_first_spec.rb
 - spec/lib/indexers/base_spec.rb
 - spec/lib/indexers/field_spec.rb
 - spec/lib/loader_spec.rb
@@ -186,11 +194,15 @@ files:
 - spec/lib/query/weights_spec.rb
 - spec/lib/rack/harakiri_spec.rb
 - spec/lib/results/base_spec.rb
+- spec/lib/results/live_spec.rb
 - spec/lib/routing_spec.rb
 - spec/lib/solr/schema_generator_spec.rb
 - spec/lib/sources/csv_spec.rb
 - spec/lib/sources/db_spec.rb
+- spec/lib/sources/delicious_spec.rb
 - spec/lib/tokenizers/base_spec.rb
+- spec/lib/tokenizers/default/index_spec.rb
+- spec/lib/tokenizers/default/query_spec.rb
 - spec/lib/tokenizers/index_spec.rb
 - spec/lib/tokenizers/query_spec.rb
 - spec/lib/umlaut_substituter_spec.rb
@@ -232,6 +244,7 @@ test_files:
 - spec/ext/performant_spec.rb
 - spec/lib/application_spec.rb
 - spec/lib/cacher/partial/default_spec.rb
+- spec/lib/cacher/partial/none_spec.rb
 - spec/lib/cacher/partial/subtoken_spec.rb
 - spec/lib/cacher/partial_generator_spec.rb
 - spec/lib/cacher/similarity/double_levenshtone_spec.rb
@@ -240,6 +253,7 @@ test_files:
 - spec/lib/cacher/weights/logarithmic_spec.rb
 - spec/lib/cacher/weights_generator_spec.rb
 - spec/lib/configuration/field_spec.rb
+- spec/lib/configuration/indexes_spec.rb
 - spec/lib/configuration/type_spec.rb
 - spec/lib/cores_spec.rb
 - spec/lib/extensions/array_spec.rb
@@ -251,9 +265,12 @@ test_files:
 - spec/lib/helpers/cache_spec.rb
 - spec/lib/helpers/gc_spec.rb
 - spec/lib/helpers/measuring_spec.rb
+- spec/lib/index/bundle_checker_spec.rb
 - spec/lib/index/bundle_partial_generation_speed_spec.rb
 - spec/lib/index/bundle_spec.rb
 - spec/lib/index/category_spec.rb
+- spec/lib/index/type_spec.rb
+- spec/lib/index/wrappers/exact_first_spec.rb
 - spec/lib/indexers/base_spec.rb
 - spec/lib/indexers/field_spec.rb
 - spec/lib/loader_spec.rb
@@ -273,11 +290,15 @@ test_files:
 - spec/lib/query/weights_spec.rb
 - spec/lib/rack/harakiri_spec.rb
 - spec/lib/results/base_spec.rb
+- spec/lib/results/live_spec.rb
 - spec/lib/routing_spec.rb
 - spec/lib/solr/schema_generator_spec.rb
 - spec/lib/sources/csv_spec.rb
 - spec/lib/sources/db_spec.rb
+- spec/lib/sources/delicious_spec.rb
 - spec/lib/tokenizers/base_spec.rb
+- spec/lib/tokenizers/default/index_spec.rb
+- spec/lib/tokenizers/default/query_spec.rb
 - spec/lib/tokenizers/index_spec.rb
 - spec/lib/tokenizers/query_spec.rb
 - spec/lib/umlaut_substituter_spec.rb

data/lib/picky/index/combined.rb DELETED Viewed

@@ -1,45 +0,0 @@
-# encoding: utf-8
-#
-module Index
-  # This index combines an exact and partial index.
-  # It serves to order the results such that exact  hits are found first.
-  #
-  # TODO Need to use the right subtokens. Bake in?
-  #
-  # TODO One can use it as a wrapper, and it will extract the indexes itself. Rename: ExactFirst.
-  #
-  class Combined < Bundle
-    delegate :similar,
-             :identifier,
-             :name,
-             :to => :@exact
-    delegate :type,
-             :category,
-             :weight,
-             :generate_partial_from,
-             :generate_caches_from_memory,
-             :generate_derived,
-             :dump,
-             :load,
-             :to => :@partial
-    # TODO initialize type_or_category # => installs itself on all exact and partial
-    #
-    def initialize exact, partial
-      @exact   = exact
-      @partial = partial
-    end
-    def ids text
-      @exact.ids(text) + @partial.ids(text)
-    end
-    def weight text
-      [@exact.weight(text) || 0, @partial.weight(text) || 0].max
-    end
-  end
-end

data/lib/picky/tokenizers/default.rb DELETED Viewed

@@ -1,3 +0,0 @@
-module Tokenizers
-  Default = Index
-end