RubyGems - picky - Versions diffs - 4.18.0 → 4.19.0 - Mend

picky 4.18.0 → 4.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

checksums.yaml +4 -4
data/ext/picky/picky.c +54 -19
data/lib/picky/loader.rb +2 -1
data/lib/picky/query/token.rb +18 -23
data/lib/picky/query/tokens.rb +5 -3
data/lib/picky/splitter.rb +27 -0
data/lib/picky/tokenizer/regexp_wrapper.rb +22 -13
data/lib/picky/tokenizer.rb +5 -3
data/spec/functional/custom_delimiters_spec.rb +4 -4
data/spec/functional/object_use_spec.rb +93 -0
data/spec/lib/query/token_spec.rb +4 -4
data/spec/lib/splitter_spec.rb +83 -0
data/spec/performant_spec.rb +7 -0
metadata +9 -4

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 5771adfbe24b649d5377b9d1233783ff5edb0c1e
-  data.tar.gz: 140aca46019f8b09496b508c9842a3ac84f2a933
+  metadata.gz: 37e52d1d18d9eec4ca1545d992ec9d6c220c24fa
+  data.tar.gz: 468ebcbaeab07fabbdd70b4693a665a98d6eaa52
 SHA512:
-  metadata.gz: 07d12c4437ba880e486d73626873f1e1fc16e26512a37d564a172cb23bf517ad2c1503d841d0bb3dcd8ef9653a2f776979a2467761a01c412df2acf5a6837547
-  data.tar.gz: bf488e2fac14e6d1f4a26a354a8579e5931b18779c3e115e92062008bc1535f0e6ebdcbb21bfa60758be8df5557c64779a420957ef4c3b106ace7afe12b6a384
+  metadata.gz: 69d292af9ab42ee928e34c94818c2437e255b25e09ec405e01ce9db30fc29350ab726b71db07dbc3ac1f5f1c2d9563cb362fcdf2770a3f4448cd3e569e796d4c
+  data.tar.gz: 2a0e9d7399da82e484a56ee8a16c088b6e627b35cbeabfe56f078aa4271b919f14ecd27f846272652e0ba0dc8b41a96a479fc892449814e58b5d05f38a98d7ac

data/ext/picky/picky.c CHANGED Viewed

@@ -33,9 +33,10 @@ static inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_
   // Vars.
   //
   VALUE rb_array_of_arrays;
-  VALUE smallest_array;
+  VALUE result_array;
   VALUE current_array;
   VALUE hash;
+  VALUE ary;
   // Temps.
   //
@@ -44,37 +45,71 @@ static inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_
   // Conversions & presorting.
   //
   rb_array_of_arrays = rb_block_call(unsorted_array_of_arrays, rb_intern("sort_by!"), 0, 0, rb_ary_length, 0);
-  smallest_array     = rb_ary_dup(rb_ary_entry(rb_array_of_arrays, 0));
+  // Assume the smallest array is the result already.
+  //
+  result_array = rb_ary_dup(rb_ary_entry(rb_array_of_arrays, 0));
-  // Iterate through all arrays.
+  // Iterate through all other arrays.
   //
   for (i = 1; i < RARRAY_LEN(rb_array_of_arrays); i++) {
-    // Break if the smallest array is empty
+    // Break if the result array is empty.
+    // (Because intersecting anything with it will yield nothing)
     //
-    if (RARRAY_LEN(smallest_array) == 0) {
+    if (RARRAY_LEN(result_array) == 0) {
       break;
     }
-    // Make a hash from the currently smallest version.
+    // If the result array is currently larger than 10
+    // entries, use a hash for intersection, else
+    // use an array.
     //
-    hash = ary_make_hash(smallest_array, 0);
+    if (RARRAY_LEN(result_array) > 10) {
+      // Make a hash from the currently smallest version.
+      //
+      hash = ary_make_hash(result_array, 0);
-    // Clear for use as temp array.
-    //
-    rb_ary_clear(smallest_array);
+      // Clear for use as temp array.
+      //
+      rb_ary_clear(result_array);
-    // Iterate through all array elements.
-    //
-    current_array = rb_ary_entry(rb_array_of_arrays, i);
-    for (j = 0; j < RARRAY_LEN(current_array); j++) {
-      v = rb_ary_entry(current_array, j);
-      if (rb_hash_delete(hash, v) != Qnil) {
-        rb_ary_push(smallest_array, v);
+      // Get the current array.
+      //
+      current_array = rb_ary_entry(rb_array_of_arrays, i);
+      // Iterate through all array elements.
+      //
+      for (j = 0; j < RARRAY_LEN(current_array); j++) {
+        v = rb_ary_entry(current_array, j);
+        if (rb_hash_delete(hash, v) != Qnil) {
+          rb_ary_push(result_array, v);
+        }
+      }
+    } else {
+      // Make a new array from the currently smallest version.
+      //
+      ary = rb_ary_dup(result_array);
+      // Clear for use as temp array.
+      //
+      rb_ary_clear(result_array);
+      // Get the current array.
+      //
+      current_array = rb_ary_entry(rb_array_of_arrays, i);
+      // Iterate through all array elements.
+      //
+      for (j = 0; j < RARRAY_LEN(current_array); j++) {
+        v = rb_ary_entry(current_array, j);
+        if (rb_ary_delete(ary, v) != Qnil) {
+          rb_ary_push(result_array, v);
+        }
       }
     }
   }
-  return smallest_array;
+  return result_array;
 }
 VALUE p_mPerformant, p_cArray;

data/lib/picky/loader.rb CHANGED Viewed

@@ -68,7 +68,8 @@ module Picky
       def load_helpers
         load_relative 'helpers/measuring',
                       'helpers/indexing',
-                      'helpers/identification'
+                      'helpers/identification',
+                      'splitter'
       end
       def load_index_generation_strategies
         load_relative 'indexers/base',

data/lib/picky/query/token.rb CHANGED Viewed

@@ -10,7 +10,7 @@ module Picky
     # or whether it is a partial (bla*).
     #
     class Token
       attr_reader :text, :original
       attr_writer :similar
       attr_writer :predefined_categories
@@ -237,6 +237,8 @@ module Picky
         @text.gsub! @@illegals, EMPTY_STRING unless @text == EMPTY_STRING
       end
       def self.redefine_illegals
+        # TODO The no-similar and no-partial characters are both ", so " is listed twice in this character class.
+        #
         @@illegals = %r{[#@@no_similar_character#@@similar_character#@@no_partial_character#@@partial_character]}
       end
       redefine_illegals
@@ -273,49 +275,42 @@ module Picky
       # Splits text into a qualifier and text.
       #
-      @@qualifier_text_delimiter = ':'
-      @@qualifiers_delimiter     = ','
+      @@qualifier_text_delimiter = /:/
+      @@qualifiers_delimiter     = /,/
+      @@qualifier_text_splitter  = Splitter.new @@qualifier_text_delimiter
+      @@qualifiers_splitter      = Splitter.new @@qualifiers_delimiter
       def qualify
-        # TODO Is this actually an optimization?
-        # Check using include? + split, and split alone.
-        #
-        if @text.include? @@qualifier_text_delimiter
-          @qualifiers, @text = @text.split @@qualifier_text_delimiter, 2
-          if @text
-            @qualifiers = @qualifiers.split @@qualifiers_delimiter
-          else
-            @text, @qualifiers = @qualifiers, nil
-          end
+        @qualifiers, @text = @@qualifier_text_splitter.single @text
+        if @qualifiers
+          @qualifiers = @@qualifiers_splitter.multi @qualifiers
         end
       end
-      # Define a character which separates the qualifier
+      # Define a regexp which separates the qualifier
       # from the search text.
       #
-      # Default is ':'.
-      #
-      # This is used in a String#split.
+      # Default is /:/.
       #
       # Example:
-      #   Picky::Query::Token.qualifier_text_delimiter = '?'
+      #   Picky::Query::Token.qualifier_text_delimiter = /\?/
       #   try.search("text1?hello text2?world").ids.should == [1]
       #
       def self.qualifier_text_delimiter= character
         @@qualifier_text_delimiter = character
+        @@qualifier_text_splitter  = Splitter.new @@qualifier_text_delimiter
       end
-      # Define a character which separates the qualifiers
+      # Define a regexp which separates the qualifiers
       # (before the search text).
       #
-      # Default is ','.
-      #
-      # This is used in a String#split.
+      # Default is /,/.
       #
       # Example:
-      #   Picky::Query::Token.qualifiers_delimiter = '|'
+      #   Picky::Query::Token.qualifiers_delimiter = /\|/
       #   try.search("text1|text2:hello").ids.should == [1]
       #
       def self.qualifiers_delimiter= character
         @@qualifiers_delimiter = character
+        @@qualifiers_splitter  = Splitter.new @@qualifiers_delimiter
       end
       # Returns the qualifiers as an array.

data/lib/picky/query/tokens.rb CHANGED Viewed

@@ -7,7 +7,7 @@ module Picky
     # This class primarily handles switching through similar token constellations.
     #
     class Tokens
       attr_reader :tokens, :ignore_unassigned
       # Basically forwards to its internal tokens array.
@@ -25,11 +25,13 @@ module Picky
       # Creates a new Tokens object from a number of Strings.
       #
+      @@or_splitting_pattern = /\|/
+      @@splitter = Splitter.new @@or_splitting_pattern
       def self.processed words, originals, ignore_unassigned = false
         new(words.zip(originals).collect! do |word, original|
-          w, *middle, rest  = word.split(/\|/)
+          w, *middle, rest = @@splitter.multi word
           if rest
-            Or.new processed [w, *middle, rest], original.split(/\|/)
+            Or.new processed [w, *middle, rest], original.split(@@or_splitting_pattern)
           else
             Token.processed w, original
           end

data/lib/picky/splitter.rb ADDED Viewed

@@ -0,0 +1,27 @@
+module Picky
+  class Splitter < StringScanner
+    def initialize delimiter
+      @delimiter = delimiter
+      super ''
+    end
+    def single text
+      self.string = text
+      skip_until @delimiter
+      [pre_match, post_match || string]
+    end
+    def multi text
+      self.string = text
+      if exist? @delimiter
+        text.split @delimiter
+      else
+        [text]
+      end
+    end
+  end
+end

data/lib/picky/tokenizer/regexp_wrapper.rb CHANGED Viewed

@@ -1,19 +1,28 @@
-class RegexpWrapper
+module Picky
-  def initialize regexp
-    @regexp = regexp
-  end
+  class Tokenizer
+    class RegexpWrapper
+      def initialize regexp
+        @regexp = regexp
+        @splitter = Splitter.new @regexp
+      end
-  def split text
-    text.split @regexp
-  end
+      def split text
+        @splitter.multi text
+      end
-  def source
-    @regexp.source
-  end
+      def source
+        @regexp.source
+      end
-  def method_missing name, *args, &block
-    @regexp.send name, *args, &block
-  end
+      def method_missing name, *args, &block
+        @regexp.send name, *args, &block
+      end
+    end
+  end
 end

data/lib/picky/tokenizer.rb CHANGED Viewed

@@ -98,7 +98,7 @@ Case sensitive?     #{@case_sensitive ? "Yes." : "-"}
     # Note: We do not test against to_str since symbols do not work with String#split.
     #
     def splits_text_on thing
-      raise ArgumentError.new "#{__method__} takes a Regexp or String or a thing that responds to #split as argument, not a #{thing.class}." unless Regexp === thing || thing.respond_to?(:split)
+      raise ArgumentError.new "#{__method__} takes a Regexp or a thing that responds to #split as argument, not a #{thing.class}." unless Regexp === thing || thing.respond_to?(:split)
       @splits_text_on = if thing.respond_to? :split
         thing
       else
@@ -106,6 +106,8 @@ Case sensitive?     #{@case_sensitive ? "Yes." : "-"}
       end
     end
     def split text
+      # Does not create a new string if nothing is split.
+      #
       @splits_text_on.split text
     end
@@ -233,9 +235,9 @@ ERROR
     #  [[:token1, :token2], ["Original1", "Original2"]]
     #
     def tokenize text
-      text   = preprocess text.to_s # processing the text
+      text = preprocess text.to_s # processing the text
       return empty_tokens if text.empty? # TODO blank?
-      words  = pretokenize text # splitting and preparations for tokenizing
+      words = pretokenize text # splitting and preparations for tokenizing
       return empty_tokens if words.empty?
       tokens = tokens_for words # creating tokens / strings
       [tokens, words]

data/spec/functional/custom_delimiters_spec.rb CHANGED Viewed

@@ -10,8 +10,8 @@ describe 'custom delimiters' do
     Picky::Query::Token.similar_character = '~'
     Picky::Query::Token.no_similar_character = '"'
     Picky::Query::Token.range_character = '-'
-    Picky::Query::Token.qualifier_text_delimiter = ':'
-    Picky::Query::Token.qualifiers_delimiter = ','
+    Picky::Query::Token.qualifier_text_delimiter = /:/
+    Picky::Query::Token.qualifiers_delimiter = /,/
   end
   context 'offers custom partial delimiters to be set' do
@@ -80,11 +80,11 @@ describe 'custom delimiters' do
     try.search("text1:hello text2:world").ids.should == [1]
     try.search("text1?hello text2?world").ids.should == []
-    Picky::Query::Token.qualifier_text_delimiter = '?'
+    Picky::Query::Token.qualifier_text_delimiter = /\?/
     try.search("text1?hello text2?world").ids.should == [1]
     try.search("text1!text2?hello text2?world").ids.should == []
-    Picky::Query::Token.qualifiers_delimiter = '!'
+    Picky::Query::Token.qualifiers_delimiter = /!/
     try.search("text1!text2?hello text2?world").ids.should == [1]
   end

data/spec/functional/object_use_spec.rb ADDED Viewed

@@ -0,0 +1,93 @@
+# encoding: utf-8
+#
+require 'spec_helper'
+describe "Object Use" do
+  it 'is not too high' do
+    index = Picky::Index.new :object_use do
+      category :text1
+      category :text2
+      category :text3
+      category :text4
+    end
+    try = Picky::Search.new index
+    thing = Struct.new(:id, :text1, :text2, :text3, :text4)
+    index.add thing.new(1, 'one', 'two', 'three', 'four')
+    # Pre-run.
+    #
+    try.search 'one'
+    try.search 'one two three'
+    try.search 'text1:one'
+    try.search 'text1:one text2:two text3:three'
+    # Actual tests.
+    #
+    s = 'one'
+    result = mark do
+      try.search s
+    end
+    result.should == {} # No new strings since nothing is split.
+    s = 'one two three'
+    result = mark do
+      try.search s
+    end
+    result.should == {
+      "three" => 1,
+      "two" => 1,
+      "one" => 1,
+      'one two three' => 2 # TODO Is GC'd.
+    }
+    result = mark do
+      try.search 'text1:one'
+    end
+    result.should == {
+      "one" => 1,
+      "text1" => 1,
+      "text1:one" => 1
+    } # Only the necessary split strings.
+    s = 'text1:one text2:two text3:three'
+    result = mark do
+      try.search s
+    end
+    result.should == {
+      "three" => 1,
+      "two" => 1,
+      "one" => 1,
+      "text3" => 1,
+      "text2" => 1,
+      "text1" => 1,
+      "text3:three" => 1,
+      "text2:two" => 1,
+      "text1:one" => 1
+    } # Only the necessary split strings.
+    s = 'text1:one text2:two text3,text4:three'
+    result = mark do
+      try.search s
+    end
+    result.should == {
+      "three" => 1,
+      "two" => 1,
+      "one" => 1,
+      "text3,text4" => 2, # TODO
+      "text3" => 1,
+      "text4" => 1,
+      "text2" => 1,
+      "text1" => 1,
+      "text1:one" => 1,
+      "text2:two" => 1,
+      "text3,text4:three" => 1
+    }
+  end
+end

data/spec/lib/query/token_spec.rb CHANGED Viewed

@@ -151,19 +151,19 @@ describe Picky::Query::Token do
     it_should_qualify 'with:qualifier',    [['with'],      'qualifier']
     it_should_qualify 'without qualifier', [nil,           'without qualifier']
     it_should_qualify 'name:',             [['name'],      '']
-    it_should_qualify ':broken qualifier', [[],            'broken qualifier'] # Unsure about that. Probably should recognize it as text.
+    it_should_qualify ':broken qualifier', [[''],          'broken qualifier'] # Unsure about that. Probably should recognize it as text.
     it_should_qualify '',                  [nil,           '']
     it_should_qualify 'sp:text',           [['sp'],        'text']
     it_should_qualify '""',                [nil,           '""']
     it_should_qualify 'name:',             [['name'],      '']
     it_should_qualify 'name:hanke',        [['name'],      'hanke']
     it_should_qualify 'g:gaga',            [['g'],         'gaga']
-    it_should_qualify ':nothing',          [[],            'nothing']
+    it_should_qualify ':nothing',          [[''],          'nothing']
     it_should_qualify 'hello',             [nil,           'hello']
     it_should_qualify 'a:b:c',             [['a'],         'b:c']
     it_should_qualify 'a,b:c',             [['a','b'],     'c']
     it_should_qualify 'a,b,c:d',           [['a','b','c'], 'd']
-    it_should_qualify ':',                 [[],           '']
+    it_should_qualify ':',                 [[''],          '']
     it_should_qualify 'vorname:qualifier', [['vorname'],   'qualifier']
   end
@@ -429,7 +429,7 @@ describe Picky::Query::Token do
     context 'with missing qualifier' do
       let(:token) { described_class.processed ':missingqualifier' }
       it 'is correct' do
-        token.qualifiers.should == []
+        token.qualifiers.should == ['']
         token.text.should == 'missingqualifier'
       end
     end

data/spec/lib/splitter_spec.rb ADDED Viewed

@@ -0,0 +1,83 @@
+require 'spec_helper'
+describe Picky::Splitter do
+  describe "single" do
+    let(:splitter) { described_class.new /:/ }
+    it "splits right" do
+      splitter.single(':b').should == ['','b']
+    end
+    it "splits right" do
+      splitter.single('a:b').should == ['a','b']
+    end
+    it "splits right" do
+      splitter.single('a').should == [nil, 'a']
+    end
+    it "splits right" do
+      splitter.single('a:b c:d').should == ['a', 'b c:d']
+    end
+    it "returns the same string if not split" do
+      s = 'a'
+      splitter.single(s)[1].object_id.should == s.object_id
+    end
+  end
+  describe "multi" do
+    let(:splitter) { described_class.new /\s/ }
+    it "splits right" do
+      splitter.multi(' b').should == ['', 'b']
+    end
+    it "splits right" do
+      splitter.multi('a b').should == ['a', 'b']
+    end
+    it "splits right" do
+      splitter.multi('a b c d').should == ['a', 'b', 'c', 'd']
+    end
+    it "splits right" do
+      splitter.multi('a').should == ['a']
+    end
+    it "returns the same string if not split" do
+      s = 'a'
+      splitter.multi(s).first.object_id.should == s.object_id
+    end
+    # it 'is faster than split' do
+    #   pattern = /\s/
+    #   amount = 1000
+    #   text = 'abcd'
+    #   split = performance_of do
+    #     amount.times { text.split pattern }
+    #   end
+    #   multi = performance_of do
+    #     amount.times { splitter.multi text, pattern }
+    #   end
+    #   split.should < multi
+    # end
+    # it 'is slower than split (but uses less memory in the non-split case)' do
+    #   pattern = /\s/
+    #   amount = 1000
+    #   text = 'a b'
+    #   multi = performance_of do
+    #     amount.times { splitter.multi text, pattern }
+    #   end
+    #   split = performance_of do
+    #     amount.times { text.split pattern }
+    #   end
+    #   # p split
+    #   # p multi
+    # end
+    # it 'is slower than split (but uses less memory in the non-split case)' do
+    #   pattern = /\s/
+    #   amount = 1000
+    #   text = 'a b c d'
+    #   multi = performance_of do
+    #     amount.times { splitter.multi text, pattern }
+    #   end
+    #   split = performance_of do
+    #     amount.times { text.split pattern }
+    #   end
+    #   # p split
+    #   # p multi
+    # end
+  end
+end

data/spec/performant_spec.rb CHANGED Viewed

@@ -122,6 +122,13 @@ describe Performant::Array do
       #
       performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.0015
     end
+    it "should be optimal for many small arrays of length == 10" do
+      arys = [('1'..'10').to_a, ('10'..'20').to_a, ['10'] + ('10000'..'20000').to_a]
+      # Brute force - note that it is slower than the Symbols/Integers version.
+      #
+      performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.0015
+    end
     it "should be optimal for 2 small arrays of 50/10_000" do
       arys = [('1'..'50').to_a, ('10000'..'20000').to_a << 7]

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: picky
 version: !ruby/object:Gem::Version
-  version: 4.18.0
+  version: 4.19.0
 platform: ruby
 authors:
 - Florian Hanke
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-08-16 00:00:00.000000000 Z
+date: 2013-08-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
@@ -44,14 +44,14 @@ dependencies:
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        version: 4.18.0
+        version: 4.19.0
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        version: 4.18.0
+        version: 4.19.0
 - !ruby/object:Gem::Dependency
   name: text
   requirement: !ruby/object:Gem::Requirement
@@ -243,6 +243,7 @@ files:
 - lib/picky/sinatra/index_actions.rb
 - lib/picky/sinatra.rb
 - lib/picky/source.rb
+- lib/picky/splitter.rb
 - lib/picky/splitters/automatic.rb
 - lib/picky/statistics.rb
 - lib/picky/tasks.rb
@@ -289,6 +290,7 @@ files:
 - spec/functional/multi_index_qualifier_spec.rb
 - spec/functional/no_tokenize_spec.rb
 - spec/functional/non_specific_ids_larger_than_20_spec.rb
+- spec/functional/object_use_spec.rb
 - spec/functional/only_spec.rb
 - spec/functional/or_spec.rb
 - spec/functional/pool_spec.rb
@@ -406,6 +408,7 @@ files:
 - spec/lib/sinatra_spec.rb
 - spec/lib/solr/schema_generator_spec.rb
 - spec/lib/source_spec.rb
+- spec/lib/splitter_spec.rb
 - spec/lib/statistics_spec.rb
 - spec/lib/tasks/try_spec.rb
 - spec/lib/tokenizer_spec.rb
@@ -463,6 +466,7 @@ test_files:
 - spec/functional/multi_index_qualifier_spec.rb
 - spec/functional/no_tokenize_spec.rb
 - spec/functional/non_specific_ids_larger_than_20_spec.rb
+- spec/functional/object_use_spec.rb
 - spec/functional/only_spec.rb
 - spec/functional/or_spec.rb
 - spec/functional/pool_spec.rb
@@ -580,6 +584,7 @@ test_files:
 - spec/lib/sinatra_spec.rb
 - spec/lib/solr/schema_generator_spec.rb
 - spec/lib/source_spec.rb
+- spec/lib/splitter_spec.rb
 - spec/lib/statistics_spec.rb
 - spec/lib/tasks/try_spec.rb
 - spec/lib/tokenizer_spec.rb