RubyGems - picky - Versions diffs - 4.11.3 → 4.12.0 - Mend

picky 4.11.3 → 4.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

data/lib/performant.c +10 -22
data/lib/picky/loader.rb +1 -0
data/lib/picky/splitters/automatic.rb +82 -0
data/lib/picky/tokenizer.rb +17 -8
data/spec/functional/automatic_segmentation_spec.rb +98 -0
data/spec/lib/backends/file/basic_spec.rb +3 -3
data/spec/lib/backends/file_spec.rb +2 -2
data/spec/lib/backends/memory/basic_spec.rb +3 -3
data/spec/lib/backends/memory/json_spec.rb +2 -2
data/spec/lib/backends/memory/marshal_spec.rb +2 -2
data/spec/lib/backends/memory_spec.rb +3 -3
data/spec/lib/backends/sqlite/array_spec.rb +4 -4
data/spec/lib/backends/sqlite/value_spec.rb +2 -2
data/spec/lib/backends/sqlite_spec.rb +3 -3
data/spec/lib/bundle_indexed_spec.rb +4 -4
data/spec/lib/bundle_spec.rb +2 -2
data/spec/{category_realtime_spec.rb → lib/category_realtime_spec.rb} +0 -0
data/spec/lib/category_spec.rb +1 -1
data/spec/lib/index_spec.rb +1 -1
data/spec/lib/loader_spec.rb +1 -1
data/spec/{ext/performant_spec.rb → performant_spec.rb} +47 -2
metadata +11 -8

data/lib/performant.c CHANGED Viewed

@@ -1,17 +1,5 @@
 #include "ruby.h"
-// Copying internal ruby methods.
-//
-static inline VALUE rb_ary_elt(ary, offset)
-    VALUE ary;
-    long offset;
-{
-    if (RARRAY_LEN(ary) == 0) return Qnil;
-    if (offset < 0 || RARRAY_LEN(ary) <= offset) {
-        return Qnil;
-    }
-    return RARRAY_PTR(ary)[offset];
-}
 static inline VALUE ary_make_hash(ary1, ary2)
     VALUE ary1, ary2;
 {
@@ -19,11 +7,11 @@ static inline VALUE ary_make_hash(ary1, ary2)
     long i;
     for (i=0; i<RARRAY_LEN(ary1); i++) {
-        rb_hash_aset(hash, RARRAY_PTR(ary1)[i], Qtrue);
+        rb_hash_aset(hash, rb_ary_entry(ary1,i), Qtrue);
     }
     if (ary2) {
         for (i=0; i<RARRAY_LEN(ary2); i++) {
-            rb_hash_aset(hash, RARRAY_PTR(ary2)[i], Qtrue);
+            rb_hash_aset(hash, rb_ary_entry(ary2, i), Qtrue);
         }
     }
     return hash;
@@ -44,19 +32,19 @@ static inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_
   // Vars.
   //
-  struct RArray *rb_array_of_arrays;
+  VALUE rb_array_of_arrays;
   VALUE smallest_array;
   VALUE current_array;
   VALUE hash;
   // Temps.
   //
-  VALUE v, vv;
+  VALUE v;
   // Conversions & presorting.
   //
-  rb_array_of_arrays = (struct RArray*) rb_block_call(unsorted_array_of_arrays, rb_intern("sort_by!"), 0, 0, rb_ary_length, 0);
-  smallest_array     = (VALUE) RARRAY(rb_ary_dup(RARRAY_PTR(rb_array_of_arrays)[0]));
+  rb_array_of_arrays = rb_block_call(unsorted_array_of_arrays, rb_intern("sort_by!"), 0, 0, rb_ary_length, 0);
+  smallest_array     = rb_ary_dup(rb_ary_entry(rb_array_of_arrays, 0));
   // Iterate through all arrays.
   //
@@ -77,10 +65,10 @@ static inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_
     // Iterate through all array elements.
     //
-    current_array = RARRAY_PTR(rb_array_of_arrays)[i];
+    current_array = rb_ary_entry(rb_array_of_arrays, i);
     for (j = 0; j < RARRAY_LEN(current_array); j++) {
-      v = vv = rb_ary_elt(current_array, j);
-      if (st_delete(RHASH_TBL(hash), (unsigned long*)&vv, 0)) {
+      v = rb_ary_entry(current_array, j);
+      if (rb_hash_delete(hash, v) != Qnil) {
         rb_ary_push(smallest_array, v);
       }
     }
@@ -95,4 +83,4 @@ void Init_performant() {
   p_mPerformant = rb_define_module("Performant");
   p_cArray = rb_define_class_under(p_mPerformant, "Array", rb_cObject);
   rb_define_singleton_method(p_cArray, "memory_efficient_intersect", memory_efficient_intersect, 1);
-}
+}

data/lib/picky/loader.rb CHANGED Viewed

@@ -268,6 +268,7 @@ module Picky
         load_relative 'tokenizer'
         # load_relative 'rack/harakiri' # Needs to be explicitly loaded/required.
         load_relative 'character_substituters/west_european'
+        load_relative 'splitters/automatic'
         load_generators
         load_inner_api
         load_results

data/lib/picky/splitters/automatic.rb ADDED Viewed

@@ -0,0 +1,82 @@
+module Picky
+  module Splitters
+    # Automatic Splitter.
+    #
+    # Use as a splitter for the splits_text_on option
+    # for Searches. You need to give it an index category
+    # to use for the splitting.
+    #
+    # Example:
+    #   Picky::Search.new index do
+    #     searching splits_text_on: Picky::Splitters::Automatic.new(index[:name])
+    #   end
+    #
+    # Will split most queries correctly.
+    # However, has the following problems:
+    #   * "cannot" is usually split as ['can', 'not']
+    #   * "rainbow" is usually split as ['rain', 'bow']
+    #
+    # Reference: http://norvig.com/ngrams/ch14.pdf.
+    #
+    # Adapted from a script submitted
+    # by Andy Kitchen.
+    #
+    class Automatic
+      def initialize category, options = {}
+        @exact        = category.exact
+        @partial      = category.partial
+        @with_partial = options[:partial]
+        reset_memoization
+      end
+      # Reset the memoization.
+      #
+      def reset_memoization
+        @exact_memo = {}
+        @partial_memo = {}
+      end
+      # Split the given text into its most
+      # likely constituents.
+      #
+      def split text
+        segment(text, @with_partial).first
+      end
+      # Return all splits of a given string.
+      #
+      def splits text
+        l = text.length
+        (0..l-1).map do |x|
+          [text.slice(0,x), text.slice(x,l)]
+        end
+      end
+      # Segments the given text recursively.
+      #
+      def segment text, use_partial = false
+        (use_partial ? @partial_memo : @exact_memo)[text] ||= splits(text).inject([[], nil]) do |(current, heaviest), (head, tail)|
+          tail_weight = use_partial ? @partial.weight(tail) : @exact.weight(tail)
+          segments, head_weight = segment head
+          weight = (head_weight && tail_weight &&
+                   (head_weight + tail_weight) ||
+                   tail_weight || head_weight)
+          if (weight || -1) > (heaviest || 0)
+            [tail_weight ? segments + [tail] : segments, weight]
+          else
+            [current, heaviest]
+          end
+        end
+      end
+    end
+  end
+end

data/lib/picky/tokenizer.rb CHANGED Viewed

@@ -99,16 +99,25 @@ Case sensitive?     #{@case_sensitive ? "Yes." : "-"}
     #
     def splits_text_on thing
       raise ArgumentError.new "#{__method__} takes a Regexp or String or a thing that responds to #split as argument, not a #{thing.class}." unless Regexp === thing || thing.respond_to?(:split)
-      @splits_text_on = thing
-      if thing.respond_to? :split
-        def split text
-          @splits_text_on.split text
-        end
+      @splits_text_on = if thing.respond_to? :split
+        thing
       else
-        def split text
-          text.split @splits_text_on
-        end
+        RegexpWrapper.new thing
+      end
+    end
+    class RegexpWrapper
+      def initialize regexp
+        @regexp = regexp
+      end
+      def split text
+        text.split @regexp
       end
+      def source
+        @regexp.source
+      end
+    end
+    def split text
+      @splits_text_on.split text
     end
     # Normalizing.

data/spec/functional/automatic_segmentation_spec.rb ADDED Viewed

@@ -0,0 +1,98 @@
+# encoding: utf-8
+#
+require 'spec_helper'
+describe "automatic splitting" do
+  let(:index) do
+    index = Picky::Index.new :automatic_text_splitting do
+      indexing removes_characters: /[^a-z\s]/i,
+               stopwords: /\b(in|a)\b/
+      category :text
+    end
+    require 'ostruct'
+    index.add OpenStruct.new(id: 1, text: 'It does rain in Spain. Purple is a new color. Bow to the king.')
+    index.add OpenStruct.new(id: 2, text: 'Rainbow rainbow.')
+    index.add OpenStruct.new(id: 3, text: 'Bow and arrow in Papua New Guinea.')
+    index.add OpenStruct.new(id: 4, text: 'The color purple.')
+    index.add OpenStruct.new(id: 5, text: 'Sun and rain.')
+    index.add OpenStruct.new(id: 6, text: 'The king is in Spain.')
+    index
+  end
+  it 'can split the text automatically' do
+    automatic_splitter = Picky::Splitters::Automatic.new index[:text]
+    # It splits the text correctly.
+    #
+    automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow']
+    automatic_splitter.split('purplerain').should == ['purple', 'rain']
+    automatic_splitter.split('purple').should == ['purple']
+    # When it can't, it splits it using the partial index (correctly).
+    #
+    automatic_splitter.split('purplerainbo').should == ['purple', 'rain']
+    automatic_splitter.split('purplerainb').should  == ['purple', 'rain']
+    #
+    automatic_splitter.split('purplerai').should == ['purple']
+    automatic_splitter.split('purplera').should  == ['purple']
+    automatic_splitter.split('purpler').should   == ['purple']
+    #
+    automatic_splitter.split('purpl').should == []
+    automatic_splitter.split('purp').should  == []
+    automatic_splitter.split('pur').should   == []
+    automatic_splitter.split('pu').should    == []
+    automatic_splitter.split('p').should     == []
+  end
+  it 'can split text automatically (with partial)' do
+    automatic_splitter = Picky::Splitters::Automatic.new index[:text], partial: true
+    # It splits the text correctly.
+    #
+    automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow']
+    automatic_splitter.split('purplerain').should == ['purple', 'rain']
+    automatic_splitter.split('purple').should == ['purple']
+    # When it can't, it splits it using the partial index (correctly).
+    #
+    automatic_splitter.split('purplerainbo').should == ['purple', 'rain', 'bo']
+    automatic_splitter.split('purplerainb').should == ['purple', 'rain', 'b']
+    #
+    automatic_splitter.split('purplerai').should == ['purple', 'rai']
+    automatic_splitter.split('purplera').should == ['purple', 'ra']
+    automatic_splitter.split('purpler').should == ['purple'] # No 'r' in partial index.
+    #
+    automatic_splitter.split('purpl').should == ['purpl']
+    automatic_splitter.split('purp').should == ['purp']
+    automatic_splitter.split('pur').should == [] # No 'pur' in partial index etc.
+    automatic_splitter.split('pu').should == []
+    automatic_splitter.split('p').should == []
+    try = Picky::Search.new index do
+      searching splits_text_on: automatic_splitter
+    end
+    # Should find the one with all parts.
+    #
+    try.search('purplerainbow').ids.should == [1]
+    try.search('sunandrain').ids.should == [5]
+    # Common parts are found in multiple examples.
+    #
+    try.search('colorpurple').ids.should == [4,1]
+    try.search('bownew').ids.should      == [3,1]
+    try.search('spainisking').ids.should == [6,1]
+  end
+  it 'is fast enough' do
+    automatic_splitter = Picky::Splitters::Automatic.new index[:text]
+    performance_of do
+      automatic_splitter.split('purplerainbow')
+    end.should < 0.0002
+  end
+end

data/spec/lib/backends/file/basic_spec.rb CHANGED Viewed

@@ -3,7 +3,7 @@ require 'spec_helper'
 describe Picky::Backends::File::Basic do
   context 'without options' do
-    let(:basic) { described_class.new 'some/cache/path/to/file' }
+    let(:basic) { described_class.new 'spec/temp/some/cache/path/to/file' }
     describe 'empty' do
       it 'returns the container that is used for indexing' do
@@ -19,14 +19,14 @@ describe Picky::Backends::File::Basic do
     describe 'to_s' do
       it 'returns the cache path with the default file extension' do
-        basic.to_s.should == 'Picky::Backends::File::Basic(some/cache/path/to/file.file.index,some/cache/path/to/file.file_mapping.index.memory.json)'
+        basic.to_s.should == 'Picky::Backends::File::Basic(spec/temp/some/cache/path/to/file.file.index,spec/temp/some/cache/path/to/file.file_mapping.index.memory.json)'
       end
     end
   end
   context 'with options' do
     let(:basic) do
-      described_class.new 'some/cache/path/to/file',
+      described_class.new 'spec/temp/some/cache/path/to/file',
                           empty: [],
                           initial: []
     end

data/spec/lib/backends/file_spec.rb CHANGED Viewed

@@ -21,7 +21,7 @@ describe Picky::Backends::File do
   #     ].each do |type, kind|
   #       it "creates and returns a(n) #{type} index" do
   #         @backend.send(:"create_#{type}",
-  #                       stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
+  #                       stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
   #         ).should be_kind_of(kind)
   #       end
   #     end
@@ -44,7 +44,7 @@ describe Picky::Backends::File do
       ].each do |type, kind|
         it "creates and returns a(n) #{type} index" do
           @backend.send(:"create_#{type}",
-                        stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
+                        stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
           ).should be_kind_of(kind)
         end
       end

data/spec/lib/backends/memory/basic_spec.rb CHANGED Viewed

@@ -3,7 +3,7 @@ require 'spec_helper'
 describe Picky::Backends::Memory::Basic do
   context 'without options' do
-    let(:basic) { described_class.new 'some/cache/path/to/file' }
+    let(:basic) { described_class.new 'spec/temp/some/cache/path/to/file' }
     describe 'empty' do
       it 'returns the container that is used for indexing' do
@@ -19,14 +19,14 @@ describe Picky::Backends::Memory::Basic do
     describe 'to_s' do
       it 'returns the cache path with the default file extension' do
-        basic.to_s.should == 'Picky::Backends::Memory::Basic(some/cache/path/to/file.memory.index)'
+        basic.to_s.should == 'Picky::Backends::Memory::Basic(spec/temp/some/cache/path/to/file.memory.index)'
       end
     end
   end
   context 'with options' do
     let(:basic) do
-      described_class.new 'some/cache/path/to/file',
+      described_class.new 'spec/temp/some/cache/path/to/file',
                           empty: [],
                           initial: []
     end

data/spec/lib/backends/memory/json_spec.rb CHANGED Viewed

@@ -3,7 +3,7 @@ require 'spec_helper'
 describe Picky::Backends::Memory::JSON do
   context 'hash-based indexes' do
-    let(:json) { described_class.new 'some/cache/path/to/file' }
+    let(:json) { described_class.new 'spec/temp/some/cache/path/to/file' }
     describe 'extension' do
       it 'is correct' do
@@ -37,7 +37,7 @@ describe Picky::Backends::Memory::JSON do
     describe 'to_s' do
       it 'returns the cache path with the default file extension' do
-        json.to_s.should == 'Picky::Backends::Memory::JSON(some/cache/path/to/file.memory.json)'
+        json.to_s.should == 'Picky::Backends::Memory::JSON(spec/temp/some/cache/path/to/file.memory.json)'
       end
     end
   end

data/spec/lib/backends/memory/marshal_spec.rb CHANGED Viewed

@@ -3,7 +3,7 @@ require 'spec_helper'
 describe Picky::Backends::Memory::Marshal do
   context 'hash-based indexes' do
-    let(:marshal) { described_class.new 'some/cache/path/to/file' }
+    let(:marshal) { described_class.new 'spec/temp/some/cache/path/to/file' }
     describe 'extension' do
       it 'is correct' do
@@ -37,7 +37,7 @@ describe Picky::Backends::Memory::Marshal do
     describe 'to_s' do
       it 'returns the cache path with the default file extension' do
-        marshal.to_s.should == 'Picky::Backends::Memory::Marshal(some/cache/path/to/file.memory.dump)'
+        marshal.to_s.should == 'Picky::Backends::Memory::Marshal(spec/temp/some/cache/path/to/file.memory.dump)'
       end
     end
   end

data/spec/lib/backends/memory_spec.rb CHANGED Viewed

@@ -21,7 +21,7 @@ describe Picky::Backends::Memory do
   #     ].each do |type, kind|
   #       it "creates and returns a(n) #{type} index" do
   #         @backend.send(:"create_#{type}",
-  #                       stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
+  #                       stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
   #         ).should be_kind_of(kind)
   #       end
   #     end
@@ -47,7 +47,7 @@ describe Picky::Backends::Memory do
   #     ].each do |type, kind|
   #       it "creates and returns a(n) #{type} index" do
   #         to_a_able_stub = Object.new
-  #         to_a_able_stub.stub! :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}"
+  #         to_a_able_stub.stub! :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}"
   #         @backend.send(:"create_#{type}", to_a_able_stub).should be_kind_of(kind)
   #       end
   #     end
@@ -70,7 +70,7 @@ describe Picky::Backends::Memory do
       ].each do |type, kind|
         it "creates and returns a(n) #{type} index" do
           @backend.send(:"create_#{type}",
-                        stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
+                        stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
           ).should be_kind_of(kind)
         end
       end

data/spec/lib/backends/sqlite/array_spec.rb CHANGED Viewed

@@ -5,7 +5,7 @@ require 'sqlite3'
 describe Picky::Backends::SQLite::Array do
   context 'hash-based indexes' do
-    let(:db) { described_class.new 'some/cache/path/to/file' }
+    let(:db) { described_class.new 'spec/temp/some/cache/path/to/file' }
     describe 'dump' do
       it 'delegates to the given hash' do
@@ -67,13 +67,13 @@ describe Picky::Backends::SQLite::Array do
     describe 'to_s' do
       it 'returns the cache path with the default file extension' do
-        db.to_s.should == 'Picky::Backends::SQLite::Array(some/cache/path/to/file.sqlite3)'
+        db.to_s.should == 'Picky::Backends::SQLite::Array(spec/temp/some/cache/path/to/file.sqlite3)'
       end
     end
   end
   context 'hash-based indexes' do
-    let(:db) { described_class.new 'some/cache/path/to/file', realtime: true }
+    let(:db) { described_class.new 'spec/temp/some/cache/path/to/file', realtime: true }
     describe 'dump' do
       it 'delegates to the given hash' do
@@ -135,7 +135,7 @@ describe Picky::Backends::SQLite::Array do
     describe 'to_s' do
       it 'returns the cache path with the default file extension' do
-        db.to_s.should == 'Picky::Backends::SQLite::Array(some/cache/path/to/file.sqlite3)'
+        db.to_s.should == 'Picky::Backends::SQLite::Array(spec/temp/some/cache/path/to/file.sqlite3)'
       end
     end
   end

data/spec/lib/backends/sqlite/value_spec.rb CHANGED Viewed

@@ -5,7 +5,7 @@ require 'sqlite3'
 describe Picky::Backends::SQLite::Value do
   context 'hash-based indexes' do
-    let(:db) { described_class.new 'some/cache/path/to/file' }
+    let(:db) { described_class.new 'spec/temp/some/cache/path/to/file' }
     describe 'dump' do
       it 'delegates to the given hash' do
@@ -67,7 +67,7 @@ describe Picky::Backends::SQLite::Value do
     describe 'to_s' do
       it 'returns the cache path with the default file extension' do
-        db.to_s.should == 'Picky::Backends::SQLite::Value(some/cache/path/to/file.sqlite3)'
+        db.to_s.should == 'Picky::Backends::SQLite::Value(spec/temp/some/cache/path/to/file.sqlite3)'
       end
     end
   end

data/spec/lib/backends/sqlite_spec.rb CHANGED Viewed

@@ -23,7 +23,7 @@ describe Picky::Backends::SQLite do
   #     ].each do |type, kind|
   #       it "creates and returns a(n) #{type} index" do
   #         @backend.send(:"create_#{type}",
-  #                       stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
+  #                       stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
   #         ).should be_kind_of(kind)
   #       end
   #     end
@@ -49,7 +49,7 @@ describe Picky::Backends::SQLite do
   #     ].each do |type, kind|
   #       it "creates and returns a(n) #{type} index" do
   #         to_a_able_stub = Object.new
-  #         to_a_able_stub.stub! :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}"
+  #         to_a_able_stub.stub! :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}"
   #         @backend.send(:"create_#{type}", to_a_able_stub).should be_kind_of(kind)
   #       end
   #     end
@@ -72,7 +72,7 @@ describe Picky::Backends::SQLite do
       ].each do |type, kind|
         it "creates and returns a(n) #{type} index" do
           @backend.send(:"create_#{type}",
-                        stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
+                        stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
           ).should be_kind_of(kind)
         end
       end

data/spec/lib/bundle_indexed_spec.rb CHANGED Viewed

@@ -119,7 +119,7 @@ describe Picky::Bundle do
       it "uses the right file" do
         MultiJson.stub! :decode
-        File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_inverted.memory.json', 'r'
+        File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_inverted.memory.json', 'r'
         @bundle.load_inverted
       end
@@ -128,7 +128,7 @@ describe Picky::Bundle do
       it "uses the right file" do
         MultiJson.stub! :decode
-        File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_weights.memory.json', 'r'
+        File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_weights.memory.json', 'r'
         @bundle.load_weights
       end
@@ -137,7 +137,7 @@ describe Picky::Bundle do
       it "uses the right file" do
         Marshal.stub! :load
-        File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_similarity.memory.dump', 'r:binary'
+        File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_similarity.memory.dump', 'r:binary'
         @bundle.load_similarity
       end
@@ -146,7 +146,7 @@ describe Picky::Bundle do
       it "uses the right file" do
         MultiJson.stub! :decode
-        File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_configuration.memory.json', 'r'
+        File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_configuration.memory.json', 'r'
         @bundle.load_configuration
       end

data/spec/lib/bundle_spec.rb CHANGED Viewed

@@ -22,10 +22,10 @@ describe Picky::Bundle do
   describe 'index_path' do
     it 'is correct' do
-      bundle.index_path(:some_type).should == 'spec/test_directory/index/test/some_index/some_category_some_name_some_type'
+      bundle.index_path(:some_type).should == 'spec/temp/index/test/some_index/some_category_some_name_some_type'
     end
     it 'is correct' do
-      bundle.index_path.should == 'spec/test_directory/index/test/some_index/some_category_some_name'
+      bundle.index_path.should == 'spec/temp/index/test/some_index/some_category_some_name'
     end
   end

data/spec/{category_realtime_spec.rb → lib/category_realtime_spec.rb} RENAMED Viewed

File without changes

data/spec/lib/category_spec.rb CHANGED Viewed

@@ -27,7 +27,7 @@ describe Picky::Category do
   context 'directories' do
     let(:category) { described_class.new :some_category, index }
     it 'is correct' do
-      category.prepared_index_path.should == 'spec/test_directory/index/test/some_index/some_category'
+      category.prepared_index_path.should == 'spec/temp/index/test/some_index/some_category'
     end
   end

data/spec/lib/index_spec.rb CHANGED Viewed

@@ -58,7 +58,7 @@ describe Picky::Index do
     describe 'directory' do
       it 'is correct' do
-        api.directory.should == 'spec/test_directory/index/test/some_index_name'
+        api.directory.should == 'spec/temp/index/test/some_index_name'
       end
     end

data/spec/lib/loader_spec.rb CHANGED Viewed

@@ -18,7 +18,7 @@ describe Picky::Loader do
   describe 'load_application' do
     it 'does ok' do
-      Kernel.should_receive(:load).once.with 'spec/test_directory/app.rb'
+      Kernel.should_receive(:load).once.with 'spec/temp/app.rb'
       lambda { described_class.load_application }.should_not raise_error
     end

data/spec/{ext/performant_spec.rb → performant_spec.rb} RENAMED Viewed

@@ -36,13 +36,15 @@ describe Performant::Array do
     it "should be optimal for 2 small arrays of 50/10_000" do
       arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
-      # brute force
+      # Brute force.
+      #
       performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.001
     end
     it "should be optimal for 2 small arrays of 50/10_000" do
       arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
       # &
+      #
       performance_of do
         arys.inject(arys.shift.dup) do |total, ary|
           total & arys
@@ -75,13 +77,56 @@ describe Performant::Array do
     it "should be optimal for 2 small arrays of 50/10_000" do
       arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a]
-      # brute force
+      # Brute force.
+      #
       performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.001
     end
     it "should be optimal for 2 small arrays of 50/10_000" do
       arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a << 7]
       # &
+      #
+      performance_of do
+        arys.inject(arys.shift.dup) do |total, ary|
+          total & arys
+        end
+      end.should < 0.0015
+    end
+  end
+  describe "memory_efficient_intersect with strings" do
+    it "should intersect empty arrays correctly" do
+      arys = [['c','d'], ['a','b','c'], []]
+      Performant::Array.memory_efficient_intersect(arys).should == []
+    end
+    it "should handle intermediate empty results correctly" do
+      arys = [['e','d'], ['a','b','c'], ['c','d','e','h','i']]
+      Performant::Array.memory_efficient_intersect(arys).should == []
+    end
+    it "should intersect correctly" do
+      arys = [['c','d'], ['a','b','c'], ['c','d','e','h','i']]
+      Performant::Array.memory_efficient_intersect(arys).should == ['c']
+    end
+    it "should intersect many arrays" do
+      arys = [['c','d','e','f','g'], ['a','b','c','e','f','g'], ['c','d','e','f','g','h','i'], ['a','b','c','d','e','f','g','h','i','j'], ['b','c','e','f','g','s'], ['a','b','c','d','e','f','g','h','i','j'], ['b','c','e','f','g','s']]
+      Performant::Array.memory_efficient_intersect(arys).should == ['c','e','f','g']
+    end
+    it "should be optimal for 2 small arrays of 50/10_000" do
+      arys = [('1'..'50').to_a, ('10000'..'20000').to_a]
+      # Brute force - note that it is slower than the Symbols/Integers version.
+      #
+      performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.0015
+    end
+    it "should be optimal for 2 small arrays of 50/10_000" do
+      arys = [('1'..'50').to_a, ('10000'..'20000').to_a << 7]
+      # &
+      #
       performance_of do
         arys.inject(arys.shift.dup) do |total, ary|
           total & arys

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: picky
 version: !ruby/object:Gem::Version
-  version: 4.11.3
+  version: 4.12.0
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-11-08 00:00:00.000000000 Z
+date: 2012-11-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
@@ -34,7 +34,7 @@ dependencies:
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        version: 4.11.3
+        version: 4.12.0
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
@@ -42,7 +42,7 @@ dependencies:
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        version: 4.11.3
+        version: 4.12.0
 - !ruby/object:Gem::Dependency
   name: text
   requirement: !ruby/object:Gem::Requirement
@@ -242,6 +242,7 @@ files:
 - lib/picky/sinatra/index_actions.rb
 - lib/picky/sinatra.rb
 - lib/picky/source.rb
+- lib/picky/splitters/automatic.rb
 - lib/picky/statistics.rb
 - lib/picky/tasks.rb
 - lib/picky/tokenizer.rb
@@ -261,9 +262,8 @@ files:
 - lib/tasks/try.rake
 - lib/performant.c
 - spec/aux/picky/cli_spec.rb
-- spec/category_realtime_spec.rb
-- spec/ext/performant_spec.rb
 - spec/functional/allocations_uniq_by_definition_spec.rb
+- spec/functional/automatic_segmentation_spec.rb
 - spec/functional/backends/file_spec.rb
 - spec/functional/backends/memory_bundle_realtime_spec.rb
 - spec/functional/backends/memory_json_utf8_spec.rb
@@ -328,6 +328,7 @@ files:
 - spec/lib/category/location_spec.rb
 - spec/lib/category_indexed_spec.rb
 - spec/lib/category_indexing_spec.rb
+- spec/lib/category_realtime_spec.rb
 - spec/lib/category_spec.rb
 - spec/lib/character_substituters/west_european_spec.rb
 - spec/lib/extensions/array_spec.rb
@@ -398,6 +399,7 @@ files:
 - spec/lib/statistics_spec.rb
 - spec/lib/tasks/try_spec.rb
 - spec/lib/tokenizer_spec.rb
+- spec/performant_spec.rb
 - bin/picky
 homepage: http://florianhanke.com/picky
 licenses: []
@@ -425,9 +427,8 @@ specification_version: 3
 summary: ! 'Picky: Semantic Search Engine. Clever Interface. Good Tools.'
 test_files:
 - spec/aux/picky/cli_spec.rb
-- spec/category_realtime_spec.rb
-- spec/ext/performant_spec.rb
 - spec/functional/allocations_uniq_by_definition_spec.rb
+- spec/functional/automatic_segmentation_spec.rb
 - spec/functional/backends/file_spec.rb
 - spec/functional/backends/memory_bundle_realtime_spec.rb
 - spec/functional/backends/memory_json_utf8_spec.rb
@@ -492,6 +493,7 @@ test_files:
 - spec/lib/category/location_spec.rb
 - spec/lib/category_indexed_spec.rb
 - spec/lib/category_indexing_spec.rb
+- spec/lib/category_realtime_spec.rb
 - spec/lib/category_spec.rb
 - spec/lib/character_substituters/west_european_spec.rb
 - spec/lib/extensions/array_spec.rb
@@ -562,4 +564,5 @@ test_files:
 - spec/lib/statistics_spec.rb
 - spec/lib/tasks/try_spec.rb
 - spec/lib/tokenizer_spec.rb
+- spec/performant_spec.rb
 has_rdoc: