picky 4.11.3 → 4.12.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/performant.c CHANGED
@@ -1,17 +1,5 @@
1
1
  #include "ruby.h"
2
2
 
3
- // Copying internal ruby methods.
4
- //
5
- static inline VALUE rb_ary_elt(ary, offset)
6
- VALUE ary;
7
- long offset;
8
- {
9
- if (RARRAY_LEN(ary) == 0) return Qnil;
10
- if (offset < 0 || RARRAY_LEN(ary) <= offset) {
11
- return Qnil;
12
- }
13
- return RARRAY_PTR(ary)[offset];
14
- }
15
3
  static inline VALUE ary_make_hash(ary1, ary2)
16
4
  VALUE ary1, ary2;
17
5
  {
@@ -19,11 +7,11 @@ static inline VALUE ary_make_hash(ary1, ary2)
19
7
  long i;
20
8
 
21
9
  for (i=0; i<RARRAY_LEN(ary1); i++) {
22
- rb_hash_aset(hash, RARRAY_PTR(ary1)[i], Qtrue);
10
+ rb_hash_aset(hash, rb_ary_entry(ary1,i), Qtrue);
23
11
  }
24
12
  if (ary2) {
25
13
  for (i=0; i<RARRAY_LEN(ary2); i++) {
26
- rb_hash_aset(hash, RARRAY_PTR(ary2)[i], Qtrue);
14
+ rb_hash_aset(hash, rb_ary_entry(ary2, i), Qtrue);
27
15
  }
28
16
  }
29
17
  return hash;
@@ -44,19 +32,19 @@ static inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_
44
32
 
45
33
  // Vars.
46
34
  //
47
- struct RArray *rb_array_of_arrays;
35
+ VALUE rb_array_of_arrays;
48
36
  VALUE smallest_array;
49
37
  VALUE current_array;
50
38
  VALUE hash;
51
39
 
52
40
  // Temps.
53
41
  //
54
- VALUE v, vv;
42
+ VALUE v;
55
43
 
56
44
  // Conversions & presorting.
57
45
  //
58
- rb_array_of_arrays = (struct RArray*) rb_block_call(unsorted_array_of_arrays, rb_intern("sort_by!"), 0, 0, rb_ary_length, 0);
59
- smallest_array = (VALUE) RARRAY(rb_ary_dup(RARRAY_PTR(rb_array_of_arrays)[0]));
46
+ rb_array_of_arrays = rb_block_call(unsorted_array_of_arrays, rb_intern("sort_by!"), 0, 0, rb_ary_length, 0);
47
+ smallest_array = rb_ary_dup(rb_ary_entry(rb_array_of_arrays, 0));
60
48
 
61
49
  // Iterate through all arrays.
62
50
  //
@@ -77,10 +65,10 @@ static inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_
77
65
 
78
66
  // Iterate through all array elements.
79
67
  //
80
- current_array = RARRAY_PTR(rb_array_of_arrays)[i];
68
+ current_array = rb_ary_entry(rb_array_of_arrays, i);
81
69
  for (j = 0; j < RARRAY_LEN(current_array); j++) {
82
- v = vv = rb_ary_elt(current_array, j);
83
- if (st_delete(RHASH_TBL(hash), (unsigned long*)&vv, 0)) {
70
+ v = rb_ary_entry(current_array, j);
71
+ if (rb_hash_delete(hash, v) != Qnil) {
84
72
  rb_ary_push(smallest_array, v);
85
73
  }
86
74
  }
@@ -95,4 +83,4 @@ void Init_performant() {
95
83
  p_mPerformant = rb_define_module("Performant");
96
84
  p_cArray = rb_define_class_under(p_mPerformant, "Array", rb_cObject);
97
85
  rb_define_singleton_method(p_cArray, "memory_efficient_intersect", memory_efficient_intersect, 1);
98
- }
86
+ }
data/lib/picky/loader.rb CHANGED
@@ -268,6 +268,7 @@ module Picky
268
268
  load_relative 'tokenizer'
269
269
  # load_relative 'rack/harakiri' # Needs to be explicitly loaded/required.
270
270
  load_relative 'character_substituters/west_european'
271
+ load_relative 'splitters/automatic'
271
272
  load_generators
272
273
  load_inner_api
273
274
  load_results
@@ -0,0 +1,82 @@
1
+ module Picky
2
+
3
+ module Splitters
4
+
5
+ # Automatic Splitter.
6
+ #
7
+ # Use as a splitter for the splits_text_on option
8
+ # for Searches. You need to give it an index category
9
+ # to use for the splitting.
10
+ #
11
+ # Example:
12
+ # Picky::Search.new index do
13
+ # searching splits_text_on: Picky::Splitters::Automatic.new(index[:name])
14
+ # end
15
+ #
16
+ # Will split most queries correctly.
17
+ # However, it has the following problems:
18
+ # * "cannot" is usually split as ['can', 'not']
19
+ # * "rainbow" is usually split as ['rain', 'bow']
20
+ #
21
+ # Reference: http://norvig.com/ngrams/ch14.pdf.
22
+ #
23
+ # Adapted from a script submitted
24
+ # by Andy Kitchen.
25
+ #
26
+ class Automatic
27
+
28
+ def initialize category, options = {}
29
+ @exact = category.exact
30
+ @partial = category.partial
31
+ @with_partial = options[:partial]
32
+
33
+ reset_memoization
34
+ end
35
+
36
+ # Reset the memoization.
37
+ #
38
+ def reset_memoization
39
+ @exact_memo = {}
40
+ @partial_memo = {}
41
+ end
42
+
43
+ # Split the given text into its most
44
+ # likely constituents.
45
+ #
46
+ def split text
47
+ segment(text, @with_partial).first
48
+ end
49
+
50
+ # Return all splits of a given string.
51
+ #
52
+ def splits text
53
+ l = text.length
54
+ (0..l-1).map do |x|
55
+ [text.slice(0,x), text.slice(x,l)]
56
+ end
57
+ end
58
+
59
+ # Segments the given text recursively.
60
+ #
61
+ def segment text, use_partial = false
62
+ (use_partial ? @partial_memo : @exact_memo)[text] ||= splits(text).inject([[], nil]) do |(current, heaviest), (head, tail)|
63
+ tail_weight = use_partial ? @partial.weight(tail) : @exact.weight(tail)
64
+
65
+ segments, head_weight = segment head
66
+
67
+ weight = (head_weight && tail_weight &&
68
+ (head_weight + tail_weight) ||
69
+ tail_weight || head_weight)
70
+ if (weight || -1) > (heaviest || 0)
71
+ [tail_weight ? segments + [tail] : segments, weight]
72
+ else
73
+ [current, heaviest]
74
+ end
75
+ end
76
+ end
77
+
78
+ end
79
+
80
+ end
81
+
82
+ end
@@ -99,16 +99,25 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
99
99
  #
100
100
  def splits_text_on thing
101
101
  raise ArgumentError.new "#{__method__} takes a Regexp or String or a thing that responds to #split as argument, not a #{thing.class}." unless Regexp === thing || thing.respond_to?(:split)
102
- @splits_text_on = thing
103
- if thing.respond_to? :split
104
- def split text
105
- @splits_text_on.split text
106
- end
102
+ @splits_text_on = if thing.respond_to? :split
103
+ thing
107
104
  else
108
- def split text
109
- text.split @splits_text_on
110
- end
105
+ RegexpWrapper.new thing
106
+ end
107
+ end
108
+ class RegexpWrapper
109
+ def initialize regexp
110
+ @regexp = regexp
111
+ end
112
+ def split text
113
+ text.split @regexp
111
114
  end
115
+ def source
116
+ @regexp.source
117
+ end
118
+ end
119
+ def split text
120
+ @splits_text_on.split text
112
121
  end
113
122
 
114
123
  # Normalizing.
@@ -0,0 +1,98 @@
1
+ # encoding: utf-8
2
+ #
3
+ require 'spec_helper'
4
+
5
+ describe "automatic splitting" do
6
+
7
+ let(:index) do
8
+ index = Picky::Index.new :automatic_text_splitting do
9
+ indexing removes_characters: /[^a-z\s]/i,
10
+ stopwords: /\b(in|a)\b/
11
+ category :text
12
+ end
13
+
14
+ require 'ostruct'
15
+ index.add OpenStruct.new(id: 1, text: 'It does rain in Spain. Purple is a new color. Bow to the king.')
16
+ index.add OpenStruct.new(id: 2, text: 'Rainbow rainbow.')
17
+ index.add OpenStruct.new(id: 3, text: 'Bow and arrow in Papua New Guinea.')
18
+ index.add OpenStruct.new(id: 4, text: 'The color purple.')
19
+ index.add OpenStruct.new(id: 5, text: 'Sun and rain.')
20
+ index.add OpenStruct.new(id: 6, text: 'The king is in Spain.')
21
+
22
+ index
23
+ end
24
+
25
+ it 'can split the text automatically' do
26
+ automatic_splitter = Picky::Splitters::Automatic.new index[:text]
27
+
28
+ # It splits the text correctly.
29
+ #
30
+ automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow']
31
+ automatic_splitter.split('purplerain').should == ['purple', 'rain']
32
+ automatic_splitter.split('purple').should == ['purple']
33
+
34
+ # When it can't (this splitter only uses the exact index), the unmatched trailing part is dropped.
35
+ #
36
+ automatic_splitter.split('purplerainbo').should == ['purple', 'rain']
37
+ automatic_splitter.split('purplerainb').should == ['purple', 'rain']
38
+ #
39
+ automatic_splitter.split('purplerai').should == ['purple']
40
+ automatic_splitter.split('purplera').should == ['purple']
41
+ automatic_splitter.split('purpler').should == ['purple']
42
+ #
43
+ automatic_splitter.split('purpl').should == []
44
+ automatic_splitter.split('purp').should == []
45
+ automatic_splitter.split('pur').should == []
46
+ automatic_splitter.split('pu').should == []
47
+ automatic_splitter.split('p').should == []
48
+ end
49
+
50
+ it 'can split text automatically (with partial)' do
51
+ automatic_splitter = Picky::Splitters::Automatic.new index[:text], partial: true
52
+
53
+ # It splits the text correctly.
54
+ #
55
+ automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow']
56
+ automatic_splitter.split('purplerain').should == ['purple', 'rain']
57
+ automatic_splitter.split('purple').should == ['purple']
58
+
59
+ # When it can't, it splits it using the partial index (correctly).
60
+ #
61
+ automatic_splitter.split('purplerainbo').should == ['purple', 'rain', 'bo']
62
+ automatic_splitter.split('purplerainb').should == ['purple', 'rain', 'b']
63
+ #
64
+ automatic_splitter.split('purplerai').should == ['purple', 'rai']
65
+ automatic_splitter.split('purplera').should == ['purple', 'ra']
66
+ automatic_splitter.split('purpler').should == ['purple'] # No 'r' in partial index.
67
+ #
68
+ automatic_splitter.split('purpl').should == ['purpl']
69
+ automatic_splitter.split('purp').should == ['purp']
70
+ automatic_splitter.split('pur').should == [] # No 'pur' in partial index etc.
71
+ automatic_splitter.split('pu').should == []
72
+ automatic_splitter.split('p').should == []
73
+
74
+ try = Picky::Search.new index do
75
+ searching splits_text_on: automatic_splitter
76
+ end
77
+
78
+ # Should find the one with all parts.
79
+ #
80
+ try.search('purplerainbow').ids.should == [1]
81
+ try.search('sunandrain').ids.should == [5]
82
+
83
+ # Common parts are found in multiple examples.
84
+ #
85
+ try.search('colorpurple').ids.should == [4,1]
86
+ try.search('bownew').ids.should == [3,1]
87
+ try.search('spainisking').ids.should == [6,1]
88
+ end
89
+
90
+ it 'is fast enough' do
91
+ automatic_splitter = Picky::Splitters::Automatic.new index[:text]
92
+
93
+ performance_of do
94
+ automatic_splitter.split('purplerainbow')
95
+ end.should < 0.0002
96
+ end
97
+
98
+ end
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Picky::Backends::File::Basic do
4
4
 
5
5
  context 'without options' do
6
- let(:basic) { described_class.new 'some/cache/path/to/file' }
6
+ let(:basic) { described_class.new 'spec/temp/some/cache/path/to/file' }
7
7
 
8
8
  describe 'empty' do
9
9
  it 'returns the container that is used for indexing' do
@@ -19,14 +19,14 @@ describe Picky::Backends::File::Basic do
19
19
 
20
20
  describe 'to_s' do
21
21
  it 'returns the cache path with the default file extension' do
22
- basic.to_s.should == 'Picky::Backends::File::Basic(some/cache/path/to/file.file.index,some/cache/path/to/file.file_mapping.index.memory.json)'
22
+ basic.to_s.should == 'Picky::Backends::File::Basic(spec/temp/some/cache/path/to/file.file.index,spec/temp/some/cache/path/to/file.file_mapping.index.memory.json)'
23
23
  end
24
24
  end
25
25
  end
26
26
 
27
27
  context 'with options' do
28
28
  let(:basic) do
29
- described_class.new 'some/cache/path/to/file',
29
+ described_class.new 'spec/temp/some/cache/path/to/file',
30
30
  empty: [],
31
31
  initial: []
32
32
  end
@@ -21,7 +21,7 @@ describe Picky::Backends::File do
21
21
  # ].each do |type, kind|
22
22
  # it "creates and returns a(n) #{type} index" do
23
23
  # @backend.send(:"create_#{type}",
24
- # stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
24
+ # stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
25
25
  # ).should be_kind_of(kind)
26
26
  # end
27
27
  # end
@@ -44,7 +44,7 @@ describe Picky::Backends::File do
44
44
  ].each do |type, kind|
45
45
  it "creates and returns a(n) #{type} index" do
46
46
  @backend.send(:"create_#{type}",
47
- stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
47
+ stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
48
48
  ).should be_kind_of(kind)
49
49
  end
50
50
  end
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Picky::Backends::Memory::Basic do
4
4
 
5
5
  context 'without options' do
6
- let(:basic) { described_class.new 'some/cache/path/to/file' }
6
+ let(:basic) { described_class.new 'spec/temp/some/cache/path/to/file' }
7
7
 
8
8
  describe 'empty' do
9
9
  it 'returns the container that is used for indexing' do
@@ -19,14 +19,14 @@ describe Picky::Backends::Memory::Basic do
19
19
 
20
20
  describe 'to_s' do
21
21
  it 'returns the cache path with the default file extension' do
22
- basic.to_s.should == 'Picky::Backends::Memory::Basic(some/cache/path/to/file.memory.index)'
22
+ basic.to_s.should == 'Picky::Backends::Memory::Basic(spec/temp/some/cache/path/to/file.memory.index)'
23
23
  end
24
24
  end
25
25
  end
26
26
 
27
27
  context 'with options' do
28
28
  let(:basic) do
29
- described_class.new 'some/cache/path/to/file',
29
+ described_class.new 'spec/temp/some/cache/path/to/file',
30
30
  empty: [],
31
31
  initial: []
32
32
  end
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Picky::Backends::Memory::JSON do
4
4
 
5
5
  context 'hash-based indexes' do
6
- let(:json) { described_class.new 'some/cache/path/to/file' }
6
+ let(:json) { described_class.new 'spec/temp/some/cache/path/to/file' }
7
7
 
8
8
  describe 'extension' do
9
9
  it 'is correct' do
@@ -37,7 +37,7 @@ describe Picky::Backends::Memory::JSON do
37
37
 
38
38
  describe 'to_s' do
39
39
  it 'returns the cache path with the default file extension' do
40
- json.to_s.should == 'Picky::Backends::Memory::JSON(some/cache/path/to/file.memory.json)'
40
+ json.to_s.should == 'Picky::Backends::Memory::JSON(spec/temp/some/cache/path/to/file.memory.json)'
41
41
  end
42
42
  end
43
43
  end
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Picky::Backends::Memory::Marshal do
4
4
 
5
5
  context 'hash-based indexes' do
6
- let(:marshal) { described_class.new 'some/cache/path/to/file' }
6
+ let(:marshal) { described_class.new 'spec/temp/some/cache/path/to/file' }
7
7
 
8
8
  describe 'extension' do
9
9
  it 'is correct' do
@@ -37,7 +37,7 @@ describe Picky::Backends::Memory::Marshal do
37
37
 
38
38
  describe 'to_s' do
39
39
  it 'returns the cache path with the default file extension' do
40
- marshal.to_s.should == 'Picky::Backends::Memory::Marshal(some/cache/path/to/file.memory.dump)'
40
+ marshal.to_s.should == 'Picky::Backends::Memory::Marshal(spec/temp/some/cache/path/to/file.memory.dump)'
41
41
  end
42
42
  end
43
43
  end
@@ -21,7 +21,7 @@ describe Picky::Backends::Memory do
21
21
  # ].each do |type, kind|
22
22
  # it "creates and returns a(n) #{type} index" do
23
23
  # @backend.send(:"create_#{type}",
24
- # stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
24
+ # stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
25
25
  # ).should be_kind_of(kind)
26
26
  # end
27
27
  # end
@@ -47,7 +47,7 @@ describe Picky::Backends::Memory do
47
47
  # ].each do |type, kind|
48
48
  # it "creates and returns a(n) #{type} index" do
49
49
  # to_a_able_stub = Object.new
50
- # to_a_able_stub.stub! :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}"
50
+ # to_a_able_stub.stub! :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}"
51
51
  # @backend.send(:"create_#{type}", to_a_able_stub).should be_kind_of(kind)
52
52
  # end
53
53
  # end
@@ -70,7 +70,7 @@ describe Picky::Backends::Memory do
70
70
  ].each do |type, kind|
71
71
  it "creates and returns a(n) #{type} index" do
72
72
  @backend.send(:"create_#{type}",
73
- stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
73
+ stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
74
74
  ).should be_kind_of(kind)
75
75
  end
76
76
  end
@@ -5,7 +5,7 @@ require 'sqlite3'
5
5
  describe Picky::Backends::SQLite::Array do
6
6
 
7
7
  context 'hash-based indexes' do
8
- let(:db) { described_class.new 'some/cache/path/to/file' }
8
+ let(:db) { described_class.new 'spec/temp/some/cache/path/to/file' }
9
9
 
10
10
  describe 'dump' do
11
11
  it 'delegates to the given hash' do
@@ -67,13 +67,13 @@ describe Picky::Backends::SQLite::Array do
67
67
 
68
68
  describe 'to_s' do
69
69
  it 'returns the cache path with the default file extension' do
70
- db.to_s.should == 'Picky::Backends::SQLite::Array(some/cache/path/to/file.sqlite3)'
70
+ db.to_s.should == 'Picky::Backends::SQLite::Array(spec/temp/some/cache/path/to/file.sqlite3)'
71
71
  end
72
72
  end
73
73
  end
74
74
 
75
75
  context 'hash-based indexes' do
76
- let(:db) { described_class.new 'some/cache/path/to/file', realtime: true }
76
+ let(:db) { described_class.new 'spec/temp/some/cache/path/to/file', realtime: true }
77
77
 
78
78
  describe 'dump' do
79
79
  it 'delegates to the given hash' do
@@ -135,7 +135,7 @@ describe Picky::Backends::SQLite::Array do
135
135
 
136
136
  describe 'to_s' do
137
137
  it 'returns the cache path with the default file extension' do
138
- db.to_s.should == 'Picky::Backends::SQLite::Array(some/cache/path/to/file.sqlite3)'
138
+ db.to_s.should == 'Picky::Backends::SQLite::Array(spec/temp/some/cache/path/to/file.sqlite3)'
139
139
  end
140
140
  end
141
141
  end
@@ -5,7 +5,7 @@ require 'sqlite3'
5
5
  describe Picky::Backends::SQLite::Value do
6
6
 
7
7
  context 'hash-based indexes' do
8
- let(:db) { described_class.new 'some/cache/path/to/file' }
8
+ let(:db) { described_class.new 'spec/temp/some/cache/path/to/file' }
9
9
 
10
10
  describe 'dump' do
11
11
  it 'delegates to the given hash' do
@@ -67,7 +67,7 @@ describe Picky::Backends::SQLite::Value do
67
67
 
68
68
  describe 'to_s' do
69
69
  it 'returns the cache path with the default file extension' do
70
- db.to_s.should == 'Picky::Backends::SQLite::Value(some/cache/path/to/file.sqlite3)'
70
+ db.to_s.should == 'Picky::Backends::SQLite::Value(spec/temp/some/cache/path/to/file.sqlite3)'
71
71
  end
72
72
  end
73
73
  end
@@ -23,7 +23,7 @@ describe Picky::Backends::SQLite do
23
23
  # ].each do |type, kind|
24
24
  # it "creates and returns a(n) #{type} index" do
25
25
  # @backend.send(:"create_#{type}",
26
- # stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
26
+ # stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
27
27
  # ).should be_kind_of(kind)
28
28
  # end
29
29
  # end
@@ -49,7 +49,7 @@ describe Picky::Backends::SQLite do
49
49
  # ].each do |type, kind|
50
50
  # it "creates and returns a(n) #{type} index" do
51
51
  # to_a_able_stub = Object.new
52
- # to_a_able_stub.stub! :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}"
52
+ # to_a_able_stub.stub! :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}"
53
53
  # @backend.send(:"create_#{type}", to_a_able_stub).should be_kind_of(kind)
54
54
  # end
55
55
  # end
@@ -72,7 +72,7 @@ describe Picky::Backends::SQLite do
72
72
  ].each do |type, kind|
73
73
  it "creates and returns a(n) #{type} index" do
74
74
  @backend.send(:"create_#{type}",
75
- stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
75
+ stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
76
76
  ).should be_kind_of(kind)
77
77
  end
78
78
  end
@@ -119,7 +119,7 @@ describe Picky::Bundle do
119
119
  it "uses the right file" do
120
120
  MultiJson.stub! :decode
121
121
 
122
- File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_inverted.memory.json', 'r'
122
+ File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_inverted.memory.json', 'r'
123
123
 
124
124
  @bundle.load_inverted
125
125
  end
@@ -128,7 +128,7 @@ describe Picky::Bundle do
128
128
  it "uses the right file" do
129
129
  MultiJson.stub! :decode
130
130
 
131
- File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_weights.memory.json', 'r'
131
+ File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_weights.memory.json', 'r'
132
132
 
133
133
  @bundle.load_weights
134
134
  end
@@ -137,7 +137,7 @@ describe Picky::Bundle do
137
137
  it "uses the right file" do
138
138
  Marshal.stub! :load
139
139
 
140
- File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_similarity.memory.dump', 'r:binary'
140
+ File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_similarity.memory.dump', 'r:binary'
141
141
 
142
142
  @bundle.load_similarity
143
143
  end
@@ -146,7 +146,7 @@ describe Picky::Bundle do
146
146
  it "uses the right file" do
147
147
  MultiJson.stub! :decode
148
148
 
149
- File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_configuration.memory.json', 'r'
149
+ File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_configuration.memory.json', 'r'
150
150
 
151
151
  @bundle.load_configuration
152
152
  end
@@ -22,10 +22,10 @@ describe Picky::Bundle do
22
22
 
23
23
  describe 'index_path' do
24
24
  it 'is correct' do
25
- bundle.index_path(:some_type).should == 'spec/test_directory/index/test/some_index/some_category_some_name_some_type'
25
+ bundle.index_path(:some_type).should == 'spec/temp/index/test/some_index/some_category_some_name_some_type'
26
26
  end
27
27
  it 'is correct' do
28
- bundle.index_path.should == 'spec/test_directory/index/test/some_index/some_category_some_name'
28
+ bundle.index_path.should == 'spec/temp/index/test/some_index/some_category_some_name'
29
29
  end
30
30
  end
31
31
 
@@ -27,7 +27,7 @@ describe Picky::Category do
27
27
  context 'directories' do
28
28
  let(:category) { described_class.new :some_category, index }
29
29
  it 'is correct' do
30
- category.prepared_index_path.should == 'spec/test_directory/index/test/some_index/some_category'
30
+ category.prepared_index_path.should == 'spec/temp/index/test/some_index/some_category'
31
31
  end
32
32
  end
33
33
 
@@ -58,7 +58,7 @@ describe Picky::Index do
58
58
 
59
59
  describe 'directory' do
60
60
  it 'is correct' do
61
- api.directory.should == 'spec/test_directory/index/test/some_index_name'
61
+ api.directory.should == 'spec/temp/index/test/some_index_name'
62
62
  end
63
63
  end
64
64
 
@@ -18,7 +18,7 @@ describe Picky::Loader do
18
18
 
19
19
  describe 'load_application' do
20
20
  it 'does ok' do
21
- Kernel.should_receive(:load).once.with 'spec/test_directory/app.rb'
21
+ Kernel.should_receive(:load).once.with 'spec/temp/app.rb'
22
22
 
23
23
  lambda { described_class.load_application }.should_not raise_error
24
24
  end
@@ -36,13 +36,15 @@ describe Performant::Array do
36
36
  it "should be optimal for 2 small arrays of 50/10_000" do
37
37
  arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
38
38
 
39
- # brute force
39
+ # Brute force.
40
+ #
40
41
  performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.001
41
42
  end
42
43
  it "should be optimal for 2 small arrays of 50/10_000" do
43
44
  arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
44
45
 
45
46
  # &
47
+ #
46
48
  performance_of do
47
49
  arys.inject(arys.shift.dup) do |total, ary|
48
50
  total & arys
@@ -75,13 +77,56 @@ describe Performant::Array do
75
77
  it "should be optimal for 2 small arrays of 50/10_000" do
76
78
  arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a]
77
79
 
78
- # brute force
80
+ # Brute force.
81
+ #
79
82
  performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.001
80
83
  end
81
84
  it "should be optimal for 2 small arrays of 50/10_000" do
82
85
  arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a << 7]
83
86
 
84
87
  # &
88
+ #
89
+ performance_of do
90
+ arys.inject(arys.shift.dup) do |total, ary|
91
+ total & arys
92
+ end
93
+ end.should < 0.0015
94
+ end
95
+ end
96
+
97
+ describe "memory_efficient_intersect with strings" do
98
+ it "should intersect empty arrays correctly" do
99
+ arys = [['c','d'], ['a','b','c'], []]
100
+
101
+ Performant::Array.memory_efficient_intersect(arys).should == []
102
+ end
103
+ it "should handle intermediate empty results correctly" do
104
+ arys = [['e','d'], ['a','b','c'], ['c','d','e','h','i']]
105
+
106
+ Performant::Array.memory_efficient_intersect(arys).should == []
107
+ end
108
+ it "should intersect correctly" do
109
+ arys = [['c','d'], ['a','b','c'], ['c','d','e','h','i']]
110
+
111
+ Performant::Array.memory_efficient_intersect(arys).should == ['c']
112
+ end
113
+ it "should intersect many arrays" do
114
+ arys = [['c','d','e','f','g'], ['a','b','c','e','f','g'], ['c','d','e','f','g','h','i'], ['a','b','c','d','e','f','g','h','i','j'], ['b','c','e','f','g','s'], ['a','b','c','d','e','f','g','h','i','j'], ['b','c','e','f','g','s']]
115
+
116
+ Performant::Array.memory_efficient_intersect(arys).should == ['c','e','f','g']
117
+ end
118
+ it "should be optimal for 2 small arrays of 50/10_000" do
119
+ arys = [('1'..'50').to_a, ('10000'..'20000').to_a]
120
+
121
+ # Brute force - note that it is slower than the Symbols/Integers version.
122
+ #
123
+ performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.0015
124
+ end
125
+ it "should be optimal for 2 small arrays of 50/10_000" do
126
+ arys = [('1'..'50').to_a, ('10000'..'20000').to_a << 7]
127
+
128
+ # &
129
+ #
85
130
  performance_of do
86
131
  arys.inject(arys.shift.dup) do |total, ary|
87
132
  total & arys
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.11.3
4
+ version: 4.12.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-08 00:00:00.000000000 Z
12
+ date: 2012-11-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
@@ -34,7 +34,7 @@ dependencies:
34
34
  requirements:
35
35
  - - ~>
36
36
  - !ruby/object:Gem::Version
37
- version: 4.11.3
37
+ version: 4.12.0
38
38
  type: :development
39
39
  prerelease: false
40
40
  version_requirements: !ruby/object:Gem::Requirement
@@ -42,7 +42,7 @@ dependencies:
42
42
  requirements:
43
43
  - - ~>
44
44
  - !ruby/object:Gem::Version
45
- version: 4.11.3
45
+ version: 4.12.0
46
46
  - !ruby/object:Gem::Dependency
47
47
  name: text
48
48
  requirement: !ruby/object:Gem::Requirement
@@ -242,6 +242,7 @@ files:
242
242
  - lib/picky/sinatra/index_actions.rb
243
243
  - lib/picky/sinatra.rb
244
244
  - lib/picky/source.rb
245
+ - lib/picky/splitters/automatic.rb
245
246
  - lib/picky/statistics.rb
246
247
  - lib/picky/tasks.rb
247
248
  - lib/picky/tokenizer.rb
@@ -261,9 +262,8 @@ files:
261
262
  - lib/tasks/try.rake
262
263
  - lib/performant.c
263
264
  - spec/aux/picky/cli_spec.rb
264
- - spec/category_realtime_spec.rb
265
- - spec/ext/performant_spec.rb
266
265
  - spec/functional/allocations_uniq_by_definition_spec.rb
266
+ - spec/functional/automatic_segmentation_spec.rb
267
267
  - spec/functional/backends/file_spec.rb
268
268
  - spec/functional/backends/memory_bundle_realtime_spec.rb
269
269
  - spec/functional/backends/memory_json_utf8_spec.rb
@@ -328,6 +328,7 @@ files:
328
328
  - spec/lib/category/location_spec.rb
329
329
  - spec/lib/category_indexed_spec.rb
330
330
  - spec/lib/category_indexing_spec.rb
331
+ - spec/lib/category_realtime_spec.rb
331
332
  - spec/lib/category_spec.rb
332
333
  - spec/lib/character_substituters/west_european_spec.rb
333
334
  - spec/lib/extensions/array_spec.rb
@@ -398,6 +399,7 @@ files:
398
399
  - spec/lib/statistics_spec.rb
399
400
  - spec/lib/tasks/try_spec.rb
400
401
  - spec/lib/tokenizer_spec.rb
402
+ - spec/performant_spec.rb
401
403
  - bin/picky
402
404
  homepage: http://florianhanke.com/picky
403
405
  licenses: []
@@ -425,9 +427,8 @@ specification_version: 3
425
427
  summary: ! 'Picky: Semantic Search Engine. Clever Interface. Good Tools.'
426
428
  test_files:
427
429
  - spec/aux/picky/cli_spec.rb
428
- - spec/category_realtime_spec.rb
429
- - spec/ext/performant_spec.rb
430
430
  - spec/functional/allocations_uniq_by_definition_spec.rb
431
+ - spec/functional/automatic_segmentation_spec.rb
431
432
  - spec/functional/backends/file_spec.rb
432
433
  - spec/functional/backends/memory_bundle_realtime_spec.rb
433
434
  - spec/functional/backends/memory_json_utf8_spec.rb
@@ -492,6 +493,7 @@ test_files:
492
493
  - spec/lib/category/location_spec.rb
493
494
  - spec/lib/category_indexed_spec.rb
494
495
  - spec/lib/category_indexing_spec.rb
496
+ - spec/lib/category_realtime_spec.rb
495
497
  - spec/lib/category_spec.rb
496
498
  - spec/lib/character_substituters/west_european_spec.rb
497
499
  - spec/lib/extensions/array_spec.rb
@@ -562,4 +564,5 @@ test_files:
562
564
  - spec/lib/statistics_spec.rb
563
565
  - spec/lib/tasks/try_spec.rb
564
566
  - spec/lib/tokenizer_spec.rb
567
+ - spec/performant_spec.rb
565
568
  has_rdoc: