picky 4.11.3 → 4.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/performant.c CHANGED
@@ -1,17 +1,5 @@
1
1
  #include "ruby.h"
2
2
 
3
- // Copying internal ruby methods.
4
- //
5
- static inline VALUE rb_ary_elt(ary, offset)
6
- VALUE ary;
7
- long offset;
8
- {
9
- if (RARRAY_LEN(ary) == 0) return Qnil;
10
- if (offset < 0 || RARRAY_LEN(ary) <= offset) {
11
- return Qnil;
12
- }
13
- return RARRAY_PTR(ary)[offset];
14
- }
15
3
  static inline VALUE ary_make_hash(ary1, ary2)
16
4
  VALUE ary1, ary2;
17
5
  {
@@ -19,11 +7,11 @@ static inline VALUE ary_make_hash(ary1, ary2)
19
7
  long i;
20
8
 
21
9
  for (i=0; i<RARRAY_LEN(ary1); i++) {
22
- rb_hash_aset(hash, RARRAY_PTR(ary1)[i], Qtrue);
10
+ rb_hash_aset(hash, rb_ary_entry(ary1,i), Qtrue);
23
11
  }
24
12
  if (ary2) {
25
13
  for (i=0; i<RARRAY_LEN(ary2); i++) {
26
- rb_hash_aset(hash, RARRAY_PTR(ary2)[i], Qtrue);
14
+ rb_hash_aset(hash, rb_ary_entry(ary2, i), Qtrue);
27
15
  }
28
16
  }
29
17
  return hash;
@@ -44,19 +32,19 @@ static inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_
44
32
 
45
33
  // Vars.
46
34
  //
47
- struct RArray *rb_array_of_arrays;
35
+ VALUE rb_array_of_arrays;
48
36
  VALUE smallest_array;
49
37
  VALUE current_array;
50
38
  VALUE hash;
51
39
 
52
40
  // Temps.
53
41
  //
54
- VALUE v, vv;
42
+ VALUE v;
55
43
 
56
44
  // Conversions & presorting.
57
45
  //
58
- rb_array_of_arrays = (struct RArray*) rb_block_call(unsorted_array_of_arrays, rb_intern("sort_by!"), 0, 0, rb_ary_length, 0);
59
- smallest_array = (VALUE) RARRAY(rb_ary_dup(RARRAY_PTR(rb_array_of_arrays)[0]));
46
+ rb_array_of_arrays = rb_block_call(unsorted_array_of_arrays, rb_intern("sort_by!"), 0, 0, rb_ary_length, 0);
47
+ smallest_array = rb_ary_dup(rb_ary_entry(rb_array_of_arrays, 0));
60
48
 
61
49
  // Iterate through all arrays.
62
50
  //
@@ -77,10 +65,10 @@ static inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_
77
65
 
78
66
  // Iterate through all array elements.
79
67
  //
80
- current_array = RARRAY_PTR(rb_array_of_arrays)[i];
68
+ current_array = rb_ary_entry(rb_array_of_arrays, i);
81
69
  for (j = 0; j < RARRAY_LEN(current_array); j++) {
82
- v = vv = rb_ary_elt(current_array, j);
83
- if (st_delete(RHASH_TBL(hash), (unsigned long*)&vv, 0)) {
70
+ v = rb_ary_entry(current_array, j);
71
+ if (rb_hash_delete(hash, v) != Qnil) {
84
72
  rb_ary_push(smallest_array, v);
85
73
  }
86
74
  }
@@ -95,4 +83,4 @@ void Init_performant() {
95
83
  p_mPerformant = rb_define_module("Performant");
96
84
  p_cArray = rb_define_class_under(p_mPerformant, "Array", rb_cObject);
97
85
  rb_define_singleton_method(p_cArray, "memory_efficient_intersect", memory_efficient_intersect, 1);
98
- }
86
+ }
data/lib/picky/loader.rb CHANGED
@@ -268,6 +268,7 @@ module Picky
268
268
  load_relative 'tokenizer'
269
269
  # load_relative 'rack/harakiri' # Needs to be explicitly loaded/required.
270
270
  load_relative 'character_substituters/west_european'
271
+ load_relative 'splitters/automatic'
271
272
  load_generators
272
273
  load_inner_api
273
274
  load_results
@@ -0,0 +1,82 @@
1
+ module Picky
2
+
3
+ module Splitters
4
+
5
+ # Automatic Splitter.
6
+ #
7
+ # Use as a splitter for the splits_text_on option
8
+ # for Searches. You need to give it an index category
9
+ # to use for the splitting.
10
+ #
11
+ # Example:
12
+ # Picky::Search.new index do
13
+ # searching splits_text_on: Picky::Splitters::Automatic.new(index[:name])
14
+ # end
15
+ #
16
+ # Will split most queries correctly.
17
+ # However, has the following problems:
18
+ # * "cannot" is usually split as ['can', 'not']
19
+ # * "rainbow" is usually split as ['rain', 'bow']
20
+ #
21
+ # Reference: http://norvig.com/ngrams/ch14.pdf.
22
+ #
23
+ # Adapted from a script submitted
24
+ # by Andy Kitchen.
25
+ #
26
+ class Automatic
27
+
28
+ def initialize category, options = {}
29
+ @exact = category.exact
30
+ @partial = category.partial
31
+ @with_partial = options[:partial]
32
+
33
+ reset_memoization
34
+ end
35
+
36
+ # Reset the memoization.
37
+ #
38
+ def reset_memoization
39
+ @exact_memo = {}
40
+ @partial_memo = {}
41
+ end
42
+
43
+ # Split the given text into its most
44
+ # likely constituents.
45
+ #
46
+ def split text
47
+ segment(text, @with_partial).first
48
+ end
49
+
50
+ # Return all [head, tail] splits of a given string (the head may be empty, the tail never is).
51
+ #
52
+ def splits text
53
+ l = text.length
54
+ (0..l-1).map do |x|
55
+ [text.slice(0,x), text.slice(x,l)]
56
+ end
57
+ end
58
+
59
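+ # Segments the given text recursively, returning [segments, weight]; heads always use the exact index, only the tail may use the partial one.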
60
+ #
61
+ def segment text, use_partial = false
62
+ (use_partial ? @partial_memo : @exact_memo)[text] ||= splits(text).inject([[], nil]) do |(current, heaviest), (head, tail)|
63
+ tail_weight = use_partial ? @partial.weight(tail) : @exact.weight(tail)
64
+
65
+ segments, head_weight = segment head
66
+
67
+ weight = (head_weight && tail_weight &&
68
+ (head_weight + tail_weight) ||
69
+ tail_weight || head_weight)
70
+ if (weight || -1) > (heaviest || 0)
71
+ [tail_weight ? segments + [tail] : segments, weight]
72
+ else
73
+ [current, heaviest]
74
+ end
75
+ end
76
+ end
77
+
78
+ end
79
+
80
+ end
81
+
82
+ end
@@ -99,16 +99,25 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
99
99
  #
100
100
  def splits_text_on thing
101
101
  raise ArgumentError.new "#{__method__} takes a Regexp or String or a thing that responds to #split as argument, not a #{thing.class}." unless Regexp === thing || thing.respond_to?(:split)
102
- @splits_text_on = thing
103
- if thing.respond_to? :split
104
- def split text
105
- @splits_text_on.split text
106
- end
102
+ @splits_text_on = if thing.respond_to? :split
103
+ thing
107
104
  else
108
- def split text
109
- text.split @splits_text_on
110
- end
105
+ RegexpWrapper.new thing
106
+ end
107
+ end
108
+ class RegexpWrapper
109
+ def initialize regexp
110
+ @regexp = regexp
111
+ end
112
+ def split text
113
+ text.split @regexp
111
114
  end
115
+ def source
116
+ @regexp.source
117
+ end
118
+ end
119
+ def split text
120
+ @splits_text_on.split text
112
121
  end
113
122
 
114
123
  # Normalizing.
@@ -0,0 +1,98 @@
1
+ # encoding: utf-8
2
+ #
3
+ require 'spec_helper'
4
+
5
+ describe "automatic splitting" do
6
+
7
+ let(:index) do
8
+ index = Picky::Index.new :automatic_text_splitting do
9
+ indexing removes_characters: /[^a-z\s]/i,
10
+ stopwords: /\b(in|a)\b/
11
+ category :text
12
+ end
13
+
14
+ require 'ostruct'
15
+ index.add OpenStruct.new(id: 1, text: 'It does rain in Spain. Purple is a new color. Bow to the king.')
16
+ index.add OpenStruct.new(id: 2, text: 'Rainbow rainbow.')
17
+ index.add OpenStruct.new(id: 3, text: 'Bow and arrow in Papua New Guinea.')
18
+ index.add OpenStruct.new(id: 4, text: 'The color purple.')
19
+ index.add OpenStruct.new(id: 5, text: 'Sun and rain.')
20
+ index.add OpenStruct.new(id: 6, text: 'The king is in Spain.')
21
+
22
+ index
23
+ end
24
+
25
+ it 'can split the text automatically' do
26
+ automatic_splitter = Picky::Splitters::Automatic.new index[:text]
27
+
28
+ # It splits the text correctly.
29
+ #
30
+ automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow']
31
+ automatic_splitter.split('purplerain').should == ['purple', 'rain']
32
+ automatic_splitter.split('purple').should == ['purple']
33
+
34
+ # When it can't, it drops the trailing part that is not in the exact index.
35
+ #
36
+ automatic_splitter.split('purplerainbo').should == ['purple', 'rain']
37
+ automatic_splitter.split('purplerainb').should == ['purple', 'rain']
38
+ #
39
+ automatic_splitter.split('purplerai').should == ['purple']
40
+ automatic_splitter.split('purplera').should == ['purple']
41
+ automatic_splitter.split('purpler').should == ['purple']
42
+ #
43
+ automatic_splitter.split('purpl').should == []
44
+ automatic_splitter.split('purp').should == []
45
+ automatic_splitter.split('pur').should == []
46
+ automatic_splitter.split('pu').should == []
47
+ automatic_splitter.split('p').should == []
48
+ end
49
+
50
+ it 'can split text automatically (with partial)' do
51
+ automatic_splitter = Picky::Splitters::Automatic.new index[:text], partial: true
52
+
53
+ # It splits the text correctly.
54
+ #
55
+ automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow']
56
+ automatic_splitter.split('purplerain').should == ['purple', 'rain']
57
+ automatic_splitter.split('purple').should == ['purple']
58
+
59
+ # When it can't, it splits it using the partial index (correctly).
60
+ #
61
+ automatic_splitter.split('purplerainbo').should == ['purple', 'rain', 'bo']
62
+ automatic_splitter.split('purplerainb').should == ['purple', 'rain', 'b']
63
+ #
64
+ automatic_splitter.split('purplerai').should == ['purple', 'rai']
65
+ automatic_splitter.split('purplera').should == ['purple', 'ra']
66
+ automatic_splitter.split('purpler').should == ['purple'] # No 'r' in partial index.
67
+ #
68
+ automatic_splitter.split('purpl').should == ['purpl']
69
+ automatic_splitter.split('purp').should == ['purp']
70
+ automatic_splitter.split('pur').should == [] # No 'pur' in partial index etc.
71
+ automatic_splitter.split('pu').should == []
72
+ automatic_splitter.split('p').should == []
73
+
74
+ try = Picky::Search.new index do
75
+ searching splits_text_on: automatic_splitter
76
+ end
77
+
78
+ # Should find the one with all parts.
79
+ #
80
+ try.search('purplerainbow').ids.should == [1]
81
+ try.search('sunandrain').ids.should == [5]
82
+
83
+ # Common parts are found in multiple examples.
84
+ #
85
+ try.search('colorpurple').ids.should == [4,1]
86
+ try.search('bownew').ids.should == [3,1]
87
+ try.search('spainisking').ids.should == [6,1]
88
+ end
89
+
90
+ it 'is fast enough' do
91
+ automatic_splitter = Picky::Splitters::Automatic.new index[:text]
92
+
93
+ performance_of do
94
+ automatic_splitter.split('purplerainbow')
95
+ end.should < 0.0002
96
+ end
97
+
98
+ end
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Picky::Backends::File::Basic do
4
4
 
5
5
  context 'without options' do
6
- let(:basic) { described_class.new 'some/cache/path/to/file' }
6
+ let(:basic) { described_class.new 'spec/temp/some/cache/path/to/file' }
7
7
 
8
8
  describe 'empty' do
9
9
  it 'returns the container that is used for indexing' do
@@ -19,14 +19,14 @@ describe Picky::Backends::File::Basic do
19
19
 
20
20
  describe 'to_s' do
21
21
  it 'returns the cache path with the default file extension' do
22
- basic.to_s.should == 'Picky::Backends::File::Basic(some/cache/path/to/file.file.index,some/cache/path/to/file.file_mapping.index.memory.json)'
22
+ basic.to_s.should == 'Picky::Backends::File::Basic(spec/temp/some/cache/path/to/file.file.index,spec/temp/some/cache/path/to/file.file_mapping.index.memory.json)'
23
23
  end
24
24
  end
25
25
  end
26
26
 
27
27
  context 'with options' do
28
28
  let(:basic) do
29
- described_class.new 'some/cache/path/to/file',
29
+ described_class.new 'spec/temp/some/cache/path/to/file',
30
30
  empty: [],
31
31
  initial: []
32
32
  end
@@ -21,7 +21,7 @@ describe Picky::Backends::File do
21
21
  # ].each do |type, kind|
22
22
  # it "creates and returns a(n) #{type} index" do
23
23
  # @backend.send(:"create_#{type}",
24
- # stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
24
+ # stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
25
25
  # ).should be_kind_of(kind)
26
26
  # end
27
27
  # end
@@ -44,7 +44,7 @@ describe Picky::Backends::File do
44
44
  ].each do |type, kind|
45
45
  it "creates and returns a(n) #{type} index" do
46
46
  @backend.send(:"create_#{type}",
47
- stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
47
+ stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
48
48
  ).should be_kind_of(kind)
49
49
  end
50
50
  end
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Picky::Backends::Memory::Basic do
4
4
 
5
5
  context 'without options' do
6
- let(:basic) { described_class.new 'some/cache/path/to/file' }
6
+ let(:basic) { described_class.new 'spec/temp/some/cache/path/to/file' }
7
7
 
8
8
  describe 'empty' do
9
9
  it 'returns the container that is used for indexing' do
@@ -19,14 +19,14 @@ describe Picky::Backends::Memory::Basic do
19
19
 
20
20
  describe 'to_s' do
21
21
  it 'returns the cache path with the default file extension' do
22
- basic.to_s.should == 'Picky::Backends::Memory::Basic(some/cache/path/to/file.memory.index)'
22
+ basic.to_s.should == 'Picky::Backends::Memory::Basic(spec/temp/some/cache/path/to/file.memory.index)'
23
23
  end
24
24
  end
25
25
  end
26
26
 
27
27
  context 'with options' do
28
28
  let(:basic) do
29
- described_class.new 'some/cache/path/to/file',
29
+ described_class.new 'spec/temp/some/cache/path/to/file',
30
30
  empty: [],
31
31
  initial: []
32
32
  end
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Picky::Backends::Memory::JSON do
4
4
 
5
5
  context 'hash-based indexes' do
6
- let(:json) { described_class.new 'some/cache/path/to/file' }
6
+ let(:json) { described_class.new 'spec/temp/some/cache/path/to/file' }
7
7
 
8
8
  describe 'extension' do
9
9
  it 'is correct' do
@@ -37,7 +37,7 @@ describe Picky::Backends::Memory::JSON do
37
37
 
38
38
  describe 'to_s' do
39
39
  it 'returns the cache path with the default file extension' do
40
- json.to_s.should == 'Picky::Backends::Memory::JSON(some/cache/path/to/file.memory.json)'
40
+ json.to_s.should == 'Picky::Backends::Memory::JSON(spec/temp/some/cache/path/to/file.memory.json)'
41
41
  end
42
42
  end
43
43
  end
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Picky::Backends::Memory::Marshal do
4
4
 
5
5
  context 'hash-based indexes' do
6
- let(:marshal) { described_class.new 'some/cache/path/to/file' }
6
+ let(:marshal) { described_class.new 'spec/temp/some/cache/path/to/file' }
7
7
 
8
8
  describe 'extension' do
9
9
  it 'is correct' do
@@ -37,7 +37,7 @@ describe Picky::Backends::Memory::Marshal do
37
37
 
38
38
  describe 'to_s' do
39
39
  it 'returns the cache path with the default file extension' do
40
- marshal.to_s.should == 'Picky::Backends::Memory::Marshal(some/cache/path/to/file.memory.dump)'
40
+ marshal.to_s.should == 'Picky::Backends::Memory::Marshal(spec/temp/some/cache/path/to/file.memory.dump)'
41
41
  end
42
42
  end
43
43
  end
@@ -21,7 +21,7 @@ describe Picky::Backends::Memory do
21
21
  # ].each do |type, kind|
22
22
  # it "creates and returns a(n) #{type} index" do
23
23
  # @backend.send(:"create_#{type}",
24
- # stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
24
+ # stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
25
25
  # ).should be_kind_of(kind)
26
26
  # end
27
27
  # end
@@ -47,7 +47,7 @@ describe Picky::Backends::Memory do
47
47
  # ].each do |type, kind|
48
48
  # it "creates and returns a(n) #{type} index" do
49
49
  # to_a_able_stub = Object.new
50
- # to_a_able_stub.stub! :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}"
50
+ # to_a_able_stub.stub! :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}"
51
51
  # @backend.send(:"create_#{type}", to_a_able_stub).should be_kind_of(kind)
52
52
  # end
53
53
  # end
@@ -70,7 +70,7 @@ describe Picky::Backends::Memory do
70
70
  ].each do |type, kind|
71
71
  it "creates and returns a(n) #{type} index" do
72
72
  @backend.send(:"create_#{type}",
73
- stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
73
+ stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
74
74
  ).should be_kind_of(kind)
75
75
  end
76
76
  end
@@ -5,7 +5,7 @@ require 'sqlite3'
5
5
  describe Picky::Backends::SQLite::Array do
6
6
 
7
7
  context 'hash-based indexes' do
8
- let(:db) { described_class.new 'some/cache/path/to/file' }
8
+ let(:db) { described_class.new 'spec/temp/some/cache/path/to/file' }
9
9
 
10
10
  describe 'dump' do
11
11
  it 'delegates to the given hash' do
@@ -67,13 +67,13 @@ describe Picky::Backends::SQLite::Array do
67
67
 
68
68
  describe 'to_s' do
69
69
  it 'returns the cache path with the default file extension' do
70
- db.to_s.should == 'Picky::Backends::SQLite::Array(some/cache/path/to/file.sqlite3)'
70
+ db.to_s.should == 'Picky::Backends::SQLite::Array(spec/temp/some/cache/path/to/file.sqlite3)'
71
71
  end
72
72
  end
73
73
  end
74
74
 
75
75
  context 'hash-based indexes' do
76
- let(:db) { described_class.new 'some/cache/path/to/file', realtime: true }
76
+ let(:db) { described_class.new 'spec/temp/some/cache/path/to/file', realtime: true }
77
77
 
78
78
  describe 'dump' do
79
79
  it 'delegates to the given hash' do
@@ -135,7 +135,7 @@ describe Picky::Backends::SQLite::Array do
135
135
 
136
136
  describe 'to_s' do
137
137
  it 'returns the cache path with the default file extension' do
138
- db.to_s.should == 'Picky::Backends::SQLite::Array(some/cache/path/to/file.sqlite3)'
138
+ db.to_s.should == 'Picky::Backends::SQLite::Array(spec/temp/some/cache/path/to/file.sqlite3)'
139
139
  end
140
140
  end
141
141
  end
@@ -5,7 +5,7 @@ require 'sqlite3'
5
5
  describe Picky::Backends::SQLite::Value do
6
6
 
7
7
  context 'hash-based indexes' do
8
- let(:db) { described_class.new 'some/cache/path/to/file' }
8
+ let(:db) { described_class.new 'spec/temp/some/cache/path/to/file' }
9
9
 
10
10
  describe 'dump' do
11
11
  it 'delegates to the given hash' do
@@ -67,7 +67,7 @@ describe Picky::Backends::SQLite::Value do
67
67
 
68
68
  describe 'to_s' do
69
69
  it 'returns the cache path with the default file extension' do
70
- db.to_s.should == 'Picky::Backends::SQLite::Value(some/cache/path/to/file.sqlite3)'
70
+ db.to_s.should == 'Picky::Backends::SQLite::Value(spec/temp/some/cache/path/to/file.sqlite3)'
71
71
  end
72
72
  end
73
73
  end
@@ -23,7 +23,7 @@ describe Picky::Backends::SQLite do
23
23
  # ].each do |type, kind|
24
24
  # it "creates and returns a(n) #{type} index" do
25
25
  # @backend.send(:"create_#{type}",
26
- # stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
26
+ # stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
27
27
  # ).should be_kind_of(kind)
28
28
  # end
29
29
  # end
@@ -49,7 +49,7 @@ describe Picky::Backends::SQLite do
49
49
  # ].each do |type, kind|
50
50
  # it "creates and returns a(n) #{type} index" do
51
51
  # to_a_able_stub = Object.new
52
- # to_a_able_stub.stub! :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}"
52
+ # to_a_able_stub.stub! :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}"
53
53
  # @backend.send(:"create_#{type}", to_a_able_stub).should be_kind_of(kind)
54
54
  # end
55
55
  # end
@@ -72,7 +72,7 @@ describe Picky::Backends::SQLite do
72
72
  ].each do |type, kind|
73
73
  it "creates and returns a(n) #{type} index" do
74
74
  @backend.send(:"create_#{type}",
75
- stub(type, :index_path => "spec/test_directory/index/test/some_index/some_category_some_bundle_#{type}")
75
+ stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
76
76
  ).should be_kind_of(kind)
77
77
  end
78
78
  end
@@ -119,7 +119,7 @@ describe Picky::Bundle do
119
119
  it "uses the right file" do
120
120
  MultiJson.stub! :decode
121
121
 
122
- File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_inverted.memory.json', 'r'
122
+ File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_inverted.memory.json', 'r'
123
123
 
124
124
  @bundle.load_inverted
125
125
  end
@@ -128,7 +128,7 @@ describe Picky::Bundle do
128
128
  it "uses the right file" do
129
129
  MultiJson.stub! :decode
130
130
 
131
- File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_weights.memory.json', 'r'
131
+ File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_weights.memory.json', 'r'
132
132
 
133
133
  @bundle.load_weights
134
134
  end
@@ -137,7 +137,7 @@ describe Picky::Bundle do
137
137
  it "uses the right file" do
138
138
  Marshal.stub! :load
139
139
 
140
- File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_similarity.memory.dump', 'r:binary'
140
+ File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_similarity.memory.dump', 'r:binary'
141
141
 
142
142
  @bundle.load_similarity
143
143
  end
@@ -146,7 +146,7 @@ describe Picky::Bundle do
146
146
  it "uses the right file" do
147
147
  MultiJson.stub! :decode
148
148
 
149
- File.should_receive(:open).once.with 'spec/test_directory/index/test/some_index/some_category_some_name_configuration.memory.json', 'r'
149
+ File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_configuration.memory.json', 'r'
150
150
 
151
151
  @bundle.load_configuration
152
152
  end
@@ -22,10 +22,10 @@ describe Picky::Bundle do
22
22
 
23
23
  describe 'index_path' do
24
24
  it 'is correct' do
25
- bundle.index_path(:some_type).should == 'spec/test_directory/index/test/some_index/some_category_some_name_some_type'
25
+ bundle.index_path(:some_type).should == 'spec/temp/index/test/some_index/some_category_some_name_some_type'
26
26
  end
27
27
  it 'is correct' do
28
- bundle.index_path.should == 'spec/test_directory/index/test/some_index/some_category_some_name'
28
+ bundle.index_path.should == 'spec/temp/index/test/some_index/some_category_some_name'
29
29
  end
30
30
  end
31
31
 
@@ -27,7 +27,7 @@ describe Picky::Category do
27
27
  context 'directories' do
28
28
  let(:category) { described_class.new :some_category, index }
29
29
  it 'is correct' do
30
- category.prepared_index_path.should == 'spec/test_directory/index/test/some_index/some_category'
30
+ category.prepared_index_path.should == 'spec/temp/index/test/some_index/some_category'
31
31
  end
32
32
  end
33
33
 
@@ -58,7 +58,7 @@ describe Picky::Index do
58
58
 
59
59
  describe 'directory' do
60
60
  it 'is correct' do
61
- api.directory.should == 'spec/test_directory/index/test/some_index_name'
61
+ api.directory.should == 'spec/temp/index/test/some_index_name'
62
62
  end
63
63
  end
64
64
 
@@ -18,7 +18,7 @@ describe Picky::Loader do
18
18
 
19
19
  describe 'load_application' do
20
20
  it 'does ok' do
21
- Kernel.should_receive(:load).once.with 'spec/test_directory/app.rb'
21
+ Kernel.should_receive(:load).once.with 'spec/temp/app.rb'
22
22
 
23
23
  lambda { described_class.load_application }.should_not raise_error
24
24
  end
@@ -36,13 +36,15 @@ describe Performant::Array do
36
36
  it "should be optimal for 2 small arrays of 50/10_000" do
37
37
  arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
38
38
 
39
- # brute force
39
+ # Brute force.
40
+ #
40
41
  performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.001
41
42
  end
42
43
  it "should be optimal for 2 small arrays of 50/10_000" do
43
44
  arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
44
45
 
45
46
  # &
47
+ #
46
48
  performance_of do
47
49
  arys.inject(arys.shift.dup) do |total, ary|
48
50
  total & arys
@@ -75,13 +77,56 @@ describe Performant::Array do
75
77
  it "should be optimal for 2 small arrays of 50/10_000" do
76
78
  arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a]
77
79
 
78
- # brute force
80
+ # Brute force.
81
+ #
79
82
  performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.001
80
83
  end
81
84
  it "should be optimal for 2 small arrays of 50/10_000" do
82
85
  arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a << 7]
83
86
 
84
87
  # &
88
+ #
89
+ performance_of do
90
+ arys.inject(arys.shift.dup) do |total, ary|
91
+ total & arys
92
+ end
93
+ end.should < 0.0015
94
+ end
95
+ end
96
+
97
+ describe "memory_efficient_intersect with strings" do
98
+ it "should intersect empty arrays correctly" do
99
+ arys = [['c','d'], ['a','b','c'], []]
100
+
101
+ Performant::Array.memory_efficient_intersect(arys).should == []
102
+ end
103
+ it "should handle intermediate empty results correctly" do
104
+ arys = [['e','d'], ['a','b','c'], ['c','d','e','h','i']]
105
+
106
+ Performant::Array.memory_efficient_intersect(arys).should == []
107
+ end
108
+ it "should intersect correctly" do
109
+ arys = [['c','d'], ['a','b','c'], ['c','d','e','h','i']]
110
+
111
+ Performant::Array.memory_efficient_intersect(arys).should == ['c']
112
+ end
113
+ it "should intersect many arrays" do
114
+ arys = [['c','d','e','f','g'], ['a','b','c','e','f','g'], ['c','d','e','f','g','h','i'], ['a','b','c','d','e','f','g','h','i','j'], ['b','c','e','f','g','s'], ['a','b','c','d','e','f','g','h','i','j'], ['b','c','e','f','g','s']]
115
+
116
+ Performant::Array.memory_efficient_intersect(arys).should == ['c','e','f','g']
117
+ end
118
+ it "should be optimal for 2 small arrays of 50/10_000" do
119
+ arys = [('1'..'50').to_a, ('10000'..'20000').to_a]
120
+
121
+ # Brute force - note that it is slower than the Symbols/Integers version.
122
+ #
123
+ performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.0015
124
+ end
125
+ it "should be optimal for 2 small arrays of 50/10_000" do
126
+ arys = [('1'..'50').to_a, ('10000'..'20000').to_a << 7]
127
+
128
+ # &
129
+ #
85
130
  performance_of do
86
131
  arys.inject(arys.shift.dup) do |total, ary|
87
132
  total & arys
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.11.3
4
+ version: 4.12.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-08 00:00:00.000000000 Z
12
+ date: 2012-11-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
@@ -34,7 +34,7 @@ dependencies:
34
34
  requirements:
35
35
  - - ~>
36
36
  - !ruby/object:Gem::Version
37
- version: 4.11.3
37
+ version: 4.12.0
38
38
  type: :development
39
39
  prerelease: false
40
40
  version_requirements: !ruby/object:Gem::Requirement
@@ -42,7 +42,7 @@ dependencies:
42
42
  requirements:
43
43
  - - ~>
44
44
  - !ruby/object:Gem::Version
45
- version: 4.11.3
45
+ version: 4.12.0
46
46
  - !ruby/object:Gem::Dependency
47
47
  name: text
48
48
  requirement: !ruby/object:Gem::Requirement
@@ -242,6 +242,7 @@ files:
242
242
  - lib/picky/sinatra/index_actions.rb
243
243
  - lib/picky/sinatra.rb
244
244
  - lib/picky/source.rb
245
+ - lib/picky/splitters/automatic.rb
245
246
  - lib/picky/statistics.rb
246
247
  - lib/picky/tasks.rb
247
248
  - lib/picky/tokenizer.rb
@@ -261,9 +262,8 @@ files:
261
262
  - lib/tasks/try.rake
262
263
  - lib/performant.c
263
264
  - spec/aux/picky/cli_spec.rb
264
- - spec/category_realtime_spec.rb
265
- - spec/ext/performant_spec.rb
266
265
  - spec/functional/allocations_uniq_by_definition_spec.rb
266
+ - spec/functional/automatic_segmentation_spec.rb
267
267
  - spec/functional/backends/file_spec.rb
268
268
  - spec/functional/backends/memory_bundle_realtime_spec.rb
269
269
  - spec/functional/backends/memory_json_utf8_spec.rb
@@ -328,6 +328,7 @@ files:
328
328
  - spec/lib/category/location_spec.rb
329
329
  - spec/lib/category_indexed_spec.rb
330
330
  - spec/lib/category_indexing_spec.rb
331
+ - spec/lib/category_realtime_spec.rb
331
332
  - spec/lib/category_spec.rb
332
333
  - spec/lib/character_substituters/west_european_spec.rb
333
334
  - spec/lib/extensions/array_spec.rb
@@ -398,6 +399,7 @@ files:
398
399
  - spec/lib/statistics_spec.rb
399
400
  - spec/lib/tasks/try_spec.rb
400
401
  - spec/lib/tokenizer_spec.rb
402
+ - spec/performant_spec.rb
401
403
  - bin/picky
402
404
  homepage: http://florianhanke.com/picky
403
405
  licenses: []
@@ -425,9 +427,8 @@ specification_version: 3
425
427
  summary: ! 'Picky: Semantic Search Engine. Clever Interface. Good Tools.'
426
428
  test_files:
427
429
  - spec/aux/picky/cli_spec.rb
428
- - spec/category_realtime_spec.rb
429
- - spec/ext/performant_spec.rb
430
430
  - spec/functional/allocations_uniq_by_definition_spec.rb
431
+ - spec/functional/automatic_segmentation_spec.rb
431
432
  - spec/functional/backends/file_spec.rb
432
433
  - spec/functional/backends/memory_bundle_realtime_spec.rb
433
434
  - spec/functional/backends/memory_json_utf8_spec.rb
@@ -492,6 +493,7 @@ test_files:
492
493
  - spec/lib/category/location_spec.rb
493
494
  - spec/lib/category_indexed_spec.rb
494
495
  - spec/lib/category_indexing_spec.rb
496
+ - spec/lib/category_realtime_spec.rb
495
497
  - spec/lib/category_spec.rb
496
498
  - spec/lib/character_substituters/west_european_spec.rb
497
499
  - spec/lib/extensions/array_spec.rb
@@ -562,4 +564,5 @@ test_files:
562
564
  - spec/lib/statistics_spec.rb
563
565
  - spec/lib/tasks/try_spec.rb
564
566
  - spec/lib/tokenizer_spec.rb
567
+ - spec/performant_spec.rb
565
568
  has_rdoc: