picky 4.11.3 → 4.12.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/performant.c +10 -22
- data/lib/picky/loader.rb +1 -0
- data/lib/picky/splitters/automatic.rb +82 -0
- data/lib/picky/tokenizer.rb +17 -8
- data/spec/functional/automatic_segmentation_spec.rb +98 -0
- data/spec/lib/backends/file/basic_spec.rb +3 -3
- data/spec/lib/backends/file_spec.rb +2 -2
- data/spec/lib/backends/memory/basic_spec.rb +3 -3
- data/spec/lib/backends/memory/json_spec.rb +2 -2
- data/spec/lib/backends/memory/marshal_spec.rb +2 -2
- data/spec/lib/backends/memory_spec.rb +3 -3
- data/spec/lib/backends/sqlite/array_spec.rb +4 -4
- data/spec/lib/backends/sqlite/value_spec.rb +2 -2
- data/spec/lib/backends/sqlite_spec.rb +3 -3
- data/spec/lib/bundle_indexed_spec.rb +4 -4
- data/spec/lib/bundle_spec.rb +2 -2
- data/spec/{category_realtime_spec.rb → lib/category_realtime_spec.rb} +0 -0
- data/spec/lib/category_spec.rb +1 -1
- data/spec/lib/index_spec.rb +1 -1
- data/spec/lib/loader_spec.rb +1 -1
- data/spec/{ext/performant_spec.rb → performant_spec.rb} +47 -2
- metadata +11 -8
data/lib/performant.c
CHANGED
@@ -1,17 +1,5 @@
|
|
1
1
|
#include "ruby.h"
|
2
2
|
|
3
|
-
// Copying internal ruby methods.
|
4
|
-
//
|
5
|
-
static inline VALUE rb_ary_elt(ary, offset)
|
6
|
-
VALUE ary;
|
7
|
-
long offset;
|
8
|
-
{
|
9
|
-
if (RARRAY_LEN(ary) == 0) return Qnil;
|
10
|
-
if (offset < 0 || RARRAY_LEN(ary) <= offset) {
|
11
|
-
return Qnil;
|
12
|
-
}
|
13
|
-
return RARRAY_PTR(ary)[offset];
|
14
|
-
}
|
15
3
|
static inline VALUE ary_make_hash(ary1, ary2)
|
16
4
|
VALUE ary1, ary2;
|
17
5
|
{
|
@@ -19,11 +7,11 @@ static inline VALUE ary_make_hash(ary1, ary2)
|
|
19
7
|
long i;
|
20
8
|
|
21
9
|
for (i=0; i<RARRAY_LEN(ary1); i++) {
|
22
|
-
rb_hash_aset(hash,
|
10
|
+
rb_hash_aset(hash, rb_ary_entry(ary1,i), Qtrue);
|
23
11
|
}
|
24
12
|
if (ary2) {
|
25
13
|
for (i=0; i<RARRAY_LEN(ary2); i++) {
|
26
|
-
rb_hash_aset(hash,
|
14
|
+
rb_hash_aset(hash, rb_ary_entry(ary2, i), Qtrue);
|
27
15
|
}
|
28
16
|
}
|
29
17
|
return hash;
|
@@ -44,19 +32,19 @@ static inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_
|
|
44
32
|
|
45
33
|
// Vars.
|
46
34
|
//
|
47
|
-
|
35
|
+
VALUE rb_array_of_arrays;
|
48
36
|
VALUE smallest_array;
|
49
37
|
VALUE current_array;
|
50
38
|
VALUE hash;
|
51
39
|
|
52
40
|
// Temps.
|
53
41
|
//
|
54
|
-
VALUE v
|
42
|
+
VALUE v;
|
55
43
|
|
56
44
|
// Conversions & presorting.
|
57
45
|
//
|
58
|
-
rb_array_of_arrays =
|
59
|
-
smallest_array =
|
46
|
+
rb_array_of_arrays = rb_block_call(unsorted_array_of_arrays, rb_intern("sort_by!"), 0, 0, rb_ary_length, 0);
|
47
|
+
smallest_array = rb_ary_dup(rb_ary_entry(rb_array_of_arrays, 0));
|
60
48
|
|
61
49
|
// Iterate through all arrays.
|
62
50
|
//
|
@@ -77,10 +65,10 @@ static inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_
|
|
77
65
|
|
78
66
|
// Iterate through all array elements.
|
79
67
|
//
|
80
|
-
current_array =
|
68
|
+
current_array = rb_ary_entry(rb_array_of_arrays, i);
|
81
69
|
for (j = 0; j < RARRAY_LEN(current_array); j++) {
|
82
|
-
v =
|
83
|
-
if (
|
70
|
+
v = rb_ary_entry(current_array, j);
|
71
|
+
if (rb_hash_delete(hash, v) != Qnil) {
|
84
72
|
rb_ary_push(smallest_array, v);
|
85
73
|
}
|
86
74
|
}
|
@@ -95,4 +83,4 @@ void Init_performant() {
|
|
95
83
|
p_mPerformant = rb_define_module("Performant");
|
96
84
|
p_cArray = rb_define_class_under(p_mPerformant, "Array", rb_cObject);
|
97
85
|
rb_define_singleton_method(p_cArray, "memory_efficient_intersect", memory_efficient_intersect, 1);
|
98
|
-
}
|
86
|
+
}
|
data/lib/picky/loader.rb
CHANGED
@@ -268,6 +268,7 @@ module Picky
|
|
268
268
|
load_relative 'tokenizer'
|
269
269
|
# load_relative 'rack/harakiri' # Needs to be explicitly loaded/required.
|
270
270
|
load_relative 'character_substituters/west_european'
|
271
|
+
load_relative 'splitters/automatic'
|
271
272
|
load_generators
|
272
273
|
load_inner_api
|
273
274
|
load_results
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
module Splitters
|
4
|
+
|
5
|
+
# Automatic Splitter.
|
6
|
+
#
|
7
|
+
# Use as a splitter for the splits_text_on option
|
8
|
+
# for Searches. You need to give it an index category
|
9
|
+
# to use for the splitting.
|
10
|
+
#
|
11
|
+
# Example:
|
12
|
+
# Picky::Search.new index do
|
13
|
+
# searching splits_text_on: Picky::Splitters::Automatic.new(index[:name])
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
# Will split most queries correctly.
|
17
|
+
# However, it has the following known problems:
|
18
|
+
# * "cannot" is usually split as ['can', 'not']
|
19
|
+
# * "rainbow" is usually split as ['rain', 'bow']
|
20
|
+
#
|
21
|
+
# Reference: http://norvig.com/ngrams/ch14.pdf.
|
22
|
+
#
|
23
|
+
# Adapted from a script submitted
|
24
|
+
# by Andy Kitchen.
|
25
|
+
#
|
26
|
+
class Automatic
|
27
|
+
|
28
|
+
def initialize category, options = {}
|
29
|
+
@exact = category.exact
|
30
|
+
@partial = category.partial
|
31
|
+
@with_partial = options[:partial]
|
32
|
+
|
33
|
+
reset_memoization
|
34
|
+
end
|
35
|
+
|
36
|
+
# Reset the memoization.
|
37
|
+
#
|
38
|
+
def reset_memoization
|
39
|
+
@exact_memo = {}
|
40
|
+
@partial_memo = {}
|
41
|
+
end
|
42
|
+
|
43
|
+
# Split the given text into its most
|
44
|
+
# likely constituents.
|
45
|
+
#
|
46
|
+
def split text
|
47
|
+
segment(text, @with_partial).first
|
48
|
+
end
|
49
|
+
|
50
|
+
# Return all splits of a given string.
|
51
|
+
#
|
52
|
+
def splits text
|
53
|
+
l = text.length
|
54
|
+
(0..l-1).map do |x|
|
55
|
+
[text.slice(0,x), text.slice(x,l)]
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
# Segments the given text recursively.
|
60
|
+
#
|
61
|
+
def segment text, use_partial = false
|
62
|
+
(use_partial ? @partial_memo : @exact_memo)[text] ||= splits(text).inject([[], nil]) do |(current, heaviest), (head, tail)|
|
63
|
+
tail_weight = use_partial ? @partial.weight(tail) : @exact.weight(tail)
|
64
|
+
|
65
|
+
segments, head_weight = segment head
|
66
|
+
|
67
|
+
weight = (head_weight && tail_weight &&
|
68
|
+
(head_weight + tail_weight) ||
|
69
|
+
tail_weight || head_weight)
|
70
|
+
if (weight || -1) > (heaviest || 0)
|
71
|
+
[tail_weight ? segments + [tail] : segments, weight]
|
72
|
+
else
|
73
|
+
[current, heaviest]
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
end
|
79
|
+
|
80
|
+
end
|
81
|
+
|
82
|
+
end
|
data/lib/picky/tokenizer.rb
CHANGED
@@ -99,16 +99,25 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
|
99
99
|
#
|
100
100
|
def splits_text_on thing
|
101
101
|
raise ArgumentError.new "#{__method__} takes a Regexp or String or a thing that responds to #split as argument, not a #{thing.class}." unless Regexp === thing || thing.respond_to?(:split)
|
102
|
-
@splits_text_on = thing
|
103
|
-
|
104
|
-
def split text
|
105
|
-
@splits_text_on.split text
|
106
|
-
end
|
102
|
+
@splits_text_on = if thing.respond_to? :split
|
103
|
+
thing
|
107
104
|
else
|
108
|
-
|
109
|
-
|
110
|
-
|
105
|
+
RegexpWrapper.new thing
|
106
|
+
end
|
107
|
+
end
|
108
|
+
class RegexpWrapper
|
109
|
+
def initialize regexp
|
110
|
+
@regexp = regexp
|
111
|
+
end
|
112
|
+
def split text
|
113
|
+
text.split @regexp
|
111
114
|
end
|
115
|
+
def source
|
116
|
+
@regexp.source
|
117
|
+
end
|
118
|
+
end
|
119
|
+
def split text
|
120
|
+
@splits_text_on.split text
|
112
121
|
end
|
113
122
|
|
114
123
|
# Normalizing.
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe "automatic splitting" do
|
6
|
+
|
7
|
+
let(:index) do
|
8
|
+
index = Picky::Index.new :automatic_text_splitting do
|
9
|
+
indexing removes_characters: /[^a-z\s]/i,
|
10
|
+
stopwords: /\b(in|a)\b/
|
11
|
+
category :text
|
12
|
+
end
|
13
|
+
|
14
|
+
require 'ostruct'
|
15
|
+
index.add OpenStruct.new(id: 1, text: 'It does rain in Spain. Purple is a new color. Bow to the king.')
|
16
|
+
index.add OpenStruct.new(id: 2, text: 'Rainbow rainbow.')
|
17
|
+
index.add OpenStruct.new(id: 3, text: 'Bow and arrow in Papua New Guinea.')
|
18
|
+
index.add OpenStruct.new(id: 4, text: 'The color purple.')
|
19
|
+
index.add OpenStruct.new(id: 5, text: 'Sun and rain.')
|
20
|
+
index.add OpenStruct.new(id: 6, text: 'The king is in Spain.')
|
21
|
+
|
22
|
+
index
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'can split the text automatically' do
|
26
|
+
automatic_splitter = Picky::Splitters::Automatic.new index[:text]
|
27
|
+
|
28
|
+
# It splits the text correctly.
|
29
|
+
#
|
30
|
+
automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow']
|
31
|
+
automatic_splitter.split('purplerain').should == ['purple', 'rain']
|
32
|
+
automatic_splitter.split('purple').should == ['purple']
|
33
|
+
|
34
|
+
# When the tail is not in the exact index, it is dropped (this splitter does not use the partial index).
|
35
|
+
#
|
36
|
+
automatic_splitter.split('purplerainbo').should == ['purple', 'rain']
|
37
|
+
automatic_splitter.split('purplerainb').should == ['purple', 'rain']
|
38
|
+
#
|
39
|
+
automatic_splitter.split('purplerai').should == ['purple']
|
40
|
+
automatic_splitter.split('purplera').should == ['purple']
|
41
|
+
automatic_splitter.split('purpler').should == ['purple']
|
42
|
+
#
|
43
|
+
automatic_splitter.split('purpl').should == []
|
44
|
+
automatic_splitter.split('purp').should == []
|
45
|
+
automatic_splitter.split('pur').should == []
|
46
|
+
automatic_splitter.split('pu').should == []
|
47
|
+
automatic_splitter.split('p').should == []
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'can split text automatically (with partial)' do
|
51
|
+
automatic_splitter = Picky::Splitters::Automatic.new index[:text], partial: true
|
52
|
+
|
53
|
+
# It splits the text correctly.
|
54
|
+
#
|
55
|
+
automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow']
|
56
|
+
automatic_splitter.split('purplerain').should == ['purple', 'rain']
|
57
|
+
automatic_splitter.split('purple').should == ['purple']
|
58
|
+
|
59
|
+
# When it can't, it splits it using the partial index (correctly).
|
60
|
+
#
|
61
|
+
automatic_splitter.split('purplerainbo').should == ['purple', 'rain', 'bo']
|
62
|
+
automatic_splitter.split('purplerainb').should == ['purple', 'rain', 'b']
|
63
|
+
#
|
64
|
+
automatic_splitter.split('purplerai').should == ['purple', 'rai']
|
65
|
+
automatic_splitter.split('purplera').should == ['purple', 'ra']
|
66
|
+
automatic_splitter.split('purpler').should == ['purple'] # No 'r' in partial index.
|
67
|
+
#
|
68
|
+
automatic_splitter.split('purpl').should == ['purpl']
|
69
|
+
automatic_splitter.split('purp').should == ['purp']
|
70
|
+
automatic_splitter.split('pur').should == [] # No 'pur' in partial index etc.
|
71
|
+
automatic_splitter.split('pu').should == []
|
72
|
+
automatic_splitter.split('p').should == []
|
73
|
+
|
74
|
+
try = Picky::Search.new index do
|
75
|
+
searching splits_text_on: automatic_splitter
|
76
|
+
end
|
77
|
+
|
78
|
+
# Should find the one with all parts.
|
79
|
+
#
|
80
|
+
try.search('purplerainbow').ids.should == [1]
|
81
|
+
try.search('sunandrain').ids.should == [5]
|
82
|
+
|
83
|
+
# Common parts are found in multiple examples.
|
84
|
+
#
|
85
|
+
try.search('colorpurple').ids.should == [4,1]
|
86
|
+
try.search('bownew').ids.should == [3,1]
|
87
|
+
try.search('spainisking').ids.should == [6,1]
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'is fast enough' do
|
91
|
+
automatic_splitter = Picky::Splitters::Automatic.new index[:text]
|
92
|
+
|
93
|
+
performance_of do
|
94
|
+
automatic_splitter.split('purplerainbow')
|
95
|
+
end.should < 0.0002
|
96
|
+
end
|
97
|
+
|
98
|
+
end
|
@@ -3,7 +3,7 @@ require 'spec_helper'
|
|
3
3
|
describe Picky::Backends::File::Basic do
|
4
4
|
|
5
5
|
context 'without options' do
|
6
|
-
let(:basic) { described_class.new 'some/cache/path/to/file' }
|
6
|
+
let(:basic) { described_class.new 'spec/temp/some/cache/path/to/file' }
|
7
7
|
|
8
8
|
describe 'empty' do
|
9
9
|
it 'returns the container that is used for indexing' do
|
@@ -19,14 +19,14 @@ describe Picky::Backends::File::Basic do
|
|
19
19
|
|
20
20
|
describe 'to_s' do
|
21
21
|
it 'returns the cache path with the default file extension' do
|
22
|
-
basic.to_s.should == 'Picky::Backends::File::Basic(some/cache/path/to/file.file.index,some/cache/path/to/file.file_mapping.index.memory.json)'
|
22
|
+
basic.to_s.should == 'Picky::Backends::File::Basic(spec/temp/some/cache/path/to/file.file.index,spec/temp/some/cache/path/to/file.file_mapping.index.memory.json)'
|
23
23
|
end
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
27
|
context 'with options' do
|
28
28
|
let(:basic) do
|
29
|
-
described_class.new 'some/cache/path/to/file',
|
29
|
+
described_class.new 'spec/temp/some/cache/path/to/file',
|
30
30
|
empty: [],
|
31
31
|
initial: []
|
32
32
|
end
|
@@ -21,7 +21,7 @@ describe Picky::Backends::File do
|
|
21
21
|
# ].each do |type, kind|
|
22
22
|
# it "creates and returns a(n) #{type} index" do
|
23
23
|
# @backend.send(:"create_#{type}",
|
24
|
-
# stub(type, :index_path => "spec/
|
24
|
+
# stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
|
25
25
|
# ).should be_kind_of(kind)
|
26
26
|
# end
|
27
27
|
# end
|
@@ -44,7 +44,7 @@ describe Picky::Backends::File do
|
|
44
44
|
].each do |type, kind|
|
45
45
|
it "creates and returns a(n) #{type} index" do
|
46
46
|
@backend.send(:"create_#{type}",
|
47
|
-
stub(type, :index_path => "spec/
|
47
|
+
stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
|
48
48
|
).should be_kind_of(kind)
|
49
49
|
end
|
50
50
|
end
|
@@ -3,7 +3,7 @@ require 'spec_helper'
|
|
3
3
|
describe Picky::Backends::Memory::Basic do
|
4
4
|
|
5
5
|
context 'without options' do
|
6
|
-
let(:basic) { described_class.new 'some/cache/path/to/file' }
|
6
|
+
let(:basic) { described_class.new 'spec/temp/some/cache/path/to/file' }
|
7
7
|
|
8
8
|
describe 'empty' do
|
9
9
|
it 'returns the container that is used for indexing' do
|
@@ -19,14 +19,14 @@ describe Picky::Backends::Memory::Basic do
|
|
19
19
|
|
20
20
|
describe 'to_s' do
|
21
21
|
it 'returns the cache path with the default file extension' do
|
22
|
-
basic.to_s.should == 'Picky::Backends::Memory::Basic(some/cache/path/to/file.memory.index)'
|
22
|
+
basic.to_s.should == 'Picky::Backends::Memory::Basic(spec/temp/some/cache/path/to/file.memory.index)'
|
23
23
|
end
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
27
|
context 'with options' do
|
28
28
|
let(:basic) do
|
29
|
-
described_class.new 'some/cache/path/to/file',
|
29
|
+
described_class.new 'spec/temp/some/cache/path/to/file',
|
30
30
|
empty: [],
|
31
31
|
initial: []
|
32
32
|
end
|
@@ -3,7 +3,7 @@ require 'spec_helper'
|
|
3
3
|
describe Picky::Backends::Memory::JSON do
|
4
4
|
|
5
5
|
context 'hash-based indexes' do
|
6
|
-
let(:json) { described_class.new 'some/cache/path/to/file' }
|
6
|
+
let(:json) { described_class.new 'spec/temp/some/cache/path/to/file' }
|
7
7
|
|
8
8
|
describe 'extension' do
|
9
9
|
it 'is correct' do
|
@@ -37,7 +37,7 @@ describe Picky::Backends::Memory::JSON do
|
|
37
37
|
|
38
38
|
describe 'to_s' do
|
39
39
|
it 'returns the cache path with the default file extension' do
|
40
|
-
json.to_s.should == 'Picky::Backends::Memory::JSON(some/cache/path/to/file.memory.json)'
|
40
|
+
json.to_s.should == 'Picky::Backends::Memory::JSON(spec/temp/some/cache/path/to/file.memory.json)'
|
41
41
|
end
|
42
42
|
end
|
43
43
|
end
|
@@ -3,7 +3,7 @@ require 'spec_helper'
|
|
3
3
|
describe Picky::Backends::Memory::Marshal do
|
4
4
|
|
5
5
|
context 'hash-based indexes' do
|
6
|
-
let(:marshal) { described_class.new 'some/cache/path/to/file' }
|
6
|
+
let(:marshal) { described_class.new 'spec/temp/some/cache/path/to/file' }
|
7
7
|
|
8
8
|
describe 'extension' do
|
9
9
|
it 'is correct' do
|
@@ -37,7 +37,7 @@ describe Picky::Backends::Memory::Marshal do
|
|
37
37
|
|
38
38
|
describe 'to_s' do
|
39
39
|
it 'returns the cache path with the default file extension' do
|
40
|
-
marshal.to_s.should == 'Picky::Backends::Memory::Marshal(some/cache/path/to/file.memory.dump)'
|
40
|
+
marshal.to_s.should == 'Picky::Backends::Memory::Marshal(spec/temp/some/cache/path/to/file.memory.dump)'
|
41
41
|
end
|
42
42
|
end
|
43
43
|
end
|
@@ -21,7 +21,7 @@ describe Picky::Backends::Memory do
|
|
21
21
|
# ].each do |type, kind|
|
22
22
|
# it "creates and returns a(n) #{type} index" do
|
23
23
|
# @backend.send(:"create_#{type}",
|
24
|
-
# stub(type, :index_path => "spec/
|
24
|
+
# stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
|
25
25
|
# ).should be_kind_of(kind)
|
26
26
|
# end
|
27
27
|
# end
|
@@ -47,7 +47,7 @@ describe Picky::Backends::Memory do
|
|
47
47
|
# ].each do |type, kind|
|
48
48
|
# it "creates and returns a(n) #{type} index" do
|
49
49
|
# to_a_able_stub = Object.new
|
50
|
-
# to_a_able_stub.stub! :index_path => "spec/
|
50
|
+
# to_a_able_stub.stub! :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}"
|
51
51
|
# @backend.send(:"create_#{type}", to_a_able_stub).should be_kind_of(kind)
|
52
52
|
# end
|
53
53
|
# end
|
@@ -70,7 +70,7 @@ describe Picky::Backends::Memory do
|
|
70
70
|
].each do |type, kind|
|
71
71
|
it "creates and returns a(n) #{type} index" do
|
72
72
|
@backend.send(:"create_#{type}",
|
73
|
-
stub(type, :index_path => "spec/
|
73
|
+
stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
|
74
74
|
).should be_kind_of(kind)
|
75
75
|
end
|
76
76
|
end
|
@@ -5,7 +5,7 @@ require 'sqlite3'
|
|
5
5
|
describe Picky::Backends::SQLite::Array do
|
6
6
|
|
7
7
|
context 'hash-based indexes' do
|
8
|
-
let(:db) { described_class.new 'some/cache/path/to/file' }
|
8
|
+
let(:db) { described_class.new 'spec/temp/some/cache/path/to/file' }
|
9
9
|
|
10
10
|
describe 'dump' do
|
11
11
|
it 'delegates to the given hash' do
|
@@ -67,13 +67,13 @@ describe Picky::Backends::SQLite::Array do
|
|
67
67
|
|
68
68
|
describe 'to_s' do
|
69
69
|
it 'returns the cache path with the default file extension' do
|
70
|
-
db.to_s.should == 'Picky::Backends::SQLite::Array(some/cache/path/to/file.sqlite3)'
|
70
|
+
db.to_s.should == 'Picky::Backends::SQLite::Array(spec/temp/some/cache/path/to/file.sqlite3)'
|
71
71
|
end
|
72
72
|
end
|
73
73
|
end
|
74
74
|
|
75
75
|
context 'hash-based indexes' do
|
76
|
-
let(:db) { described_class.new 'some/cache/path/to/file', realtime: true }
|
76
|
+
let(:db) { described_class.new 'spec/temp/some/cache/path/to/file', realtime: true }
|
77
77
|
|
78
78
|
describe 'dump' do
|
79
79
|
it 'delegates to the given hash' do
|
@@ -135,7 +135,7 @@ describe Picky::Backends::SQLite::Array do
|
|
135
135
|
|
136
136
|
describe 'to_s' do
|
137
137
|
it 'returns the cache path with the default file extension' do
|
138
|
-
db.to_s.should == 'Picky::Backends::SQLite::Array(some/cache/path/to/file.sqlite3)'
|
138
|
+
db.to_s.should == 'Picky::Backends::SQLite::Array(spec/temp/some/cache/path/to/file.sqlite3)'
|
139
139
|
end
|
140
140
|
end
|
141
141
|
end
|
@@ -5,7 +5,7 @@ require 'sqlite3'
|
|
5
5
|
describe Picky::Backends::SQLite::Value do
|
6
6
|
|
7
7
|
context 'hash-based indexes' do
|
8
|
-
let(:db) { described_class.new 'some/cache/path/to/file' }
|
8
|
+
let(:db) { described_class.new 'spec/temp/some/cache/path/to/file' }
|
9
9
|
|
10
10
|
describe 'dump' do
|
11
11
|
it 'delegates to the given hash' do
|
@@ -67,7 +67,7 @@ describe Picky::Backends::SQLite::Value do
|
|
67
67
|
|
68
68
|
describe 'to_s' do
|
69
69
|
it 'returns the cache path with the default file extension' do
|
70
|
-
db.to_s.should == 'Picky::Backends::SQLite::Value(some/cache/path/to/file.sqlite3)'
|
70
|
+
db.to_s.should == 'Picky::Backends::SQLite::Value(spec/temp/some/cache/path/to/file.sqlite3)'
|
71
71
|
end
|
72
72
|
end
|
73
73
|
end
|
@@ -23,7 +23,7 @@ describe Picky::Backends::SQLite do
|
|
23
23
|
# ].each do |type, kind|
|
24
24
|
# it "creates and returns a(n) #{type} index" do
|
25
25
|
# @backend.send(:"create_#{type}",
|
26
|
-
# stub(type, :index_path => "spec/
|
26
|
+
# stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
|
27
27
|
# ).should be_kind_of(kind)
|
28
28
|
# end
|
29
29
|
# end
|
@@ -49,7 +49,7 @@ describe Picky::Backends::SQLite do
|
|
49
49
|
# ].each do |type, kind|
|
50
50
|
# it "creates and returns a(n) #{type} index" do
|
51
51
|
# to_a_able_stub = Object.new
|
52
|
-
# to_a_able_stub.stub! :index_path => "spec/
|
52
|
+
# to_a_able_stub.stub! :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}"
|
53
53
|
# @backend.send(:"create_#{type}", to_a_able_stub).should be_kind_of(kind)
|
54
54
|
# end
|
55
55
|
# end
|
@@ -72,7 +72,7 @@ describe Picky::Backends::SQLite do
|
|
72
72
|
].each do |type, kind|
|
73
73
|
it "creates and returns a(n) #{type} index" do
|
74
74
|
@backend.send(:"create_#{type}",
|
75
|
-
stub(type, :index_path => "spec/
|
75
|
+
stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
|
76
76
|
).should be_kind_of(kind)
|
77
77
|
end
|
78
78
|
end
|
@@ -119,7 +119,7 @@ describe Picky::Bundle do
|
|
119
119
|
it "uses the right file" do
|
120
120
|
MultiJson.stub! :decode
|
121
121
|
|
122
|
-
File.should_receive(:open).once.with 'spec/
|
122
|
+
File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_inverted.memory.json', 'r'
|
123
123
|
|
124
124
|
@bundle.load_inverted
|
125
125
|
end
|
@@ -128,7 +128,7 @@ describe Picky::Bundle do
|
|
128
128
|
it "uses the right file" do
|
129
129
|
MultiJson.stub! :decode
|
130
130
|
|
131
|
-
File.should_receive(:open).once.with 'spec/
|
131
|
+
File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_weights.memory.json', 'r'
|
132
132
|
|
133
133
|
@bundle.load_weights
|
134
134
|
end
|
@@ -137,7 +137,7 @@ describe Picky::Bundle do
|
|
137
137
|
it "uses the right file" do
|
138
138
|
Marshal.stub! :load
|
139
139
|
|
140
|
-
File.should_receive(:open).once.with 'spec/
|
140
|
+
File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_similarity.memory.dump', 'r:binary'
|
141
141
|
|
142
142
|
@bundle.load_similarity
|
143
143
|
end
|
@@ -146,7 +146,7 @@ describe Picky::Bundle do
|
|
146
146
|
it "uses the right file" do
|
147
147
|
MultiJson.stub! :decode
|
148
148
|
|
149
|
-
File.should_receive(:open).once.with 'spec/
|
149
|
+
File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_configuration.memory.json', 'r'
|
150
150
|
|
151
151
|
@bundle.load_configuration
|
152
152
|
end
|
data/spec/lib/bundle_spec.rb
CHANGED
@@ -22,10 +22,10 @@ describe Picky::Bundle do
|
|
22
22
|
|
23
23
|
describe 'index_path' do
|
24
24
|
it 'is correct' do
|
25
|
-
bundle.index_path(:some_type).should == 'spec/
|
25
|
+
bundle.index_path(:some_type).should == 'spec/temp/index/test/some_index/some_category_some_name_some_type'
|
26
26
|
end
|
27
27
|
it 'is correct' do
|
28
|
-
bundle.index_path.should == 'spec/
|
28
|
+
bundle.index_path.should == 'spec/temp/index/test/some_index/some_category_some_name'
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
File without changes
|
data/spec/lib/category_spec.rb
CHANGED
@@ -27,7 +27,7 @@ describe Picky::Category do
|
|
27
27
|
context 'directories' do
|
28
28
|
let(:category) { described_class.new :some_category, index }
|
29
29
|
it 'is correct' do
|
30
|
-
category.prepared_index_path.should == 'spec/
|
30
|
+
category.prepared_index_path.should == 'spec/temp/index/test/some_index/some_category'
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
data/spec/lib/index_spec.rb
CHANGED
data/spec/lib/loader_spec.rb
CHANGED
@@ -18,7 +18,7 @@ describe Picky::Loader do
|
|
18
18
|
|
19
19
|
describe 'load_application' do
|
20
20
|
it 'does ok' do
|
21
|
-
Kernel.should_receive(:load).once.with 'spec/
|
21
|
+
Kernel.should_receive(:load).once.with 'spec/temp/app.rb'
|
22
22
|
|
23
23
|
lambda { described_class.load_application }.should_not raise_error
|
24
24
|
end
|
@@ -36,13 +36,15 @@ describe Performant::Array do
|
|
36
36
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
37
37
|
arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
|
38
38
|
|
39
|
-
#
|
39
|
+
# Brute force.
|
40
|
+
#
|
40
41
|
performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.001
|
41
42
|
end
|
42
43
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
43
44
|
arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
|
44
45
|
|
45
46
|
# &
|
47
|
+
#
|
46
48
|
performance_of do
|
47
49
|
arys.inject(arys.shift.dup) do |total, ary|
|
48
50
|
total & arys
|
@@ -75,13 +77,56 @@ describe Performant::Array do
|
|
75
77
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
76
78
|
arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a]
|
77
79
|
|
78
|
-
#
|
80
|
+
# Brute force.
|
81
|
+
#
|
79
82
|
performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.001
|
80
83
|
end
|
81
84
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
82
85
|
arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a << 7]
|
83
86
|
|
84
87
|
# &
|
88
|
+
#
|
89
|
+
performance_of do
|
90
|
+
arys.inject(arys.shift.dup) do |total, ary|
|
91
|
+
total & arys
|
92
|
+
end
|
93
|
+
end.should < 0.0015
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
describe "memory_efficient_intersect with strings" do
|
98
|
+
it "should intersect empty arrays correctly" do
|
99
|
+
arys = [['c','d'], ['a','b','c'], []]
|
100
|
+
|
101
|
+
Performant::Array.memory_efficient_intersect(arys).should == []
|
102
|
+
end
|
103
|
+
it "should handle intermediate empty results correctly" do
|
104
|
+
arys = [['e','d'], ['a','b','c'], ['c','d','e','h','i']]
|
105
|
+
|
106
|
+
Performant::Array.memory_efficient_intersect(arys).should == []
|
107
|
+
end
|
108
|
+
it "should intersect correctly" do
|
109
|
+
arys = [['c','d'], ['a','b','c'], ['c','d','e','h','i']]
|
110
|
+
|
111
|
+
Performant::Array.memory_efficient_intersect(arys).should == ['c']
|
112
|
+
end
|
113
|
+
it "should intersect many arrays" do
|
114
|
+
arys = [['c','d','e','f','g'], ['a','b','c','e','f','g'], ['c','d','e','f','g','h','i'], ['a','b','c','d','e','f','g','h','i','j'], ['b','c','e','f','g','s'], ['a','b','c','d','e','f','g','h','i','j'], ['b','c','e','f','g','s']]
|
115
|
+
|
116
|
+
Performant::Array.memory_efficient_intersect(arys).should == ['c','e','f','g']
|
117
|
+
end
|
118
|
+
it "should be optimal for 2 small arrays of 50/10_000" do
|
119
|
+
arys = [('1'..'50').to_a, ('10000'..'20000').to_a]
|
120
|
+
|
121
|
+
# Brute force - note that it is slower than the Symbols/Integers version.
|
122
|
+
#
|
123
|
+
performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.0015
|
124
|
+
end
|
125
|
+
it "should be optimal for 2 small arrays of 50/10_000" do
|
126
|
+
arys = [('1'..'50').to_a, ('10000'..'20000').to_a << 7]
|
127
|
+
|
128
|
+
# &
|
129
|
+
#
|
85
130
|
performance_of do
|
86
131
|
arys.inject(arys.shift.dup) do |total, ary|
|
87
132
|
total & arys
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.
|
4
|
+
version: 4.12.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-11-
|
12
|
+
date: 2012-11-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -34,7 +34,7 @@ dependencies:
|
|
34
34
|
requirements:
|
35
35
|
- - ~>
|
36
36
|
- !ruby/object:Gem::Version
|
37
|
-
version: 4.
|
37
|
+
version: 4.12.0
|
38
38
|
type: :development
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -42,7 +42,7 @@ dependencies:
|
|
42
42
|
requirements:
|
43
43
|
- - ~>
|
44
44
|
- !ruby/object:Gem::Version
|
45
|
-
version: 4.
|
45
|
+
version: 4.12.0
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
47
|
name: text
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
@@ -242,6 +242,7 @@ files:
|
|
242
242
|
- lib/picky/sinatra/index_actions.rb
|
243
243
|
- lib/picky/sinatra.rb
|
244
244
|
- lib/picky/source.rb
|
245
|
+
- lib/picky/splitters/automatic.rb
|
245
246
|
- lib/picky/statistics.rb
|
246
247
|
- lib/picky/tasks.rb
|
247
248
|
- lib/picky/tokenizer.rb
|
@@ -261,9 +262,8 @@ files:
|
|
261
262
|
- lib/tasks/try.rake
|
262
263
|
- lib/performant.c
|
263
264
|
- spec/aux/picky/cli_spec.rb
|
264
|
-
- spec/category_realtime_spec.rb
|
265
|
-
- spec/ext/performant_spec.rb
|
266
265
|
- spec/functional/allocations_uniq_by_definition_spec.rb
|
266
|
+
- spec/functional/automatic_segmentation_spec.rb
|
267
267
|
- spec/functional/backends/file_spec.rb
|
268
268
|
- spec/functional/backends/memory_bundle_realtime_spec.rb
|
269
269
|
- spec/functional/backends/memory_json_utf8_spec.rb
|
@@ -328,6 +328,7 @@ files:
|
|
328
328
|
- spec/lib/category/location_spec.rb
|
329
329
|
- spec/lib/category_indexed_spec.rb
|
330
330
|
- spec/lib/category_indexing_spec.rb
|
331
|
+
- spec/lib/category_realtime_spec.rb
|
331
332
|
- spec/lib/category_spec.rb
|
332
333
|
- spec/lib/character_substituters/west_european_spec.rb
|
333
334
|
- spec/lib/extensions/array_spec.rb
|
@@ -398,6 +399,7 @@ files:
|
|
398
399
|
- spec/lib/statistics_spec.rb
|
399
400
|
- spec/lib/tasks/try_spec.rb
|
400
401
|
- spec/lib/tokenizer_spec.rb
|
402
|
+
- spec/performant_spec.rb
|
401
403
|
- bin/picky
|
402
404
|
homepage: http://florianhanke.com/picky
|
403
405
|
licenses: []
|
@@ -425,9 +427,8 @@ specification_version: 3
|
|
425
427
|
summary: ! 'Picky: Semantic Search Engine. Clever Interface. Good Tools.'
|
426
428
|
test_files:
|
427
429
|
- spec/aux/picky/cli_spec.rb
|
428
|
-
- spec/category_realtime_spec.rb
|
429
|
-
- spec/ext/performant_spec.rb
|
430
430
|
- spec/functional/allocations_uniq_by_definition_spec.rb
|
431
|
+
- spec/functional/automatic_segmentation_spec.rb
|
431
432
|
- spec/functional/backends/file_spec.rb
|
432
433
|
- spec/functional/backends/memory_bundle_realtime_spec.rb
|
433
434
|
- spec/functional/backends/memory_json_utf8_spec.rb
|
@@ -492,6 +493,7 @@ test_files:
|
|
492
493
|
- spec/lib/category/location_spec.rb
|
493
494
|
- spec/lib/category_indexed_spec.rb
|
494
495
|
- spec/lib/category_indexing_spec.rb
|
496
|
+
- spec/lib/category_realtime_spec.rb
|
495
497
|
- spec/lib/category_spec.rb
|
496
498
|
- spec/lib/character_substituters/west_european_spec.rb
|
497
499
|
- spec/lib/extensions/array_spec.rb
|
@@ -562,4 +564,5 @@ test_files:
|
|
562
564
|
- spec/lib/statistics_spec.rb
|
563
565
|
- spec/lib/tasks/try_spec.rb
|
564
566
|
- spec/lib/tokenizer_spec.rb
|
567
|
+
- spec/performant_spec.rb
|
565
568
|
has_rdoc:
|