picky 4.11.3 → 4.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/performant.c +10 -22
- data/lib/picky/loader.rb +1 -0
- data/lib/picky/splitters/automatic.rb +82 -0
- data/lib/picky/tokenizer.rb +17 -8
- data/spec/functional/automatic_segmentation_spec.rb +98 -0
- data/spec/lib/backends/file/basic_spec.rb +3 -3
- data/spec/lib/backends/file_spec.rb +2 -2
- data/spec/lib/backends/memory/basic_spec.rb +3 -3
- data/spec/lib/backends/memory/json_spec.rb +2 -2
- data/spec/lib/backends/memory/marshal_spec.rb +2 -2
- data/spec/lib/backends/memory_spec.rb +3 -3
- data/spec/lib/backends/sqlite/array_spec.rb +4 -4
- data/spec/lib/backends/sqlite/value_spec.rb +2 -2
- data/spec/lib/backends/sqlite_spec.rb +3 -3
- data/spec/lib/bundle_indexed_spec.rb +4 -4
- data/spec/lib/bundle_spec.rb +2 -2
- data/spec/{category_realtime_spec.rb → lib/category_realtime_spec.rb} +0 -0
- data/spec/lib/category_spec.rb +1 -1
- data/spec/lib/index_spec.rb +1 -1
- data/spec/lib/loader_spec.rb +1 -1
- data/spec/{ext/performant_spec.rb → performant_spec.rb} +47 -2
- metadata +11 -8
data/lib/performant.c
CHANGED
@@ -1,17 +1,5 @@
|
|
1
1
|
#include "ruby.h"
|
2
2
|
|
3
|
-
// Copying internal ruby methods.
|
4
|
-
//
|
5
|
-
static inline VALUE rb_ary_elt(ary, offset)
|
6
|
-
VALUE ary;
|
7
|
-
long offset;
|
8
|
-
{
|
9
|
-
if (RARRAY_LEN(ary) == 0) return Qnil;
|
10
|
-
if (offset < 0 || RARRAY_LEN(ary) <= offset) {
|
11
|
-
return Qnil;
|
12
|
-
}
|
13
|
-
return RARRAY_PTR(ary)[offset];
|
14
|
-
}
|
15
3
|
static inline VALUE ary_make_hash(ary1, ary2)
|
16
4
|
VALUE ary1, ary2;
|
17
5
|
{
|
@@ -19,11 +7,11 @@ static inline VALUE ary_make_hash(ary1, ary2)
|
|
19
7
|
long i;
|
20
8
|
|
21
9
|
for (i=0; i<RARRAY_LEN(ary1); i++) {
|
22
|
-
rb_hash_aset(hash,
|
10
|
+
rb_hash_aset(hash, rb_ary_entry(ary1,i), Qtrue);
|
23
11
|
}
|
24
12
|
if (ary2) {
|
25
13
|
for (i=0; i<RARRAY_LEN(ary2); i++) {
|
26
|
-
rb_hash_aset(hash,
|
14
|
+
rb_hash_aset(hash, rb_ary_entry(ary2, i), Qtrue);
|
27
15
|
}
|
28
16
|
}
|
29
17
|
return hash;
|
@@ -44,19 +32,19 @@ static inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_
|
|
44
32
|
|
45
33
|
// Vars.
|
46
34
|
//
|
47
|
-
|
35
|
+
VALUE rb_array_of_arrays;
|
48
36
|
VALUE smallest_array;
|
49
37
|
VALUE current_array;
|
50
38
|
VALUE hash;
|
51
39
|
|
52
40
|
// Temps.
|
53
41
|
//
|
54
|
-
VALUE v
|
42
|
+
VALUE v;
|
55
43
|
|
56
44
|
// Conversions & presorting.
|
57
45
|
//
|
58
|
-
rb_array_of_arrays =
|
59
|
-
smallest_array =
|
46
|
+
rb_array_of_arrays = rb_block_call(unsorted_array_of_arrays, rb_intern("sort_by!"), 0, 0, rb_ary_length, 0);
|
47
|
+
smallest_array = rb_ary_dup(rb_ary_entry(rb_array_of_arrays, 0));
|
60
48
|
|
61
49
|
// Iterate through all arrays.
|
62
50
|
//
|
@@ -77,10 +65,10 @@ static inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_
|
|
77
65
|
|
78
66
|
// Iterate through all array elements.
|
79
67
|
//
|
80
|
-
current_array =
|
68
|
+
current_array = rb_ary_entry(rb_array_of_arrays, i);
|
81
69
|
for (j = 0; j < RARRAY_LEN(current_array); j++) {
|
82
|
-
v =
|
83
|
-
if (
|
70
|
+
v = rb_ary_entry(current_array, j);
|
71
|
+
if (rb_hash_delete(hash, v) != Qnil) {
|
84
72
|
rb_ary_push(smallest_array, v);
|
85
73
|
}
|
86
74
|
}
|
@@ -95,4 +83,4 @@ void Init_performant() {
|
|
95
83
|
p_mPerformant = rb_define_module("Performant");
|
96
84
|
p_cArray = rb_define_class_under(p_mPerformant, "Array", rb_cObject);
|
97
85
|
rb_define_singleton_method(p_cArray, "memory_efficient_intersect", memory_efficient_intersect, 1);
|
98
|
-
}
|
86
|
+
}
|
data/lib/picky/loader.rb
CHANGED
@@ -268,6 +268,7 @@ module Picky
|
|
268
268
|
load_relative 'tokenizer'
|
269
269
|
# load_relative 'rack/harakiri' # Needs to be explicitly loaded/required.
|
270
270
|
load_relative 'character_substituters/west_european'
|
271
|
+
load_relative 'splitters/automatic'
|
271
272
|
load_generators
|
272
273
|
load_inner_api
|
273
274
|
load_results
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
module Splitters
|
4
|
+
|
5
|
+
# Automatic Splitter.
|
6
|
+
#
|
7
|
+
# Use as a splitter for the splits_text_on option
|
8
|
+
# for Searches. You need to give it an index category
|
9
|
+
# to use for the splitting.
|
10
|
+
#
|
11
|
+
# Example:
|
12
|
+
# Picky::Search.new index do
|
13
|
+
# searching splits_text_on: Picky::Splitters::Automatic.new(index[:name])
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
# Will split most queries correctly.
|
17
|
+
# However, has the following problems:
|
18
|
+
# * "cannot" is usually split as ['can', 'not']
|
19
|
+
# * "rainbow" is usually split as ['rain', 'bow']
|
20
|
+
#
|
21
|
+
# Reference: http://norvig.com/ngrams/ch14.pdf.
|
22
|
+
#
|
23
|
+
# Adapted from a script submitted
|
24
|
+
# by Andy Kitchen.
|
25
|
+
#
|
26
|
+
class Automatic
|
27
|
+
|
28
|
+
def initialize category, options = {}
|
29
|
+
@exact = category.exact
|
30
|
+
@partial = category.partial
|
31
|
+
@with_partial = options[:partial]
|
32
|
+
|
33
|
+
reset_memoization
|
34
|
+
end
|
35
|
+
|
36
|
+
# Reset the memoization.
|
37
|
+
#
|
38
|
+
def reset_memoization
|
39
|
+
@exact_memo = {}
|
40
|
+
@partial_memo = {}
|
41
|
+
end
|
42
|
+
|
43
|
+
# Split the given text into its most
|
44
|
+
# likely constituents.
|
45
|
+
#
|
46
|
+
def split text
|
47
|
+
segment(text, @with_partial).first
|
48
|
+
end
|
49
|
+
|
50
|
+
# Return all splits of a given string.
|
51
|
+
#
|
52
|
+
def splits text
|
53
|
+
l = text.length
|
54
|
+
(0..l-1).map do |x|
|
55
|
+
[text.slice(0,x), text.slice(x,l)]
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
# Segments the given text recursively.
|
60
|
+
#
|
61
|
+
def segment text, use_partial = false
|
62
|
+
(use_partial ? @partial_memo : @exact_memo)[text] ||= splits(text).inject([[], nil]) do |(current, heaviest), (head, tail)|
|
63
|
+
tail_weight = use_partial ? @partial.weight(tail) : @exact.weight(tail)
|
64
|
+
|
65
|
+
segments, head_weight = segment head
|
66
|
+
|
67
|
+
weight = (head_weight && tail_weight &&
|
68
|
+
(head_weight + tail_weight) ||
|
69
|
+
tail_weight || head_weight)
|
70
|
+
if (weight || -1) > (heaviest || 0)
|
71
|
+
[tail_weight ? segments + [tail] : segments, weight]
|
72
|
+
else
|
73
|
+
[current, heaviest]
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
end
|
79
|
+
|
80
|
+
end
|
81
|
+
|
82
|
+
end
|
data/lib/picky/tokenizer.rb
CHANGED
@@ -99,16 +99,25 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
|
99
99
|
#
|
100
100
|
def splits_text_on thing
|
101
101
|
raise ArgumentError.new "#{__method__} takes a Regexp or String or a thing that responds to #split as argument, not a #{thing.class}." unless Regexp === thing || thing.respond_to?(:split)
|
102
|
-
@splits_text_on = thing
|
103
|
-
|
104
|
-
def split text
|
105
|
-
@splits_text_on.split text
|
106
|
-
end
|
102
|
+
@splits_text_on = if thing.respond_to? :split
|
103
|
+
thing
|
107
104
|
else
|
108
|
-
|
109
|
-
|
110
|
-
|
105
|
+
RegexpWrapper.new thing
|
106
|
+
end
|
107
|
+
end
|
108
|
+
class RegexpWrapper
|
109
|
+
def initialize regexp
|
110
|
+
@regexp = regexp
|
111
|
+
end
|
112
|
+
def split text
|
113
|
+
text.split @regexp
|
111
114
|
end
|
115
|
+
def source
|
116
|
+
@regexp.source
|
117
|
+
end
|
118
|
+
end
|
119
|
+
def split text
|
120
|
+
@splits_text_on.split text
|
112
121
|
end
|
113
122
|
|
114
123
|
# Normalizing.
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe "automatic splitting" do
|
6
|
+
|
7
|
+
let(:index) do
|
8
|
+
index = Picky::Index.new :automatic_text_splitting do
|
9
|
+
indexing removes_characters: /[^a-z\s]/i,
|
10
|
+
stopwords: /\b(in|a)\b/
|
11
|
+
category :text
|
12
|
+
end
|
13
|
+
|
14
|
+
require 'ostruct'
|
15
|
+
index.add OpenStruct.new(id: 1, text: 'It does rain in Spain. Purple is a new color. Bow to the king.')
|
16
|
+
index.add OpenStruct.new(id: 2, text: 'Rainbow rainbow.')
|
17
|
+
index.add OpenStruct.new(id: 3, text: 'Bow and arrow in Papua New Guinea.')
|
18
|
+
index.add OpenStruct.new(id: 4, text: 'The color purple.')
|
19
|
+
index.add OpenStruct.new(id: 5, text: 'Sun and rain.')
|
20
|
+
index.add OpenStruct.new(id: 6, text: 'The king is in Spain.')
|
21
|
+
|
22
|
+
index
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'can split the text automatically' do
|
26
|
+
automatic_splitter = Picky::Splitters::Automatic.new index[:text]
|
27
|
+
|
28
|
+
# It splits the text correctly.
|
29
|
+
#
|
30
|
+
automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow']
|
31
|
+
automatic_splitter.split('purplerain').should == ['purple', 'rain']
|
32
|
+
automatic_splitter.split('purple').should == ['purple']
|
33
|
+
|
34
|
+
# When it can't, it splits it using the partial index (correctly).
|
35
|
+
#
|
36
|
+
automatic_splitter.split('purplerainbo').should == ['purple', 'rain']
|
37
|
+
automatic_splitter.split('purplerainb').should == ['purple', 'rain']
|
38
|
+
#
|
39
|
+
automatic_splitter.split('purplerai').should == ['purple']
|
40
|
+
automatic_splitter.split('purplera').should == ['purple']
|
41
|
+
automatic_splitter.split('purpler').should == ['purple']
|
42
|
+
#
|
43
|
+
automatic_splitter.split('purpl').should == []
|
44
|
+
automatic_splitter.split('purp').should == []
|
45
|
+
automatic_splitter.split('pur').should == []
|
46
|
+
automatic_splitter.split('pu').should == []
|
47
|
+
automatic_splitter.split('p').should == []
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'can split text automatically (with partial)' do
|
51
|
+
automatic_splitter = Picky::Splitters::Automatic.new index[:text], partial: true
|
52
|
+
|
53
|
+
# It splits the text correctly.
|
54
|
+
#
|
55
|
+
automatic_splitter.split('purplerainbow').should == ['purple', 'rain', 'bow']
|
56
|
+
automatic_splitter.split('purplerain').should == ['purple', 'rain']
|
57
|
+
automatic_splitter.split('purple').should == ['purple']
|
58
|
+
|
59
|
+
# When it can't, it splits it using the partial index (correctly).
|
60
|
+
#
|
61
|
+
automatic_splitter.split('purplerainbo').should == ['purple', 'rain', 'bo']
|
62
|
+
automatic_splitter.split('purplerainb').should == ['purple', 'rain', 'b']
|
63
|
+
#
|
64
|
+
automatic_splitter.split('purplerai').should == ['purple', 'rai']
|
65
|
+
automatic_splitter.split('purplera').should == ['purple', 'ra']
|
66
|
+
automatic_splitter.split('purpler').should == ['purple'] # No 'r' in partial index.
|
67
|
+
#
|
68
|
+
automatic_splitter.split('purpl').should == ['purpl']
|
69
|
+
automatic_splitter.split('purp').should == ['purp']
|
70
|
+
automatic_splitter.split('pur').should == [] # No 'pur' in partial index etc.
|
71
|
+
automatic_splitter.split('pu').should == []
|
72
|
+
automatic_splitter.split('p').should == []
|
73
|
+
|
74
|
+
try = Picky::Search.new index do
|
75
|
+
searching splits_text_on: automatic_splitter
|
76
|
+
end
|
77
|
+
|
78
|
+
# Should find the one with all parts.
|
79
|
+
#
|
80
|
+
try.search('purplerainbow').ids.should == [1]
|
81
|
+
try.search('sunandrain').ids.should == [5]
|
82
|
+
|
83
|
+
# Common parts are found in multiple examples.
|
84
|
+
#
|
85
|
+
try.search('colorpurple').ids.should == [4,1]
|
86
|
+
try.search('bownew').ids.should == [3,1]
|
87
|
+
try.search('spainisking').ids.should == [6,1]
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'is fast enough' do
|
91
|
+
automatic_splitter = Picky::Splitters::Automatic.new index[:text]
|
92
|
+
|
93
|
+
performance_of do
|
94
|
+
automatic_splitter.split('purplerainbow')
|
95
|
+
end.should < 0.0002
|
96
|
+
end
|
97
|
+
|
98
|
+
end
|
@@ -3,7 +3,7 @@ require 'spec_helper'
|
|
3
3
|
describe Picky::Backends::File::Basic do
|
4
4
|
|
5
5
|
context 'without options' do
|
6
|
-
let(:basic) { described_class.new 'some/cache/path/to/file' }
|
6
|
+
let(:basic) { described_class.new 'spec/temp/some/cache/path/to/file' }
|
7
7
|
|
8
8
|
describe 'empty' do
|
9
9
|
it 'returns the container that is used for indexing' do
|
@@ -19,14 +19,14 @@ describe Picky::Backends::File::Basic do
|
|
19
19
|
|
20
20
|
describe 'to_s' do
|
21
21
|
it 'returns the cache path with the default file extension' do
|
22
|
-
basic.to_s.should == 'Picky::Backends::File::Basic(some/cache/path/to/file.file.index,some/cache/path/to/file.file_mapping.index.memory.json)'
|
22
|
+
basic.to_s.should == 'Picky::Backends::File::Basic(spec/temp/some/cache/path/to/file.file.index,spec/temp/some/cache/path/to/file.file_mapping.index.memory.json)'
|
23
23
|
end
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
27
|
context 'with options' do
|
28
28
|
let(:basic) do
|
29
|
-
described_class.new 'some/cache/path/to/file',
|
29
|
+
described_class.new 'spec/temp/some/cache/path/to/file',
|
30
30
|
empty: [],
|
31
31
|
initial: []
|
32
32
|
end
|
@@ -21,7 +21,7 @@ describe Picky::Backends::File do
|
|
21
21
|
# ].each do |type, kind|
|
22
22
|
# it "creates and returns a(n) #{type} index" do
|
23
23
|
# @backend.send(:"create_#{type}",
|
24
|
-
# stub(type, :index_path => "spec/
|
24
|
+
# stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
|
25
25
|
# ).should be_kind_of(kind)
|
26
26
|
# end
|
27
27
|
# end
|
@@ -44,7 +44,7 @@ describe Picky::Backends::File do
|
|
44
44
|
].each do |type, kind|
|
45
45
|
it "creates and returns a(n) #{type} index" do
|
46
46
|
@backend.send(:"create_#{type}",
|
47
|
-
stub(type, :index_path => "spec/
|
47
|
+
stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
|
48
48
|
).should be_kind_of(kind)
|
49
49
|
end
|
50
50
|
end
|
@@ -3,7 +3,7 @@ require 'spec_helper'
|
|
3
3
|
describe Picky::Backends::Memory::Basic do
|
4
4
|
|
5
5
|
context 'without options' do
|
6
|
-
let(:basic) { described_class.new 'some/cache/path/to/file' }
|
6
|
+
let(:basic) { described_class.new 'spec/temp/some/cache/path/to/file' }
|
7
7
|
|
8
8
|
describe 'empty' do
|
9
9
|
it 'returns the container that is used for indexing' do
|
@@ -19,14 +19,14 @@ describe Picky::Backends::Memory::Basic do
|
|
19
19
|
|
20
20
|
describe 'to_s' do
|
21
21
|
it 'returns the cache path with the default file extension' do
|
22
|
-
basic.to_s.should == 'Picky::Backends::Memory::Basic(some/cache/path/to/file.memory.index)'
|
22
|
+
basic.to_s.should == 'Picky::Backends::Memory::Basic(spec/temp/some/cache/path/to/file.memory.index)'
|
23
23
|
end
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
27
|
context 'with options' do
|
28
28
|
let(:basic) do
|
29
|
-
described_class.new 'some/cache/path/to/file',
|
29
|
+
described_class.new 'spec/temp/some/cache/path/to/file',
|
30
30
|
empty: [],
|
31
31
|
initial: []
|
32
32
|
end
|
@@ -3,7 +3,7 @@ require 'spec_helper'
|
|
3
3
|
describe Picky::Backends::Memory::JSON do
|
4
4
|
|
5
5
|
context 'hash-based indexes' do
|
6
|
-
let(:json) { described_class.new 'some/cache/path/to/file' }
|
6
|
+
let(:json) { described_class.new 'spec/temp/some/cache/path/to/file' }
|
7
7
|
|
8
8
|
describe 'extension' do
|
9
9
|
it 'is correct' do
|
@@ -37,7 +37,7 @@ describe Picky::Backends::Memory::JSON do
|
|
37
37
|
|
38
38
|
describe 'to_s' do
|
39
39
|
it 'returns the cache path with the default file extension' do
|
40
|
-
json.to_s.should == 'Picky::Backends::Memory::JSON(some/cache/path/to/file.memory.json)'
|
40
|
+
json.to_s.should == 'Picky::Backends::Memory::JSON(spec/temp/some/cache/path/to/file.memory.json)'
|
41
41
|
end
|
42
42
|
end
|
43
43
|
end
|
@@ -3,7 +3,7 @@ require 'spec_helper'
|
|
3
3
|
describe Picky::Backends::Memory::Marshal do
|
4
4
|
|
5
5
|
context 'hash-based indexes' do
|
6
|
-
let(:marshal) { described_class.new 'some/cache/path/to/file' }
|
6
|
+
let(:marshal) { described_class.new 'spec/temp/some/cache/path/to/file' }
|
7
7
|
|
8
8
|
describe 'extension' do
|
9
9
|
it 'is correct' do
|
@@ -37,7 +37,7 @@ describe Picky::Backends::Memory::Marshal do
|
|
37
37
|
|
38
38
|
describe 'to_s' do
|
39
39
|
it 'returns the cache path with the default file extension' do
|
40
|
-
marshal.to_s.should == 'Picky::Backends::Memory::Marshal(some/cache/path/to/file.memory.dump)'
|
40
|
+
marshal.to_s.should == 'Picky::Backends::Memory::Marshal(spec/temp/some/cache/path/to/file.memory.dump)'
|
41
41
|
end
|
42
42
|
end
|
43
43
|
end
|
@@ -21,7 +21,7 @@ describe Picky::Backends::Memory do
|
|
21
21
|
# ].each do |type, kind|
|
22
22
|
# it "creates and returns a(n) #{type} index" do
|
23
23
|
# @backend.send(:"create_#{type}",
|
24
|
-
# stub(type, :index_path => "spec/
|
24
|
+
# stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
|
25
25
|
# ).should be_kind_of(kind)
|
26
26
|
# end
|
27
27
|
# end
|
@@ -47,7 +47,7 @@ describe Picky::Backends::Memory do
|
|
47
47
|
# ].each do |type, kind|
|
48
48
|
# it "creates and returns a(n) #{type} index" do
|
49
49
|
# to_a_able_stub = Object.new
|
50
|
-
# to_a_able_stub.stub! :index_path => "spec/
|
50
|
+
# to_a_able_stub.stub! :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}"
|
51
51
|
# @backend.send(:"create_#{type}", to_a_able_stub).should be_kind_of(kind)
|
52
52
|
# end
|
53
53
|
# end
|
@@ -70,7 +70,7 @@ describe Picky::Backends::Memory do
|
|
70
70
|
].each do |type, kind|
|
71
71
|
it "creates and returns a(n) #{type} index" do
|
72
72
|
@backend.send(:"create_#{type}",
|
73
|
-
stub(type, :index_path => "spec/
|
73
|
+
stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
|
74
74
|
).should be_kind_of(kind)
|
75
75
|
end
|
76
76
|
end
|
@@ -5,7 +5,7 @@ require 'sqlite3'
|
|
5
5
|
describe Picky::Backends::SQLite::Array do
|
6
6
|
|
7
7
|
context 'hash-based indexes' do
|
8
|
-
let(:db) { described_class.new 'some/cache/path/to/file' }
|
8
|
+
let(:db) { described_class.new 'spec/temp/some/cache/path/to/file' }
|
9
9
|
|
10
10
|
describe 'dump' do
|
11
11
|
it 'delegates to the given hash' do
|
@@ -67,13 +67,13 @@ describe Picky::Backends::SQLite::Array do
|
|
67
67
|
|
68
68
|
describe 'to_s' do
|
69
69
|
it 'returns the cache path with the default file extension' do
|
70
|
-
db.to_s.should == 'Picky::Backends::SQLite::Array(some/cache/path/to/file.sqlite3)'
|
70
|
+
db.to_s.should == 'Picky::Backends::SQLite::Array(spec/temp/some/cache/path/to/file.sqlite3)'
|
71
71
|
end
|
72
72
|
end
|
73
73
|
end
|
74
74
|
|
75
75
|
context 'hash-based indexes' do
|
76
|
-
let(:db) { described_class.new 'some/cache/path/to/file', realtime: true }
|
76
|
+
let(:db) { described_class.new 'spec/temp/some/cache/path/to/file', realtime: true }
|
77
77
|
|
78
78
|
describe 'dump' do
|
79
79
|
it 'delegates to the given hash' do
|
@@ -135,7 +135,7 @@ describe Picky::Backends::SQLite::Array do
|
|
135
135
|
|
136
136
|
describe 'to_s' do
|
137
137
|
it 'returns the cache path with the default file extension' do
|
138
|
-
db.to_s.should == 'Picky::Backends::SQLite::Array(some/cache/path/to/file.sqlite3)'
|
138
|
+
db.to_s.should == 'Picky::Backends::SQLite::Array(spec/temp/some/cache/path/to/file.sqlite3)'
|
139
139
|
end
|
140
140
|
end
|
141
141
|
end
|
@@ -5,7 +5,7 @@ require 'sqlite3'
|
|
5
5
|
describe Picky::Backends::SQLite::Value do
|
6
6
|
|
7
7
|
context 'hash-based indexes' do
|
8
|
-
let(:db) { described_class.new 'some/cache/path/to/file' }
|
8
|
+
let(:db) { described_class.new 'spec/temp/some/cache/path/to/file' }
|
9
9
|
|
10
10
|
describe 'dump' do
|
11
11
|
it 'delegates to the given hash' do
|
@@ -67,7 +67,7 @@ describe Picky::Backends::SQLite::Value do
|
|
67
67
|
|
68
68
|
describe 'to_s' do
|
69
69
|
it 'returns the cache path with the default file extension' do
|
70
|
-
db.to_s.should == 'Picky::Backends::SQLite::Value(some/cache/path/to/file.sqlite3)'
|
70
|
+
db.to_s.should == 'Picky::Backends::SQLite::Value(spec/temp/some/cache/path/to/file.sqlite3)'
|
71
71
|
end
|
72
72
|
end
|
73
73
|
end
|
@@ -23,7 +23,7 @@ describe Picky::Backends::SQLite do
|
|
23
23
|
# ].each do |type, kind|
|
24
24
|
# it "creates and returns a(n) #{type} index" do
|
25
25
|
# @backend.send(:"create_#{type}",
|
26
|
-
# stub(type, :index_path => "spec/
|
26
|
+
# stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
|
27
27
|
# ).should be_kind_of(kind)
|
28
28
|
# end
|
29
29
|
# end
|
@@ -49,7 +49,7 @@ describe Picky::Backends::SQLite do
|
|
49
49
|
# ].each do |type, kind|
|
50
50
|
# it "creates and returns a(n) #{type} index" do
|
51
51
|
# to_a_able_stub = Object.new
|
52
|
-
# to_a_able_stub.stub! :index_path => "spec/
|
52
|
+
# to_a_able_stub.stub! :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}"
|
53
53
|
# @backend.send(:"create_#{type}", to_a_able_stub).should be_kind_of(kind)
|
54
54
|
# end
|
55
55
|
# end
|
@@ -72,7 +72,7 @@ describe Picky::Backends::SQLite do
|
|
72
72
|
].each do |type, kind|
|
73
73
|
it "creates and returns a(n) #{type} index" do
|
74
74
|
@backend.send(:"create_#{type}",
|
75
|
-
stub(type, :index_path => "spec/
|
75
|
+
stub(type, :index_path => "spec/temp/index/test/some_index/some_category_some_bundle_#{type}")
|
76
76
|
).should be_kind_of(kind)
|
77
77
|
end
|
78
78
|
end
|
@@ -119,7 +119,7 @@ describe Picky::Bundle do
|
|
119
119
|
it "uses the right file" do
|
120
120
|
MultiJson.stub! :decode
|
121
121
|
|
122
|
-
File.should_receive(:open).once.with 'spec/
|
122
|
+
File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_inverted.memory.json', 'r'
|
123
123
|
|
124
124
|
@bundle.load_inverted
|
125
125
|
end
|
@@ -128,7 +128,7 @@ describe Picky::Bundle do
|
|
128
128
|
it "uses the right file" do
|
129
129
|
MultiJson.stub! :decode
|
130
130
|
|
131
|
-
File.should_receive(:open).once.with 'spec/
|
131
|
+
File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_weights.memory.json', 'r'
|
132
132
|
|
133
133
|
@bundle.load_weights
|
134
134
|
end
|
@@ -137,7 +137,7 @@ describe Picky::Bundle do
|
|
137
137
|
it "uses the right file" do
|
138
138
|
Marshal.stub! :load
|
139
139
|
|
140
|
-
File.should_receive(:open).once.with 'spec/
|
140
|
+
File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_similarity.memory.dump', 'r:binary'
|
141
141
|
|
142
142
|
@bundle.load_similarity
|
143
143
|
end
|
@@ -146,7 +146,7 @@ describe Picky::Bundle do
|
|
146
146
|
it "uses the right file" do
|
147
147
|
MultiJson.stub! :decode
|
148
148
|
|
149
|
-
File.should_receive(:open).once.with 'spec/
|
149
|
+
File.should_receive(:open).once.with 'spec/temp/index/test/some_index/some_category_some_name_configuration.memory.json', 'r'
|
150
150
|
|
151
151
|
@bundle.load_configuration
|
152
152
|
end
|
data/spec/lib/bundle_spec.rb
CHANGED
@@ -22,10 +22,10 @@ describe Picky::Bundle do
|
|
22
22
|
|
23
23
|
describe 'index_path' do
|
24
24
|
it 'is correct' do
|
25
|
-
bundle.index_path(:some_type).should == 'spec/
|
25
|
+
bundle.index_path(:some_type).should == 'spec/temp/index/test/some_index/some_category_some_name_some_type'
|
26
26
|
end
|
27
27
|
it 'is correct' do
|
28
|
-
bundle.index_path.should == 'spec/
|
28
|
+
bundle.index_path.should == 'spec/temp/index/test/some_index/some_category_some_name'
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
File without changes
|
data/spec/lib/category_spec.rb
CHANGED
@@ -27,7 +27,7 @@ describe Picky::Category do
|
|
27
27
|
context 'directories' do
|
28
28
|
let(:category) { described_class.new :some_category, index }
|
29
29
|
it 'is correct' do
|
30
|
-
category.prepared_index_path.should == 'spec/
|
30
|
+
category.prepared_index_path.should == 'spec/temp/index/test/some_index/some_category'
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
data/spec/lib/index_spec.rb
CHANGED
data/spec/lib/loader_spec.rb
CHANGED
@@ -18,7 +18,7 @@ describe Picky::Loader do
|
|
18
18
|
|
19
19
|
describe 'load_application' do
|
20
20
|
it 'does ok' do
|
21
|
-
Kernel.should_receive(:load).once.with 'spec/
|
21
|
+
Kernel.should_receive(:load).once.with 'spec/temp/app.rb'
|
22
22
|
|
23
23
|
lambda { described_class.load_application }.should_not raise_error
|
24
24
|
end
|
@@ -36,13 +36,15 @@ describe Performant::Array do
|
|
36
36
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
37
37
|
arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
|
38
38
|
|
39
|
-
#
|
39
|
+
# Brute force.
|
40
|
+
#
|
40
41
|
performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.001
|
41
42
|
end
|
42
43
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
43
44
|
arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
|
44
45
|
|
45
46
|
# &
|
47
|
+
#
|
46
48
|
performance_of do
|
47
49
|
arys.inject(arys.shift.dup) do |total, ary|
|
48
50
|
total & arys
|
@@ -75,13 +77,56 @@ describe Performant::Array do
|
|
75
77
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
76
78
|
arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a]
|
77
79
|
|
78
|
-
#
|
80
|
+
# Brute force.
|
81
|
+
#
|
79
82
|
performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.001
|
80
83
|
end
|
81
84
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
82
85
|
arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a << 7]
|
83
86
|
|
84
87
|
# &
|
88
|
+
#
|
89
|
+
performance_of do
|
90
|
+
arys.inject(arys.shift.dup) do |total, ary|
|
91
|
+
total & arys
|
92
|
+
end
|
93
|
+
end.should < 0.0015
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
describe "memory_efficient_intersect with strings" do
|
98
|
+
it "should intersect empty arrays correctly" do
|
99
|
+
arys = [['c','d'], ['a','b','c'], []]
|
100
|
+
|
101
|
+
Performant::Array.memory_efficient_intersect(arys).should == []
|
102
|
+
end
|
103
|
+
it "should handle intermediate empty results correctly" do
|
104
|
+
arys = [['e','d'], ['a','b','c'], ['c','d','e','h','i']]
|
105
|
+
|
106
|
+
Performant::Array.memory_efficient_intersect(arys).should == []
|
107
|
+
end
|
108
|
+
it "should intersect correctly" do
|
109
|
+
arys = [['c','d'], ['a','b','c'], ['c','d','e','h','i']]
|
110
|
+
|
111
|
+
Performant::Array.memory_efficient_intersect(arys).should == ['c']
|
112
|
+
end
|
113
|
+
it "should intersect many arrays" do
|
114
|
+
arys = [['c','d','e','f','g'], ['a','b','c','e','f','g'], ['c','d','e','f','g','h','i'], ['a','b','c','d','e','f','g','h','i','j'], ['b','c','e','f','g','s'], ['a','b','c','d','e','f','g','h','i','j'], ['b','c','e','f','g','s']]
|
115
|
+
|
116
|
+
Performant::Array.memory_efficient_intersect(arys).should == ['c','e','f','g']
|
117
|
+
end
|
118
|
+
it "should be optimal for 2 small arrays of 50/10_000" do
|
119
|
+
arys = [('1'..'50').to_a, ('10000'..'20000').to_a]
|
120
|
+
|
121
|
+
# Brute force - note that it is slower than the Symbols/Integers version.
|
122
|
+
#
|
123
|
+
performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.0015
|
124
|
+
end
|
125
|
+
it "should be optimal for 2 small arrays of 50/10_000" do
|
126
|
+
arys = [('1'..'50').to_a, ('10000'..'20000').to_a << 7]
|
127
|
+
|
128
|
+
# &
|
129
|
+
#
|
85
130
|
performance_of do
|
86
131
|
arys.inject(arys.shift.dup) do |total, ary|
|
87
132
|
total & arys
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.
|
4
|
+
version: 4.12.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-11-
|
12
|
+
date: 2012-11-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -34,7 +34,7 @@ dependencies:
|
|
34
34
|
requirements:
|
35
35
|
- - ~>
|
36
36
|
- !ruby/object:Gem::Version
|
37
|
-
version: 4.
|
37
|
+
version: 4.12.0
|
38
38
|
type: :development
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -42,7 +42,7 @@ dependencies:
|
|
42
42
|
requirements:
|
43
43
|
- - ~>
|
44
44
|
- !ruby/object:Gem::Version
|
45
|
-
version: 4.
|
45
|
+
version: 4.12.0
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
47
|
name: text
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
@@ -242,6 +242,7 @@ files:
|
|
242
242
|
- lib/picky/sinatra/index_actions.rb
|
243
243
|
- lib/picky/sinatra.rb
|
244
244
|
- lib/picky/source.rb
|
245
|
+
- lib/picky/splitters/automatic.rb
|
245
246
|
- lib/picky/statistics.rb
|
246
247
|
- lib/picky/tasks.rb
|
247
248
|
- lib/picky/tokenizer.rb
|
@@ -261,9 +262,8 @@ files:
|
|
261
262
|
- lib/tasks/try.rake
|
262
263
|
- lib/performant.c
|
263
264
|
- spec/aux/picky/cli_spec.rb
|
264
|
-
- spec/category_realtime_spec.rb
|
265
|
-
- spec/ext/performant_spec.rb
|
266
265
|
- spec/functional/allocations_uniq_by_definition_spec.rb
|
266
|
+
- spec/functional/automatic_segmentation_spec.rb
|
267
267
|
- spec/functional/backends/file_spec.rb
|
268
268
|
- spec/functional/backends/memory_bundle_realtime_spec.rb
|
269
269
|
- spec/functional/backends/memory_json_utf8_spec.rb
|
@@ -328,6 +328,7 @@ files:
|
|
328
328
|
- spec/lib/category/location_spec.rb
|
329
329
|
- spec/lib/category_indexed_spec.rb
|
330
330
|
- spec/lib/category_indexing_spec.rb
|
331
|
+
- spec/lib/category_realtime_spec.rb
|
331
332
|
- spec/lib/category_spec.rb
|
332
333
|
- spec/lib/character_substituters/west_european_spec.rb
|
333
334
|
- spec/lib/extensions/array_spec.rb
|
@@ -398,6 +399,7 @@ files:
|
|
398
399
|
- spec/lib/statistics_spec.rb
|
399
400
|
- spec/lib/tasks/try_spec.rb
|
400
401
|
- spec/lib/tokenizer_spec.rb
|
402
|
+
- spec/performant_spec.rb
|
401
403
|
- bin/picky
|
402
404
|
homepage: http://florianhanke.com/picky
|
403
405
|
licenses: []
|
@@ -425,9 +427,8 @@ specification_version: 3
|
|
425
427
|
summary: ! 'Picky: Semantic Search Engine. Clever Interface. Good Tools.'
|
426
428
|
test_files:
|
427
429
|
- spec/aux/picky/cli_spec.rb
|
428
|
-
- spec/category_realtime_spec.rb
|
429
|
-
- spec/ext/performant_spec.rb
|
430
430
|
- spec/functional/allocations_uniq_by_definition_spec.rb
|
431
|
+
- spec/functional/automatic_segmentation_spec.rb
|
431
432
|
- spec/functional/backends/file_spec.rb
|
432
433
|
- spec/functional/backends/memory_bundle_realtime_spec.rb
|
433
434
|
- spec/functional/backends/memory_json_utf8_spec.rb
|
@@ -492,6 +493,7 @@ test_files:
|
|
492
493
|
- spec/lib/category/location_spec.rb
|
493
494
|
- spec/lib/category_indexed_spec.rb
|
494
495
|
- spec/lib/category_indexing_spec.rb
|
496
|
+
- spec/lib/category_realtime_spec.rb
|
495
497
|
- spec/lib/category_spec.rb
|
496
498
|
- spec/lib/character_substituters/west_european_spec.rb
|
497
499
|
- spec/lib/extensions/array_spec.rb
|
@@ -562,4 +564,5 @@ test_files:
|
|
562
564
|
- spec/lib/statistics_spec.rb
|
563
565
|
- spec/lib/tasks/try_spec.rb
|
564
566
|
- spec/lib/tokenizer_spec.rb
|
567
|
+
- spec/performant_spec.rb
|
565
568
|
has_rdoc:
|