picky 4.18.0 → 4.19.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/picky/picky.c +54 -19
- data/lib/picky/loader.rb +2 -1
- data/lib/picky/query/token.rb +18 -23
- data/lib/picky/query/tokens.rb +5 -3
- data/lib/picky/splitter.rb +27 -0
- data/lib/picky/tokenizer/regexp_wrapper.rb +22 -13
- data/lib/picky/tokenizer.rb +5 -3
- data/spec/functional/custom_delimiters_spec.rb +4 -4
- data/spec/functional/object_use_spec.rb +93 -0
- data/spec/lib/query/token_spec.rb +4 -4
- data/spec/lib/splitter_spec.rb +83 -0
- data/spec/performant_spec.rb +7 -0
- metadata +9 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 37e52d1d18d9eec4ca1545d992ec9d6c220c24fa
|
4
|
+
data.tar.gz: 468ebcbaeab07fabbdd70b4693a665a98d6eaa52
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 69d292af9ab42ee928e34c94818c2437e255b25e09ec405e01ce9db30fc29350ab726b71db07dbc3ac1f5f1c2d9563cb362fcdf2770a3f4448cd3e569e796d4c
|
7
|
+
data.tar.gz: 2a0e9d7399da82e484a56ee8a16c088b6e627b35cbeabfe56f078aa4271b919f14ecd27f846272652e0ba0dc8b41a96a479fc892449814e58b5d05f38a98d7ac
|
data/ext/picky/picky.c
CHANGED
@@ -33,9 +33,10 @@ static inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_
|
|
33
33
|
// Vars.
|
34
34
|
//
|
35
35
|
VALUE rb_array_of_arrays;
|
36
|
-
VALUE
|
36
|
+
VALUE result_array;
|
37
37
|
VALUE current_array;
|
38
38
|
VALUE hash;
|
39
|
+
VALUE ary;
|
39
40
|
|
40
41
|
// Temps.
|
41
42
|
//
|
@@ -44,37 +45,71 @@ static inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_
|
|
44
45
|
// Conversions & presorting.
|
45
46
|
//
|
46
47
|
rb_array_of_arrays = rb_block_call(unsorted_array_of_arrays, rb_intern("sort_by!"), 0, 0, rb_ary_length, 0);
|
47
|
-
|
48
|
+
|
49
|
+
// Assume the smallest array is the result already.
|
50
|
+
//
|
51
|
+
result_array = rb_ary_dup(rb_ary_entry(rb_array_of_arrays, 0));
|
48
52
|
|
49
|
-
// Iterate through all arrays.
|
53
|
+
// Iterate through all other arrays.
|
50
54
|
//
|
51
55
|
for (i = 1; i < RARRAY_LEN(rb_array_of_arrays); i++) {
|
52
|
-
// Break if the
|
56
|
+
// Break if the result array is empty.
|
57
|
+
// (Because intersecting anything with it will yield nothing)
|
53
58
|
//
|
54
|
-
if (RARRAY_LEN(
|
59
|
+
if (RARRAY_LEN(result_array) == 0) {
|
55
60
|
break;
|
56
61
|
}
|
57
|
-
|
58
|
-
//
|
62
|
+
|
63
|
+
// If the result array is currently larger than 10
|
64
|
+
// entries, use a hash for intersection, else
|
65
|
+
// use an array.
|
59
66
|
//
|
60
|
-
|
67
|
+
if (RARRAY_LEN(result_array) > 10) {
|
68
|
+
// Make a hash from the currently smallest version.
|
69
|
+
//
|
70
|
+
hash = ary_make_hash(result_array, 0);
|
61
71
|
|
62
|
-
|
63
|
-
|
64
|
-
|
72
|
+
// Clear for use as temp array.
|
73
|
+
//
|
74
|
+
rb_ary_clear(result_array);
|
65
75
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
76
|
+
// Get the current array.
|
77
|
+
//
|
78
|
+
current_array = rb_ary_entry(rb_array_of_arrays, i);
|
79
|
+
|
80
|
+
// Iterate through all array elements.
|
81
|
+
//
|
82
|
+
for (j = 0; j < RARRAY_LEN(current_array); j++) {
|
83
|
+
v = rb_ary_entry(current_array, j);
|
84
|
+
if (rb_hash_delete(hash, v) != Qnil) {
|
85
|
+
rb_ary_push(result_array, v);
|
86
|
+
}
|
87
|
+
}
|
88
|
+
} else {
|
89
|
+
// Make a new array from the currently smallest version.
|
90
|
+
//
|
91
|
+
ary = rb_ary_dup(result_array);
|
92
|
+
|
93
|
+
// Clear for use as temp array.
|
94
|
+
//
|
95
|
+
rb_ary_clear(result_array);
|
96
|
+
|
97
|
+
// Get the current array.
|
98
|
+
//
|
99
|
+
current_array = rb_ary_entry(rb_array_of_arrays, i);
|
100
|
+
|
101
|
+
// Iterate through all array elements.
|
102
|
+
//
|
103
|
+
for (j = 0; j < RARRAY_LEN(current_array); j++) {
|
104
|
+
v = rb_ary_entry(current_array, j);
|
105
|
+
if (rb_ary_delete(ary, v) != Qnil) {
|
106
|
+
rb_ary_push(result_array, v);
|
107
|
+
}
|
73
108
|
}
|
74
109
|
}
|
75
110
|
}
|
76
111
|
|
77
|
-
return
|
112
|
+
return result_array;
|
78
113
|
}
|
79
114
|
|
80
115
|
VALUE p_mPerformant, p_cArray;
|
data/lib/picky/loader.rb
CHANGED
data/lib/picky/query/token.rb
CHANGED
@@ -10,7 +10,7 @@ module Picky
|
|
10
10
|
# or whether it is a partial (bla*).
|
11
11
|
#
|
12
12
|
class Token
|
13
|
-
|
13
|
+
|
14
14
|
attr_reader :text, :original
|
15
15
|
attr_writer :similar
|
16
16
|
attr_writer :predefined_categories
|
@@ -237,6 +237,8 @@ module Picky
|
|
237
237
|
@text.gsub! @@illegals, EMPTY_STRING unless @text == EMPTY_STRING
|
238
238
|
end
|
239
239
|
def self.redefine_illegals
|
240
|
+
# TODO The no-similar and the no-partial character are both ", so it appears twice in the character class below.
|
241
|
+
#
|
240
242
|
@@illegals = %r{[#@@no_similar_character#@@similar_character#@@no_partial_character#@@partial_character]}
|
241
243
|
end
|
242
244
|
redefine_illegals
|
@@ -273,49 +275,42 @@ module Picky
|
|
273
275
|
|
274
276
|
# Splits text into a qualifier and text.
|
275
277
|
#
|
276
|
-
@@qualifier_text_delimiter =
|
277
|
-
@@qualifiers_delimiter =
|
278
|
+
@@qualifier_text_delimiter = /:/
|
279
|
+
@@qualifiers_delimiter = /,/
|
280
|
+
@@qualifier_text_splitter = Splitter.new @@qualifier_text_delimiter
|
281
|
+
@@qualifiers_splitter = Splitter.new @@qualifiers_delimiter
|
278
282
|
def qualify
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
if @text.include? @@qualifier_text_delimiter
|
283
|
-
@qualifiers, @text = @text.split @@qualifier_text_delimiter, 2
|
284
|
-
if @text
|
285
|
-
@qualifiers = @qualifiers.split @@qualifiers_delimiter
|
286
|
-
else
|
287
|
-
@text, @qualifiers = @qualifiers, nil
|
288
|
-
end
|
283
|
+
@qualifiers, @text = @@qualifier_text_splitter.single @text
|
284
|
+
if @qualifiers
|
285
|
+
@qualifiers = @@qualifiers_splitter.multi @qualifiers
|
289
286
|
end
|
290
287
|
end
|
291
|
-
# Define a
|
288
|
+
# Define a regexp which separates the qualifier
|
292
289
|
# from the search text.
|
293
290
|
#
|
294
|
-
# Default is
|
295
|
-
#
|
296
|
-
# This is used in a String#split.
|
291
|
+
# Default is /:/.
|
297
292
|
#
|
298
293
|
# Example:
|
299
|
-
# Picky::Query::Token.qualifier_text_delimiter =
|
294
|
+
# Picky::Query::Token.qualifier_text_delimiter = /\?/
|
300
295
|
# try.search("text1?hello text2?world").ids.should == [1]
|
301
296
|
#
|
302
297
|
def self.qualifier_text_delimiter= character
|
303
298
|
@@qualifier_text_delimiter = character
|
299
|
+
@@qualifier_text_splitter = Splitter.new @@qualifier_text_delimiter
|
304
300
|
end
|
305
|
-
# Define a
|
301
|
+
# Define a regexp which separates the qualifiers
|
306
302
|
# (before the search text).
|
307
303
|
#
|
308
|
-
# Default is
|
309
|
-
#
|
310
|
-
# This is used in a String#split.
|
304
|
+
# Default is /,/.
|
311
305
|
#
|
312
306
|
# Example:
|
313
|
-
# Picky::Query::Token.qualifiers_delimiter =
|
307
|
+
# Picky::Query::Token.qualifiers_delimiter = /\|/
|
314
308
|
# try.search("text1|text2:hello").ids.should == [1]
|
315
309
|
#
|
316
310
|
|
317
311
|
def self.qualifiers_delimiter= character
|
318
312
|
@@qualifiers_delimiter = character
|
313
|
+
@@qualifiers_splitter = Splitter.new @@qualifiers_delimiter
|
319
314
|
end
|
320
315
|
|
321
316
|
# Returns the qualifiers as an array.
|
data/lib/picky/query/tokens.rb
CHANGED
@@ -7,7 +7,7 @@ module Picky
|
|
7
7
|
# This class primarily handles switching through similar token constellations.
|
8
8
|
#
|
9
9
|
class Tokens
|
10
|
-
|
10
|
+
|
11
11
|
attr_reader :tokens, :ignore_unassigned
|
12
12
|
|
13
13
|
# Basically forwards to its internal tokens array.
|
@@ -25,11 +25,13 @@ module Picky
|
|
25
25
|
|
26
26
|
# Creates a new Tokens object from a number of Strings.
|
27
27
|
#
|
28
|
+
@@or_splitting_pattern = /\|/
|
29
|
+
@@splitter = Splitter.new @@or_splitting_pattern
|
28
30
|
def self.processed words, originals, ignore_unassigned = false
|
29
31
|
new(words.zip(originals).collect! do |word, original|
|
30
|
-
w, *middle, rest
|
32
|
+
w, *middle, rest = @@splitter.multi word
|
31
33
|
if rest
|
32
|
-
Or.new processed [w, *middle, rest], original.split(
|
34
|
+
Or.new processed [w, *middle, rest], original.split(@@or_splitting_pattern)
|
33
35
|
else
|
34
36
|
Token.processed w, original
|
35
37
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
class Splitter < StringScanner
|
4
|
+
|
5
|
+
def initialize delimiter
|
6
|
+
@delimiter = delimiter
|
7
|
+
super ''
|
8
|
+
end
|
9
|
+
|
10
|
+
def single text
|
11
|
+
self.string = text
|
12
|
+
skip_until @delimiter
|
13
|
+
[pre_match, post_match || string]
|
14
|
+
end
|
15
|
+
|
16
|
+
def multi text
|
17
|
+
self.string = text
|
18
|
+
if exist? @delimiter
|
19
|
+
text.split @delimiter
|
20
|
+
else
|
21
|
+
[text]
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
@@ -1,19 +1,28 @@
|
|
1
|
-
|
1
|
+
module Picky
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
class Tokenizer
|
4
|
+
|
5
|
+
class RegexpWrapper
|
6
|
+
|
7
|
+
def initialize regexp
|
8
|
+
@regexp = regexp
|
9
|
+
@splitter = Splitter.new @regexp
|
10
|
+
end
|
6
11
|
|
7
|
-
|
8
|
-
|
9
|
-
|
12
|
+
def split text
|
13
|
+
@splitter.multi text
|
14
|
+
end
|
10
15
|
|
11
|
-
|
12
|
-
|
13
|
-
|
16
|
+
def source
|
17
|
+
@regexp.source
|
18
|
+
end
|
14
19
|
|
15
|
-
|
16
|
-
|
17
|
-
|
20
|
+
def method_missing name, *args, &block
|
21
|
+
@regexp.send name, *args, &block
|
22
|
+
end
|
18
23
|
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
19
28
|
end
|
data/lib/picky/tokenizer.rb
CHANGED
@@ -98,7 +98,7 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
|
98
98
|
# Note: We do not test against to_str since symbols do not work with String#split.
|
99
99
|
#
|
100
100
|
def splits_text_on thing
|
101
|
-
raise ArgumentError.new "#{__method__} takes a Regexp or
|
101
|
+
raise ArgumentError.new "#{__method__} takes a Regexp or a thing that responds to #split as argument, not a #{thing.class}." unless Regexp === thing || thing.respond_to?(:split)
|
102
102
|
@splits_text_on = if thing.respond_to? :split
|
103
103
|
thing
|
104
104
|
else
|
@@ -106,6 +106,8 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
|
106
106
|
end
|
107
107
|
end
|
108
108
|
def split text
|
109
|
+
# Does not create a new string if nothing is split.
|
110
|
+
#
|
109
111
|
@splits_text_on.split text
|
110
112
|
end
|
111
113
|
|
@@ -233,9 +235,9 @@ ERROR
|
|
233
235
|
# [[:token1, :token2], ["Original1", "Original2"]]
|
234
236
|
#
|
235
237
|
def tokenize text
|
236
|
-
text
|
238
|
+
text = preprocess text.to_s # processing the text
|
237
239
|
return empty_tokens if text.empty? # TODO blank?
|
238
|
-
words
|
240
|
+
words = pretokenize text # splitting and preparations for tokenizing
|
239
241
|
return empty_tokens if words.empty?
|
240
242
|
tokens = tokens_for words # creating tokens / strings
|
241
243
|
[tokens, words]
|
@@ -10,8 +10,8 @@ describe 'custom delimiters' do
|
|
10
10
|
Picky::Query::Token.similar_character = '~'
|
11
11
|
Picky::Query::Token.no_similar_character = '"'
|
12
12
|
Picky::Query::Token.range_character = '-'
|
13
|
-
Picky::Query::Token.qualifier_text_delimiter =
|
14
|
-
Picky::Query::Token.qualifiers_delimiter =
|
13
|
+
Picky::Query::Token.qualifier_text_delimiter = /:/
|
14
|
+
Picky::Query::Token.qualifiers_delimiter = /,/
|
15
15
|
end
|
16
16
|
|
17
17
|
context 'offers custom partial delimiters to be set' do
|
@@ -80,11 +80,11 @@ describe 'custom delimiters' do
|
|
80
80
|
try.search("text1:hello text2:world").ids.should == [1]
|
81
81
|
|
82
82
|
try.search("text1?hello text2?world").ids.should == []
|
83
|
-
Picky::Query::Token.qualifier_text_delimiter =
|
83
|
+
Picky::Query::Token.qualifier_text_delimiter = /\?/
|
84
84
|
try.search("text1?hello text2?world").ids.should == [1]
|
85
85
|
|
86
86
|
try.search("text1!text2?hello text2?world").ids.should == []
|
87
|
-
Picky::Query::Token.qualifiers_delimiter =
|
87
|
+
Picky::Query::Token.qualifiers_delimiter = /!/
|
88
88
|
try.search("text1!text2?hello text2?world").ids.should == [1]
|
89
89
|
end
|
90
90
|
|
@@ -0,0 +1,93 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe "Object Use" do
|
6
|
+
|
7
|
+
it 'is not too high' do
|
8
|
+
|
9
|
+
index = Picky::Index.new :object_use do
|
10
|
+
category :text1
|
11
|
+
category :text2
|
12
|
+
category :text3
|
13
|
+
category :text4
|
14
|
+
end
|
15
|
+
try = Picky::Search.new index
|
16
|
+
|
17
|
+
thing = Struct.new(:id, :text1, :text2, :text3, :text4)
|
18
|
+
index.add thing.new(1, 'one', 'two', 'three', 'four')
|
19
|
+
|
20
|
+
# Pre-run.
|
21
|
+
#
|
22
|
+
|
23
|
+
try.search 'one'
|
24
|
+
try.search 'one two three'
|
25
|
+
try.search 'text1:one'
|
26
|
+
try.search 'text1:one text2:two text3:three'
|
27
|
+
|
28
|
+
# Actual tests.
|
29
|
+
#
|
30
|
+
|
31
|
+
s = 'one'
|
32
|
+
result = mark do
|
33
|
+
try.search s
|
34
|
+
end
|
35
|
+
result.should == {} # No new strings since nothing is split.
|
36
|
+
|
37
|
+
s = 'one two three'
|
38
|
+
result = mark do
|
39
|
+
try.search s
|
40
|
+
end
|
41
|
+
result.should == {
|
42
|
+
"three" => 1,
|
43
|
+
"two" => 1,
|
44
|
+
"one" => 1,
|
45
|
+
'one two three' => 2 # TODO Is GC'd.
|
46
|
+
}
|
47
|
+
|
48
|
+
result = mark do
|
49
|
+
try.search 'text1:one'
|
50
|
+
end
|
51
|
+
result.should == {
|
52
|
+
"one" => 1,
|
53
|
+
"text1" => 1,
|
54
|
+
"text1:one" => 1
|
55
|
+
} # Only the necessary split strings.
|
56
|
+
|
57
|
+
s = 'text1:one text2:two text3:three'
|
58
|
+
result = mark do
|
59
|
+
try.search s
|
60
|
+
end
|
61
|
+
result.should == {
|
62
|
+
"three" => 1,
|
63
|
+
"two" => 1,
|
64
|
+
"one" => 1,
|
65
|
+
"text3" => 1,
|
66
|
+
"text2" => 1,
|
67
|
+
"text1" => 1,
|
68
|
+
"text3:three" => 1,
|
69
|
+
"text2:two" => 1,
|
70
|
+
"text1:one" => 1
|
71
|
+
} # Only the necessary split strings.
|
72
|
+
|
73
|
+
s = 'text1:one text2:two text3,text4:three'
|
74
|
+
result = mark do
|
75
|
+
try.search s
|
76
|
+
end
|
77
|
+
result.should == {
|
78
|
+
"three" => 1,
|
79
|
+
"two" => 1,
|
80
|
+
"one" => 1,
|
81
|
+
"text3,text4" => 2, # TODO
|
82
|
+
"text3" => 1,
|
83
|
+
"text4" => 1,
|
84
|
+
"text2" => 1,
|
85
|
+
"text1" => 1,
|
86
|
+
"text1:one" => 1,
|
87
|
+
"text2:two" => 1,
|
88
|
+
"text3,text4:three" => 1
|
89
|
+
}
|
90
|
+
|
91
|
+
end
|
92
|
+
|
93
|
+
end
|
@@ -151,19 +151,19 @@ describe Picky::Query::Token do
|
|
151
151
|
it_should_qualify 'with:qualifier', [['with'], 'qualifier']
|
152
152
|
it_should_qualify 'without qualifier', [nil, 'without qualifier']
|
153
153
|
it_should_qualify 'name:', [['name'], '']
|
154
|
-
it_should_qualify ':broken qualifier', [[],
|
154
|
+
it_should_qualify ':broken qualifier', [[''], 'broken qualifier'] # Unsure about that. Probably should recognize it as text.
|
155
155
|
it_should_qualify '', [nil, '']
|
156
156
|
it_should_qualify 'sp:text', [['sp'], 'text']
|
157
157
|
it_should_qualify '""', [nil, '""']
|
158
158
|
it_should_qualify 'name:', [['name'], '']
|
159
159
|
it_should_qualify 'name:hanke', [['name'], 'hanke']
|
160
160
|
it_should_qualify 'g:gaga', [['g'], 'gaga']
|
161
|
-
it_should_qualify ':nothing', [[],
|
161
|
+
it_should_qualify ':nothing', [[''], 'nothing']
|
162
162
|
it_should_qualify 'hello', [nil, 'hello']
|
163
163
|
it_should_qualify 'a:b:c', [['a'], 'b:c']
|
164
164
|
it_should_qualify 'a,b:c', [['a','b'], 'c']
|
165
165
|
it_should_qualify 'a,b,c:d', [['a','b','c'], 'd']
|
166
|
-
it_should_qualify ':', [[],
|
166
|
+
it_should_qualify ':', [[''], '']
|
167
167
|
it_should_qualify 'vorname:qualifier', [['vorname'], 'qualifier']
|
168
168
|
end
|
169
169
|
|
@@ -429,7 +429,7 @@ describe Picky::Query::Token do
|
|
429
429
|
context 'with missing qualifier' do
|
430
430
|
let(:token) { described_class.processed ':missingqualifier' }
|
431
431
|
it 'is correct' do
|
432
|
-
token.qualifiers.should == []
|
432
|
+
token.qualifiers.should == ['']
|
433
433
|
token.text.should == 'missingqualifier'
|
434
434
|
end
|
435
435
|
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Picky::Splitter do
|
4
|
+
|
5
|
+
describe "single" do
|
6
|
+
let(:splitter) { described_class.new /:/ }
|
7
|
+
it "splits right" do
|
8
|
+
splitter.single(':b').should == ['','b']
|
9
|
+
end
|
10
|
+
it "splits right" do
|
11
|
+
splitter.single('a:b').should == ['a','b']
|
12
|
+
end
|
13
|
+
it "splits right" do
|
14
|
+
splitter.single('a').should == [nil, 'a']
|
15
|
+
end
|
16
|
+
it "splits right" do
|
17
|
+
splitter.single('a:b c:d').should == ['a', 'b c:d']
|
18
|
+
end
|
19
|
+
it "returns the same string if not split" do
|
20
|
+
s = 'a'
|
21
|
+
splitter.single(s)[1].object_id.should == s.object_id
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe "multi" do
|
26
|
+
let(:splitter) { described_class.new /\s/ }
|
27
|
+
it "splits right" do
|
28
|
+
splitter.multi(' b').should == ['', 'b']
|
29
|
+
end
|
30
|
+
it "splits right" do
|
31
|
+
splitter.multi('a b').should == ['a', 'b']
|
32
|
+
end
|
33
|
+
it "splits right" do
|
34
|
+
splitter.multi('a b c d').should == ['a', 'b', 'c', 'd']
|
35
|
+
end
|
36
|
+
it "splits right" do
|
37
|
+
splitter.multi('a').should == ['a']
|
38
|
+
end
|
39
|
+
it "returns the same string if not split" do
|
40
|
+
s = 'a'
|
41
|
+
splitter.multi(s).first.object_id.should == s.object_id
|
42
|
+
end
|
43
|
+
# it 'is faster than split' do
|
44
|
+
# pattern = /\s/
|
45
|
+
# amount = 1000
|
46
|
+
# text = 'abcd'
|
47
|
+
# split = performance_of do
|
48
|
+
# amount.times { text.split pattern }
|
49
|
+
# end
|
50
|
+
# multi = performance_of do
|
51
|
+
# amount.times { splitter.multi text, pattern }
|
52
|
+
# end
|
53
|
+
# split.should < multi
|
54
|
+
# end
|
55
|
+
# it 'is slower than split (but uses less memory in the non-split case)' do
|
56
|
+
# pattern = /\s/
|
57
|
+
# amount = 1000
|
58
|
+
# text = 'a b'
|
59
|
+
# multi = performance_of do
|
60
|
+
# amount.times { splitter.multi text, pattern }
|
61
|
+
# end
|
62
|
+
# split = performance_of do
|
63
|
+
# amount.times { text.split pattern }
|
64
|
+
# end
|
65
|
+
# # p split
|
66
|
+
# # p multi
|
67
|
+
# end
|
68
|
+
# it 'is slower than split (but uses less memory in the non-split case)' do
|
69
|
+
# pattern = /\s/
|
70
|
+
# amount = 1000
|
71
|
+
# text = 'a b c d'
|
72
|
+
# multi = performance_of do
|
73
|
+
# amount.times { splitter.multi text, pattern }
|
74
|
+
# end
|
75
|
+
# split = performance_of do
|
76
|
+
# amount.times { text.split pattern }
|
77
|
+
# end
|
78
|
+
# # p split
|
79
|
+
# # p multi
|
80
|
+
# end
|
81
|
+
end
|
82
|
+
|
83
|
+
end
|
data/spec/performant_spec.rb
CHANGED
@@ -122,6 +122,13 @@ describe Performant::Array do
|
|
122
122
|
#
|
123
123
|
performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.0015
|
124
124
|
end
|
125
|
+
it "should be optimal for many small arrays of length == 10" do
|
126
|
+
arys = [('1'..'10').to_a, ('10'..'20').to_a, ['10'] + ('10000'..'20000').to_a]
|
127
|
+
|
128
|
+
# Brute force - note that it is slower than the Symbols/Integers version.
|
129
|
+
#
|
130
|
+
performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.0015
|
131
|
+
end
|
125
132
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
126
133
|
arys = [('1'..'50').to_a, ('10000'..'20000').to_a << 7]
|
127
134
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.
|
4
|
+
version: 4.19.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Florian Hanke
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-08-
|
11
|
+
date: 2013-08-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - ~>
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 4.
|
47
|
+
version: 4.19.0
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 4.
|
54
|
+
version: 4.19.0
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: text
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -243,6 +243,7 @@ files:
|
|
243
243
|
- lib/picky/sinatra/index_actions.rb
|
244
244
|
- lib/picky/sinatra.rb
|
245
245
|
- lib/picky/source.rb
|
246
|
+
- lib/picky/splitter.rb
|
246
247
|
- lib/picky/splitters/automatic.rb
|
247
248
|
- lib/picky/statistics.rb
|
248
249
|
- lib/picky/tasks.rb
|
@@ -289,6 +290,7 @@ files:
|
|
289
290
|
- spec/functional/multi_index_qualifier_spec.rb
|
290
291
|
- spec/functional/no_tokenize_spec.rb
|
291
292
|
- spec/functional/non_specific_ids_larger_than_20_spec.rb
|
293
|
+
- spec/functional/object_use_spec.rb
|
292
294
|
- spec/functional/only_spec.rb
|
293
295
|
- spec/functional/or_spec.rb
|
294
296
|
- spec/functional/pool_spec.rb
|
@@ -406,6 +408,7 @@ files:
|
|
406
408
|
- spec/lib/sinatra_spec.rb
|
407
409
|
- spec/lib/solr/schema_generator_spec.rb
|
408
410
|
- spec/lib/source_spec.rb
|
411
|
+
- spec/lib/splitter_spec.rb
|
409
412
|
- spec/lib/statistics_spec.rb
|
410
413
|
- spec/lib/tasks/try_spec.rb
|
411
414
|
- spec/lib/tokenizer_spec.rb
|
@@ -463,6 +466,7 @@ test_files:
|
|
463
466
|
- spec/functional/multi_index_qualifier_spec.rb
|
464
467
|
- spec/functional/no_tokenize_spec.rb
|
465
468
|
- spec/functional/non_specific_ids_larger_than_20_spec.rb
|
469
|
+
- spec/functional/object_use_spec.rb
|
466
470
|
- spec/functional/only_spec.rb
|
467
471
|
- spec/functional/or_spec.rb
|
468
472
|
- spec/functional/pool_spec.rb
|
@@ -580,6 +584,7 @@ test_files:
|
|
580
584
|
- spec/lib/sinatra_spec.rb
|
581
585
|
- spec/lib/solr/schema_generator_spec.rb
|
582
586
|
- spec/lib/source_spec.rb
|
587
|
+
- spec/lib/splitter_spec.rb
|
583
588
|
- spec/lib/statistics_spec.rb
|
584
589
|
- spec/lib/tasks/try_spec.rb
|
585
590
|
- spec/lib/tokenizer_spec.rb
|