picky 4.18.0 → 4.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/picky/picky.c +54 -19
- data/lib/picky/loader.rb +2 -1
- data/lib/picky/query/token.rb +18 -23
- data/lib/picky/query/tokens.rb +5 -3
- data/lib/picky/splitter.rb +27 -0
- data/lib/picky/tokenizer/regexp_wrapper.rb +22 -13
- data/lib/picky/tokenizer.rb +5 -3
- data/spec/functional/custom_delimiters_spec.rb +4 -4
- data/spec/functional/object_use_spec.rb +93 -0
- data/spec/lib/query/token_spec.rb +4 -4
- data/spec/lib/splitter_spec.rb +83 -0
- data/spec/performant_spec.rb +7 -0
- metadata +9 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 37e52d1d18d9eec4ca1545d992ec9d6c220c24fa
|
4
|
+
data.tar.gz: 468ebcbaeab07fabbdd70b4693a665a98d6eaa52
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 69d292af9ab42ee928e34c94818c2437e255b25e09ec405e01ce9db30fc29350ab726b71db07dbc3ac1f5f1c2d9563cb362fcdf2770a3f4448cd3e569e796d4c
|
7
|
+
data.tar.gz: 2a0e9d7399da82e484a56ee8a16c088b6e627b35cbeabfe56f078aa4271b919f14ecd27f846272652e0ba0dc8b41a96a479fc892449814e58b5d05f38a98d7ac
|
data/ext/picky/picky.c
CHANGED
@@ -33,9 +33,10 @@ static inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_
|
|
33
33
|
// Vars.
|
34
34
|
//
|
35
35
|
VALUE rb_array_of_arrays;
|
36
|
-
VALUE
|
36
|
+
VALUE result_array;
|
37
37
|
VALUE current_array;
|
38
38
|
VALUE hash;
|
39
|
+
VALUE ary;
|
39
40
|
|
40
41
|
// Temps.
|
41
42
|
//
|
@@ -44,37 +45,71 @@ static inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_
|
|
44
45
|
// Conversions & presorting.
|
45
46
|
//
|
46
47
|
rb_array_of_arrays = rb_block_call(unsorted_array_of_arrays, rb_intern("sort_by!"), 0, 0, rb_ary_length, 0);
|
47
|
-
|
48
|
+
|
49
|
+
// Assume the smallest array is the result already.
|
50
|
+
//
|
51
|
+
result_array = rb_ary_dup(rb_ary_entry(rb_array_of_arrays, 0));
|
48
52
|
|
49
|
-
// Iterate through all arrays.
|
53
|
+
// Iterate through all other arrays.
|
50
54
|
//
|
51
55
|
for (i = 1; i < RARRAY_LEN(rb_array_of_arrays); i++) {
|
52
|
-
// Break if the
|
56
|
+
// Break if the result array is empty.
|
57
|
+
// (Because intersecting anything with it will yield nothing)
|
53
58
|
//
|
54
|
-
if (RARRAY_LEN(
|
59
|
+
if (RARRAY_LEN(result_array) == 0) {
|
55
60
|
break;
|
56
61
|
}
|
57
|
-
|
58
|
-
//
|
62
|
+
|
63
|
+
// If the result array is currently larger than 10
|
64
|
+
// entries, use a hash for intersection, else
|
65
|
+
// use an array.
|
59
66
|
//
|
60
|
-
|
67
|
+
if (RARRAY_LEN(result_array) > 10) {
|
68
|
+
// Make a hash from the currently smallest version.
|
69
|
+
//
|
70
|
+
hash = ary_make_hash(result_array, 0);
|
61
71
|
|
62
|
-
|
63
|
-
|
64
|
-
|
72
|
+
// Clear for use as temp array.
|
73
|
+
//
|
74
|
+
rb_ary_clear(result_array);
|
65
75
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
76
|
+
// Get the current array.
|
77
|
+
//
|
78
|
+
current_array = rb_ary_entry(rb_array_of_arrays, i);
|
79
|
+
|
80
|
+
// Iterate through all array elements.
|
81
|
+
//
|
82
|
+
for (j = 0; j < RARRAY_LEN(current_array); j++) {
|
83
|
+
v = rb_ary_entry(current_array, j);
|
84
|
+
if (rb_hash_delete(hash, v) != Qnil) {
|
85
|
+
rb_ary_push(result_array, v);
|
86
|
+
}
|
87
|
+
}
|
88
|
+
} else {
|
89
|
+
// Make a new array from the currently smallest version.
|
90
|
+
//
|
91
|
+
ary = rb_ary_dup(result_array);
|
92
|
+
|
93
|
+
// Clear for use as temp array.
|
94
|
+
//
|
95
|
+
rb_ary_clear(result_array);
|
96
|
+
|
97
|
+
// Get the current array.
|
98
|
+
//
|
99
|
+
current_array = rb_ary_entry(rb_array_of_arrays, i);
|
100
|
+
|
101
|
+
// Iterate through all array elements.
|
102
|
+
//
|
103
|
+
for (j = 0; j < RARRAY_LEN(current_array); j++) {
|
104
|
+
v = rb_ary_entry(current_array, j);
|
105
|
+
if (rb_ary_delete(ary, v) != Qnil) {
|
106
|
+
rb_ary_push(result_array, v);
|
107
|
+
}
|
73
108
|
}
|
74
109
|
}
|
75
110
|
}
|
76
111
|
|
77
|
-
return
|
112
|
+
return result_array;
|
78
113
|
}
|
79
114
|
|
80
115
|
VALUE p_mPerformant, p_cArray;
|
data/lib/picky/loader.rb
CHANGED
data/lib/picky/query/token.rb
CHANGED
@@ -10,7 +10,7 @@ module Picky
|
|
10
10
|
# or whether it is a partial (bla*).
|
11
11
|
#
|
12
12
|
class Token
|
13
|
-
|
13
|
+
|
14
14
|
attr_reader :text, :original
|
15
15
|
attr_writer :similar
|
16
16
|
attr_writer :predefined_categories
|
@@ -237,6 +237,8 @@ module Picky
|
|
237
237
|
@text.gsub! @@illegals, EMPTY_STRING unless @text == EMPTY_STRING
|
238
238
|
end
|
239
239
|
def self.redefine_illegals
|
240
|
+
# TODO The no-similar and no-partial characters are both ", so it appears twice in the character class below.
|
241
|
+
#
|
240
242
|
@@illegals = %r{[#@@no_similar_character#@@similar_character#@@no_partial_character#@@partial_character]}
|
241
243
|
end
|
242
244
|
redefine_illegals
|
@@ -273,49 +275,42 @@ module Picky
|
|
273
275
|
|
274
276
|
# Splits text into a qualifier and text.
|
275
277
|
#
|
276
|
-
@@qualifier_text_delimiter =
|
277
|
-
@@qualifiers_delimiter =
|
278
|
+
@@qualifier_text_delimiter = /:/
|
279
|
+
@@qualifiers_delimiter = /,/
|
280
|
+
@@qualifier_text_splitter = Splitter.new @@qualifier_text_delimiter
|
281
|
+
@@qualifiers_splitter = Splitter.new @@qualifiers_delimiter
|
278
282
|
def qualify
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
if @text.include? @@qualifier_text_delimiter
|
283
|
-
@qualifiers, @text = @text.split @@qualifier_text_delimiter, 2
|
284
|
-
if @text
|
285
|
-
@qualifiers = @qualifiers.split @@qualifiers_delimiter
|
286
|
-
else
|
287
|
-
@text, @qualifiers = @qualifiers, nil
|
288
|
-
end
|
283
|
+
@qualifiers, @text = @@qualifier_text_splitter.single @text
|
284
|
+
if @qualifiers
|
285
|
+
@qualifiers = @@qualifiers_splitter.multi @qualifiers
|
289
286
|
end
|
290
287
|
end
|
291
|
-
# Define a
|
288
|
+
# Define a regexp which separates the qualifier
|
292
289
|
# from the search text.
|
293
290
|
#
|
294
|
-
# Default is
|
295
|
-
#
|
296
|
-
# This is used in a String#split.
|
291
|
+
# Default is /:/.
|
297
292
|
#
|
298
293
|
# Example:
|
299
|
-
# Picky::Query::Token.qualifier_text_delimiter =
|
294
|
+
# Picky::Query::Token.qualifier_text_delimiter = /\?/
|
300
295
|
# try.search("text1?hello text2?world").ids.should == [1]
|
301
296
|
#
|
302
297
|
def self.qualifier_text_delimiter= character
|
303
298
|
@@qualifier_text_delimiter = character
|
299
|
+
@@qualifier_text_splitter = Splitter.new @@qualifier_text_delimiter
|
304
300
|
end
|
305
|
-
# Define a
|
301
|
+
# Define a regexp which separates the qualifiers
|
306
302
|
# (before the search text).
|
307
303
|
#
|
308
|
-
# Default is
|
309
|
-
#
|
310
|
-
# This is used in a String#split.
|
304
|
+
# Default is /,/.
|
311
305
|
#
|
312
306
|
# Example:
|
313
|
-
# Picky::Query::Token.qualifiers_delimiter =
|
307
|
+
# Picky::Query::Token.qualifiers_delimiter = /\|/
|
314
308
|
# try.search("text1|text2:hello").ids.should == [1]
|
315
309
|
#
|
316
310
|
|
317
311
|
def self.qualifiers_delimiter= character
|
318
312
|
@@qualifiers_delimiter = character
|
313
|
+
@@qualifiers_splitter = Splitter.new @@qualifiers_delimiter
|
319
314
|
end
|
320
315
|
|
321
316
|
# Returns the qualifiers as an array.
|
data/lib/picky/query/tokens.rb
CHANGED
@@ -7,7 +7,7 @@ module Picky
|
|
7
7
|
# This class primarily handles switching through similar token constellations.
|
8
8
|
#
|
9
9
|
class Tokens
|
10
|
-
|
10
|
+
|
11
11
|
attr_reader :tokens, :ignore_unassigned
|
12
12
|
|
13
13
|
# Basically forwards to its internal tokens array.
|
@@ -25,11 +25,13 @@ module Picky
|
|
25
25
|
|
26
26
|
# Creates a new Tokens object from a number of Strings.
|
27
27
|
#
|
28
|
+
@@or_splitting_pattern = /\|/
|
29
|
+
@@splitter = Splitter.new @@or_splitting_pattern
|
28
30
|
def self.processed words, originals, ignore_unassigned = false
|
29
31
|
new(words.zip(originals).collect! do |word, original|
|
30
|
-
w, *middle, rest
|
32
|
+
w, *middle, rest = @@splitter.multi word
|
31
33
|
if rest
|
32
|
-
Or.new processed [w, *middle, rest], original.split(
|
34
|
+
Or.new processed [w, *middle, rest], original.split(@@or_splitting_pattern)
|
33
35
|
else
|
34
36
|
Token.processed w, original
|
35
37
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
class Splitter < StringScanner
|
4
|
+
|
5
|
+
def initialize delimiter
|
6
|
+
@delimiter = delimiter
|
7
|
+
super ''
|
8
|
+
end
|
9
|
+
|
10
|
+
def single text
|
11
|
+
self.string = text
|
12
|
+
skip_until @delimiter
|
13
|
+
[pre_match, post_match || string]
|
14
|
+
end
|
15
|
+
|
16
|
+
def multi text
|
17
|
+
self.string = text
|
18
|
+
if exist? @delimiter
|
19
|
+
text.split @delimiter
|
20
|
+
else
|
21
|
+
[text]
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
@@ -1,19 +1,28 @@
|
|
1
|
-
|
1
|
+
module Picky
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
class Tokenizer
|
4
|
+
|
5
|
+
class RegexpWrapper
|
6
|
+
|
7
|
+
def initialize regexp
|
8
|
+
@regexp = regexp
|
9
|
+
@splitter = Splitter.new @regexp
|
10
|
+
end
|
6
11
|
|
7
|
-
|
8
|
-
|
9
|
-
|
12
|
+
def split text
|
13
|
+
@splitter.multi text
|
14
|
+
end
|
10
15
|
|
11
|
-
|
12
|
-
|
13
|
-
|
16
|
+
def source
|
17
|
+
@regexp.source
|
18
|
+
end
|
14
19
|
|
15
|
-
|
16
|
-
|
17
|
-
|
20
|
+
def method_missing name, *args, &block
|
21
|
+
@regexp.send name, *args, &block
|
22
|
+
end
|
18
23
|
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
19
28
|
end
|
data/lib/picky/tokenizer.rb
CHANGED
@@ -98,7 +98,7 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
|
98
98
|
# Note: We do not test against to_str since symbols do not work with String#split.
|
99
99
|
#
|
100
100
|
def splits_text_on thing
|
101
|
-
raise ArgumentError.new "#{__method__} takes a Regexp or
|
101
|
+
raise ArgumentError.new "#{__method__} takes a Regexp or a thing that responds to #split as argument, not a #{thing.class}." unless Regexp === thing || thing.respond_to?(:split)
|
102
102
|
@splits_text_on = if thing.respond_to? :split
|
103
103
|
thing
|
104
104
|
else
|
@@ -106,6 +106,8 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
|
106
106
|
end
|
107
107
|
end
|
108
108
|
def split text
|
109
|
+
# Does not create a new string if nothing is split.
|
110
|
+
#
|
109
111
|
@splits_text_on.split text
|
110
112
|
end
|
111
113
|
|
@@ -233,9 +235,9 @@ ERROR
|
|
233
235
|
# [[:token1, :token2], ["Original1", "Original2"]]
|
234
236
|
#
|
235
237
|
def tokenize text
|
236
|
-
text
|
238
|
+
text = preprocess text.to_s # processing the text
|
237
239
|
return empty_tokens if text.empty? # TODO blank?
|
238
|
-
words
|
240
|
+
words = pretokenize text # splitting and preparations for tokenizing
|
239
241
|
return empty_tokens if words.empty?
|
240
242
|
tokens = tokens_for words # creating tokens / strings
|
241
243
|
[tokens, words]
|
@@ -10,8 +10,8 @@ describe 'custom delimiters' do
|
|
10
10
|
Picky::Query::Token.similar_character = '~'
|
11
11
|
Picky::Query::Token.no_similar_character = '"'
|
12
12
|
Picky::Query::Token.range_character = '-'
|
13
|
-
Picky::Query::Token.qualifier_text_delimiter =
|
14
|
-
Picky::Query::Token.qualifiers_delimiter =
|
13
|
+
Picky::Query::Token.qualifier_text_delimiter = /:/
|
14
|
+
Picky::Query::Token.qualifiers_delimiter = /,/
|
15
15
|
end
|
16
16
|
|
17
17
|
context 'offers custom partial delimiters to be set' do
|
@@ -80,11 +80,11 @@ describe 'custom delimiters' do
|
|
80
80
|
try.search("text1:hello text2:world").ids.should == [1]
|
81
81
|
|
82
82
|
try.search("text1?hello text2?world").ids.should == []
|
83
|
-
Picky::Query::Token.qualifier_text_delimiter =
|
83
|
+
Picky::Query::Token.qualifier_text_delimiter = /\?/
|
84
84
|
try.search("text1?hello text2?world").ids.should == [1]
|
85
85
|
|
86
86
|
try.search("text1!text2?hello text2?world").ids.should == []
|
87
|
-
Picky::Query::Token.qualifiers_delimiter =
|
87
|
+
Picky::Query::Token.qualifiers_delimiter = /!/
|
88
88
|
try.search("text1!text2?hello text2?world").ids.should == [1]
|
89
89
|
end
|
90
90
|
|
@@ -0,0 +1,93 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe "Object Use" do
|
6
|
+
|
7
|
+
it 'is not too high' do
|
8
|
+
|
9
|
+
index = Picky::Index.new :object_use do
|
10
|
+
category :text1
|
11
|
+
category :text2
|
12
|
+
category :text3
|
13
|
+
category :text4
|
14
|
+
end
|
15
|
+
try = Picky::Search.new index
|
16
|
+
|
17
|
+
thing = Struct.new(:id, :text1, :text2, :text3, :text4)
|
18
|
+
index.add thing.new(1, 'one', 'two', 'three', 'four')
|
19
|
+
|
20
|
+
# Pre-run.
|
21
|
+
#
|
22
|
+
|
23
|
+
try.search 'one'
|
24
|
+
try.search 'one two three'
|
25
|
+
try.search 'text1:one'
|
26
|
+
try.search 'text1:one text2:two text3:three'
|
27
|
+
|
28
|
+
# Actual tests.
|
29
|
+
#
|
30
|
+
|
31
|
+
s = 'one'
|
32
|
+
result = mark do
|
33
|
+
try.search s
|
34
|
+
end
|
35
|
+
result.should == {} # No new strings since nothing is split.
|
36
|
+
|
37
|
+
s = 'one two three'
|
38
|
+
result = mark do
|
39
|
+
try.search s
|
40
|
+
end
|
41
|
+
result.should == {
|
42
|
+
"three" => 1,
|
43
|
+
"two" => 1,
|
44
|
+
"one" => 1,
|
45
|
+
'one two three' => 2 # TODO Is GC'd.
|
46
|
+
}
|
47
|
+
|
48
|
+
result = mark do
|
49
|
+
try.search 'text1:one'
|
50
|
+
end
|
51
|
+
result.should == {
|
52
|
+
"one" => 1,
|
53
|
+
"text1" => 1,
|
54
|
+
"text1:one" => 1
|
55
|
+
} # Only the necessary split strings.
|
56
|
+
|
57
|
+
s = 'text1:one text2:two text3:three'
|
58
|
+
result = mark do
|
59
|
+
try.search s
|
60
|
+
end
|
61
|
+
result.should == {
|
62
|
+
"three" => 1,
|
63
|
+
"two" => 1,
|
64
|
+
"one" => 1,
|
65
|
+
"text3" => 1,
|
66
|
+
"text2" => 1,
|
67
|
+
"text1" => 1,
|
68
|
+
"text3:three" => 1,
|
69
|
+
"text2:two" => 1,
|
70
|
+
"text1:one" => 1
|
71
|
+
} # Only the necessary split strings.
|
72
|
+
|
73
|
+
s = 'text1:one text2:two text3,text4:three'
|
74
|
+
result = mark do
|
75
|
+
try.search s
|
76
|
+
end
|
77
|
+
result.should == {
|
78
|
+
"three" => 1,
|
79
|
+
"two" => 1,
|
80
|
+
"one" => 1,
|
81
|
+
"text3,text4" => 2, # TODO
|
82
|
+
"text3" => 1,
|
83
|
+
"text4" => 1,
|
84
|
+
"text2" => 1,
|
85
|
+
"text1" => 1,
|
86
|
+
"text1:one" => 1,
|
87
|
+
"text2:two" => 1,
|
88
|
+
"text3,text4:three" => 1
|
89
|
+
}
|
90
|
+
|
91
|
+
end
|
92
|
+
|
93
|
+
end
|
@@ -151,19 +151,19 @@ describe Picky::Query::Token do
|
|
151
151
|
it_should_qualify 'with:qualifier', [['with'], 'qualifier']
|
152
152
|
it_should_qualify 'without qualifier', [nil, 'without qualifier']
|
153
153
|
it_should_qualify 'name:', [['name'], '']
|
154
|
-
it_should_qualify ':broken qualifier', [[],
|
154
|
+
it_should_qualify ':broken qualifier', [[''], 'broken qualifier'] # Unsure about that. Probably should recognize it as text.
|
155
155
|
it_should_qualify '', [nil, '']
|
156
156
|
it_should_qualify 'sp:text', [['sp'], 'text']
|
157
157
|
it_should_qualify '""', [nil, '""']
|
158
158
|
it_should_qualify 'name:', [['name'], '']
|
159
159
|
it_should_qualify 'name:hanke', [['name'], 'hanke']
|
160
160
|
it_should_qualify 'g:gaga', [['g'], 'gaga']
|
161
|
-
it_should_qualify ':nothing', [[],
|
161
|
+
it_should_qualify ':nothing', [[''], 'nothing']
|
162
162
|
it_should_qualify 'hello', [nil, 'hello']
|
163
163
|
it_should_qualify 'a:b:c', [['a'], 'b:c']
|
164
164
|
it_should_qualify 'a,b:c', [['a','b'], 'c']
|
165
165
|
it_should_qualify 'a,b,c:d', [['a','b','c'], 'd']
|
166
|
-
it_should_qualify ':', [[],
|
166
|
+
it_should_qualify ':', [[''], '']
|
167
167
|
it_should_qualify 'vorname:qualifier', [['vorname'], 'qualifier']
|
168
168
|
end
|
169
169
|
|
@@ -429,7 +429,7 @@ describe Picky::Query::Token do
|
|
429
429
|
context 'with missing qualifier' do
|
430
430
|
let(:token) { described_class.processed ':missingqualifier' }
|
431
431
|
it 'is correct' do
|
432
|
-
token.qualifiers.should == []
|
432
|
+
token.qualifiers.should == ['']
|
433
433
|
token.text.should == 'missingqualifier'
|
434
434
|
end
|
435
435
|
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Picky::Splitter do
|
4
|
+
|
5
|
+
describe "single" do
|
6
|
+
let(:splitter) { described_class.new /:/ }
|
7
|
+
it "splits right" do
|
8
|
+
splitter.single(':b').should == ['','b']
|
9
|
+
end
|
10
|
+
it "splits right" do
|
11
|
+
splitter.single('a:b').should == ['a','b']
|
12
|
+
end
|
13
|
+
it "splits right" do
|
14
|
+
splitter.single('a').should == [nil, 'a']
|
15
|
+
end
|
16
|
+
it "splits right" do
|
17
|
+
splitter.single('a:b c:d').should == ['a', 'b c:d']
|
18
|
+
end
|
19
|
+
it "returns the same string if not split" do
|
20
|
+
s = 'a'
|
21
|
+
splitter.single(s)[1].object_id.should == s.object_id
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe "multi" do
|
26
|
+
let(:splitter) { described_class.new /\s/ }
|
27
|
+
it "splits right" do
|
28
|
+
splitter.multi(' b').should == ['', 'b']
|
29
|
+
end
|
30
|
+
it "splits right" do
|
31
|
+
splitter.multi('a b').should == ['a', 'b']
|
32
|
+
end
|
33
|
+
it "splits right" do
|
34
|
+
splitter.multi('a b c d').should == ['a', 'b', 'c', 'd']
|
35
|
+
end
|
36
|
+
it "splits right" do
|
37
|
+
splitter.multi('a').should == ['a']
|
38
|
+
end
|
39
|
+
it "returns the same string if not split" do
|
40
|
+
s = 'a'
|
41
|
+
splitter.multi(s).first.object_id.should == s.object_id
|
42
|
+
end
|
43
|
+
# it 'is faster than split' do
|
44
|
+
# pattern = /\s/
|
45
|
+
# amount = 1000
|
46
|
+
# text = 'abcd'
|
47
|
+
# split = performance_of do
|
48
|
+
# amount.times { text.split pattern }
|
49
|
+
# end
|
50
|
+
# multi = performance_of do
|
51
|
+
# amount.times { splitter.multi text, pattern }
|
52
|
+
# end
|
53
|
+
# split.should < multi
|
54
|
+
# end
|
55
|
+
# it 'is slower than split (but uses less memory in the non-split case)' do
|
56
|
+
# pattern = /\s/
|
57
|
+
# amount = 1000
|
58
|
+
# text = 'a b'
|
59
|
+
# multi = performance_of do
|
60
|
+
# amount.times { splitter.multi text, pattern }
|
61
|
+
# end
|
62
|
+
# split = performance_of do
|
63
|
+
# amount.times { text.split pattern }
|
64
|
+
# end
|
65
|
+
# # p split
|
66
|
+
# # p multi
|
67
|
+
# end
|
68
|
+
# it 'is slower than split (but uses less memory in the non-split case)' do
|
69
|
+
# pattern = /\s/
|
70
|
+
# amount = 1000
|
71
|
+
# text = 'a b c d'
|
72
|
+
# multi = performance_of do
|
73
|
+
# amount.times { splitter.multi text, pattern }
|
74
|
+
# end
|
75
|
+
# split = performance_of do
|
76
|
+
# amount.times { text.split pattern }
|
77
|
+
# end
|
78
|
+
# # p split
|
79
|
+
# # p multi
|
80
|
+
# end
|
81
|
+
end
|
82
|
+
|
83
|
+
end
|
data/spec/performant_spec.rb
CHANGED
@@ -122,6 +122,13 @@ describe Performant::Array do
|
|
122
122
|
#
|
123
123
|
performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.0015
|
124
124
|
end
|
125
|
+
it "should be optimal for many small arrays of length == 10" do
|
126
|
+
arys = [('1'..'10').to_a, ('10'..'20').to_a, ['10'] + ('10000'..'20000').to_a]
|
127
|
+
|
128
|
+
# Brute force - note that it is slower than the Symbols/Integers version.
|
129
|
+
#
|
130
|
+
performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.0015
|
131
|
+
end
|
125
132
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
126
133
|
arys = [('1'..'50').to_a, ('10000'..'20000').to_a << 7]
|
127
134
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.
|
4
|
+
version: 4.19.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Florian Hanke
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-08-
|
11
|
+
date: 2013-08-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - ~>
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 4.
|
47
|
+
version: 4.19.0
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 4.
|
54
|
+
version: 4.19.0
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: text
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -243,6 +243,7 @@ files:
|
|
243
243
|
- lib/picky/sinatra/index_actions.rb
|
244
244
|
- lib/picky/sinatra.rb
|
245
245
|
- lib/picky/source.rb
|
246
|
+
- lib/picky/splitter.rb
|
246
247
|
- lib/picky/splitters/automatic.rb
|
247
248
|
- lib/picky/statistics.rb
|
248
249
|
- lib/picky/tasks.rb
|
@@ -289,6 +290,7 @@ files:
|
|
289
290
|
- spec/functional/multi_index_qualifier_spec.rb
|
290
291
|
- spec/functional/no_tokenize_spec.rb
|
291
292
|
- spec/functional/non_specific_ids_larger_than_20_spec.rb
|
293
|
+
- spec/functional/object_use_spec.rb
|
292
294
|
- spec/functional/only_spec.rb
|
293
295
|
- spec/functional/or_spec.rb
|
294
296
|
- spec/functional/pool_spec.rb
|
@@ -406,6 +408,7 @@ files:
|
|
406
408
|
- spec/lib/sinatra_spec.rb
|
407
409
|
- spec/lib/solr/schema_generator_spec.rb
|
408
410
|
- spec/lib/source_spec.rb
|
411
|
+
- spec/lib/splitter_spec.rb
|
409
412
|
- spec/lib/statistics_spec.rb
|
410
413
|
- spec/lib/tasks/try_spec.rb
|
411
414
|
- spec/lib/tokenizer_spec.rb
|
@@ -463,6 +466,7 @@ test_files:
|
|
463
466
|
- spec/functional/multi_index_qualifier_spec.rb
|
464
467
|
- spec/functional/no_tokenize_spec.rb
|
465
468
|
- spec/functional/non_specific_ids_larger_than_20_spec.rb
|
469
|
+
- spec/functional/object_use_spec.rb
|
466
470
|
- spec/functional/only_spec.rb
|
467
471
|
- spec/functional/or_spec.rb
|
468
472
|
- spec/functional/pool_spec.rb
|
@@ -580,6 +584,7 @@ test_files:
|
|
580
584
|
- spec/lib/sinatra_spec.rb
|
581
585
|
- spec/lib/solr/schema_generator_spec.rb
|
582
586
|
- spec/lib/source_spec.rb
|
587
|
+
- spec/lib/splitter_spec.rb
|
583
588
|
- spec/lib/statistics_spec.rb
|
584
589
|
- spec/lib/tasks/try_spec.rb
|
585
590
|
- spec/lib/tokenizer_spec.rb
|