picky 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,3 @@
|
|
1
|
-
// Note: This is the Ruby 1.9 version.
|
2
|
-
//
|
3
1
|
#include "ruby.h"
|
4
2
|
|
5
3
|
// Copying internal ruby methods.
|
@@ -14,7 +12,6 @@ static inline VALUE rb_ary_elt(ary, offset)
|
|
14
12
|
}
|
15
13
|
return RARRAY_PTR(ary)[offset];
|
16
14
|
}
|
17
|
-
VALUE rb_ary_make_hash(VALUE, VALUE);
|
18
15
|
static VALUE ary_make_hash(ary1, ary2)
|
19
16
|
VALUE ary1, ary2;
|
20
17
|
{
|
@@ -32,51 +29,50 @@ static VALUE ary_make_hash(ary1, ary2)
|
|
32
29
|
return hash;
|
33
30
|
}
|
34
31
|
|
35
|
-
// Comparison functions.
|
36
|
-
//
|
37
|
-
inline int intvaluecmp(VALUE a, VALUE b) {
|
38
|
-
return FIX2INT(a) - FIX2INT(b);
|
39
|
-
}
|
40
|
-
inline int intcmp(const int * a, const int * b) {
|
41
|
-
return (*a - *b);
|
42
|
-
}
|
43
|
-
inline long longcmp(const void * a, const void * b) {
|
44
|
-
return (*(long*) a - *(long*) b);
|
45
|
-
}
|
46
|
-
|
47
32
|
// This version just calls the & consecutively for all arrays.
|
48
|
-
//
|
33
|
+
//
|
34
|
+
// The arrays need to be pre-sorted small to large.
|
35
|
+
//
|
49
36
|
inline VALUE memory_efficient_intersect(VALUE self, VALUE length_sorted_array_of_arrays) {
|
50
|
-
//
|
37
|
+
// Counters.
|
38
|
+
//
|
51
39
|
long i, j;
|
52
|
-
|
53
|
-
//
|
40
|
+
|
41
|
+
// Vars.
|
42
|
+
//
|
54
43
|
struct RArray *rb_array_of_arrays;
|
55
|
-
|
56
|
-
|
44
|
+
VALUE smallest_array;
|
45
|
+
VALUE current_array;
|
57
46
|
VALUE hash;
|
58
|
-
|
59
|
-
//
|
47
|
+
|
48
|
+
// Temps.
|
49
|
+
//
|
60
50
|
VALUE v, vv;
|
61
|
-
|
62
|
-
//
|
51
|
+
|
52
|
+
// Conversions.
|
53
|
+
//
|
63
54
|
rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
|
64
|
-
smallest_array = RARRAY(rb_ary_dup(RARRAY_PTR(rb_array_of_arrays)[0]));
|
65
|
-
|
66
|
-
//
|
55
|
+
smallest_array = (VALUE) RARRAY(rb_ary_dup(RARRAY_PTR(rb_array_of_arrays)[0]));
|
56
|
+
|
57
|
+
// Iterate through all arrays.
|
58
|
+
//
|
67
59
|
for (i = 1; i < RARRAY_LEN(rb_array_of_arrays); i++) {
|
68
60
|
// Break if the smallest array is empty
|
69
61
|
if (RARRAY_LEN(smallest_array) == 0) {
|
70
62
|
break;
|
71
63
|
}
|
72
|
-
|
73
|
-
//
|
64
|
+
|
65
|
+
// Make a hash from the currently smallest version.
|
66
|
+
//
|
74
67
|
hash = ary_make_hash(smallest_array, 0);
|
75
|
-
|
68
|
+
|
69
|
+
// Clear for use as temp array.
|
70
|
+
//
|
76
71
|
rb_ary_clear(smallest_array);
|
77
|
-
|
72
|
+
|
73
|
+
// Iterate through all array elements.
|
74
|
+
//
|
78
75
|
current_array = RARRAY_PTR(rb_array_of_arrays)[i];
|
79
|
-
// iterate through all array elements
|
80
76
|
for (j = 0; j < RARRAY_LEN(current_array); j++) {
|
81
77
|
v = vv = rb_ary_elt(current_array, j);
|
82
78
|
if (st_delete(RHASH_TBL(hash), (unsigned long*)&vv, 0)) {
|
@@ -84,256 +80,14 @@ inline VALUE memory_efficient_intersect(VALUE self, VALUE length_sorted_array_of
|
|
84
80
|
}
|
85
81
|
}
|
86
82
|
}
|
87
|
-
|
83
|
+
|
88
84
|
return smallest_array;
|
89
85
|
}
|
90
86
|
|
91
|
-
// Brute force algorithm to find the intersection of an array of length sorted, unsorted arrays.
|
92
|
-
// This algorithm can be faster than others for small arrays.
|
93
|
-
//
|
94
|
-
// inline VALUE brute_force_intersect(VALUE self, VALUE length_sorted_array_of_arrays) {
|
95
|
-
// // counters
|
96
|
-
// long i, j, k;
|
97
|
-
//
|
98
|
-
// // structs
|
99
|
-
// struct RArray *rb_array_of_arrays;
|
100
|
-
// struct RArray *candidate_answer_set;
|
101
|
-
// struct RArray *current_set;
|
102
|
-
//
|
103
|
-
// // conversions
|
104
|
-
// rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
|
105
|
-
//
|
106
|
-
// // temps
|
107
|
-
// VALUE e;
|
108
|
-
// unsigned char found;
|
109
|
-
//
|
110
|
-
// // Let the smallest set s[0] be the candidate answer set
|
111
|
-
// // Note: Need a duplicate
|
112
|
-
// candidate_answer_set = RARRAY(rb_ary_dup(rb_array_of_arrays->ptr[0]));
|
113
|
-
//
|
114
|
-
// // For each entry in candidate anser set
|
115
|
-
// // Get current value
|
116
|
-
// for(i = 0; i < candidate_answer_set->len; i++) {
|
117
|
-
// e = candidate_answer_set->ptr[i];
|
118
|
-
//
|
119
|
-
// // Find the current value in other arrays
|
120
|
-
// // if not found, break
|
121
|
-
// for(j = 1; j < rb_array_of_arrays->len; j++) {
|
122
|
-
// current_set = RARRAY(rb_array_of_arrays->ptr[j]);
|
123
|
-
// found = 0;
|
124
|
-
//
|
125
|
-
// // Find with a linear search
|
126
|
-
// for(k = 0; k < current_set->len; k++) {
|
127
|
-
// if (e == current_set->ptr[k]) {
|
128
|
-
// found = 1;
|
129
|
-
// break;
|
130
|
-
// }
|
131
|
-
// }
|
132
|
-
//
|
133
|
-
// // break if not found
|
134
|
-
// if (!found) {
|
135
|
-
// break;
|
136
|
-
// }
|
137
|
-
// }
|
138
|
-
//
|
139
|
-
// // remove from candidate answer set if not found
|
140
|
-
// if (!found) {
|
141
|
-
// candidate_answer_set->ptr[i] = Qnil;
|
142
|
-
// }
|
143
|
-
// }
|
144
|
-
//
|
145
|
-
// // compact the candidate answer set
|
146
|
-
// // rb_ary_compact_bang(candidate_answer_set);
|
147
|
-
// rb_funcall(candidate_answer_set, rb_intern("compact!"), 0);
|
148
|
-
//
|
149
|
-
// return candidate_answer_set;
|
150
|
-
// }
|
151
|
-
|
152
|
-
// inline VALUE intersect_unique(VALUE self, VALUE length_sorted_array_of_arrays) {
|
153
|
-
// // VALUE length_sorted_array_of_arrays = (_length_sorted_array_of_arrays);
|
154
|
-
//
|
155
|
-
// // structs
|
156
|
-
// struct RArray *result;
|
157
|
-
// struct RArray *rb_array_of_arrays;
|
158
|
-
//
|
159
|
-
// // conversions
|
160
|
-
// rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
|
161
|
-
//
|
162
|
-
// // TODO
|
163
|
-
//
|
164
|
-
// return result;
|
165
|
-
// }
|
166
|
-
|
167
|
-
// Generates the intersection of multiple
|
168
|
-
//
|
169
|
-
// inline VALUE sorting_intersect_multiple(VALUE self, VALUE length_sorted_array_of_arrays) {
|
170
|
-
// // TODO
|
171
|
-
// }
|
172
|
-
|
173
|
-
// Generates the intersection of multiple length sorted, sorted arrays
|
174
|
-
//
|
175
|
-
// inline VALUE intersect_multiple_sorted(VALUE self, VALUE _length_sorted_array_of_arrays) {
|
176
|
-
// VALUE length_sorted_array_of_arrays = (_length_sorted_array_of_arrays);
|
177
|
-
//
|
178
|
-
// // counters
|
179
|
-
// long i, j;
|
180
|
-
// long current_set_position, current_answer_set_position;
|
181
|
-
//
|
182
|
-
// // structs
|
183
|
-
// struct RArray *rb_array_of_arrays;
|
184
|
-
// struct RArray *candidate_answer_set;
|
185
|
-
// struct RArray *current_set;
|
186
|
-
//
|
187
|
-
// // temps
|
188
|
-
// long e;
|
189
|
-
//
|
190
|
-
// // conversions
|
191
|
-
// rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
|
192
|
-
//
|
193
|
-
// // Let the smallest set s[0] be the candidate answer set
|
194
|
-
// // Note: Need a duplicate
|
195
|
-
// candidate_answer_set = RARRAY(rb_ary_dup(rb_array_of_arrays->ptr[0]));
|
196
|
-
//
|
197
|
-
// // For each set s[i], i = 1 .. k do
|
198
|
-
// for(i = 1; i < rb_array_of_arrays->len; i++) {
|
199
|
-
// current_set = RARRAY(rb_array_of_arrays->ptr[i]);
|
200
|
-
// current_set_position = 0;
|
201
|
-
//
|
202
|
-
// // for each element e in the candidate answer set
|
203
|
-
// for(j = 0; j < candidate_answer_set->len; j++) {
|
204
|
-
// e = candidate_answer_set->ptr[j];
|
205
|
-
//
|
206
|
-
// // search for e in the range l[i] to size(s[i])
|
207
|
-
// // and update l[i] to the last position probed in the previous step
|
208
|
-
// // if e was not found then
|
209
|
-
// if (bsearch(
|
210
|
-
// &e,
|
211
|
-
// ¤t_set->ptr[current_set_position],
|
212
|
-
// (current_set->len - current_set_position),
|
213
|
-
// sizeof(VALUE), //sizeof(current_set->ptr[0]),
|
214
|
-
// intcmp //longcmp
|
215
|
-
// ) == NULL) {
|
216
|
-
//
|
217
|
-
// // remove e from the candidate answer set
|
218
|
-
// // and advance e to the next element in the answer set
|
219
|
-
// // rb_ary_delete_at(candidate_answer_set, j);
|
220
|
-
// candidate_answer_set->ptr[j] = Qnil;
|
221
|
-
// }
|
222
|
-
// current_set_position = j - 1;
|
223
|
-
// }
|
224
|
-
//
|
225
|
-
// // compact the candidate answer set
|
226
|
-
// // rb_ary_compact_bang(candidate_answer_set);
|
227
|
-
// rb_funcall(candidate_answer_set, rb_intern("compact!"), 0);
|
228
|
-
// }
|
229
|
-
//
|
230
|
-
// return candidate_answer_set;
|
231
|
-
// }
|
232
|
-
|
233
|
-
// Trying to make a custom version of Matz' ary &
|
234
|
-
//
|
235
|
-
// Differences:
|
236
|
-
// * Multiple arrays
|
237
|
-
// * No to_ary
|
238
|
-
// * Smallest array is used to make hash
|
239
|
-
// Note: Assumes that whatever is given in as array of arrays is sorted by array sizes.
|
240
|
-
//
|
241
|
-
// static VALUE rb_ary_and(ary1, ary2) VALUE ary1, ary2; {
|
242
|
-
// static VALUE intersect_multiple_with_hash(VALUE self, VALUE _length_sorted_array_of_arrays) {
|
243
|
-
// // VALUE hash, ary3, v, vv;
|
244
|
-
// // long i;
|
245
|
-
// //
|
246
|
-
// // ary2 = to_ary(ary2);
|
247
|
-
// // ary3 = rb_ary_new2(RARRAY(ary1)->len < RARRAY(ary2)->len ?
|
248
|
-
// // RARRAY(ary1)->len : RARRAY(ary2)->len);
|
249
|
-
// // hash = ary_make_hash(ary2, 0);
|
250
|
-
// //
|
251
|
-
// // for (i=0; i<RARRAY(ary1)->len; i++) {
|
252
|
-
// // v = vv = rb_ary_elt(ary1, i);
|
253
|
-
// // if (st_delete(RHASH(hash)->tbl, (st_data_t*)&vv, 0)) {
|
254
|
-
// // rb_ary_push(ary3, v);
|
255
|
-
// // }
|
256
|
-
// // }
|
257
|
-
// //
|
258
|
-
// // return ary3;
|
259
|
-
// VALUE length_sorted_array_of_arrays = (_length_sorted_array_of_arrays);
|
260
|
-
//
|
261
|
-
// // structs
|
262
|
-
// struct RArray *candidate_answer_set;
|
263
|
-
// struct RArray *current_set;
|
264
|
-
//
|
265
|
-
// // temps
|
266
|
-
// VALUE hash, v, vv;
|
267
|
-
// long i, j, k;
|
268
|
-
//
|
269
|
-
// // Get smallest array size
|
270
|
-
// candidate_answer_set = rb_ary_new2((RARRAY(rb_array_of_arrays->ptr[0])->len);
|
271
|
-
//
|
272
|
-
// hash = ary_make_hash(RARRAY(rb_array_of_arrays->ptr[0]), 0);
|
273
|
-
//
|
274
|
-
// // For each entry in candidate answer set
|
275
|
-
// // Get current value
|
276
|
-
// for(i = 0; i < candidate_answer_set->len; i++) {
|
277
|
-
// // e = candidate_answer_set->ptr[i];
|
278
|
-
// v = vv = rb_ary_elt(candidate_answer_set, i);
|
279
|
-
//
|
280
|
-
// // Find the current value in other arrays
|
281
|
-
// // if not found, break
|
282
|
-
// for(j = 1; j < rb_array_of_arrays->len; j++) {
|
283
|
-
// current_set = RARRAY(rb_array_of_arrays->ptr[j]);
|
284
|
-
// found = 0;
|
285
|
-
//
|
286
|
-
// // Find with a linear search
|
287
|
-
// for(k = 0; k < current_set->len; k++) {
|
288
|
-
// // if (e == current_set->ptr[k]) {
|
289
|
-
// if (st_delete(RHASH(hash)->tbl, (unsigned long*)&vv, 0))
|
290
|
-
// found = 1;
|
291
|
-
// break;
|
292
|
-
// }
|
293
|
-
// }
|
294
|
-
//
|
295
|
-
// // break if not found
|
296
|
-
// if (!found) {
|
297
|
-
// break;
|
298
|
-
// }
|
299
|
-
// }
|
300
|
-
//
|
301
|
-
// // remove from candidate answer set if not found
|
302
|
-
// if (!found) {
|
303
|
-
// rb_ary_push(result, v);
|
304
|
-
// // candidate_answer_set->ptr[i] = Qnil;
|
305
|
-
// }
|
306
|
-
// }
|
307
|
-
//
|
308
|
-
// // compact the candidate answer set
|
309
|
-
// // rb_ary_compact_bang(candidate_answer_set);
|
310
|
-
// rb_funcall(candidate_answer_set, rb_intern("compact!"), 0);
|
311
|
-
//
|
312
|
-
// return candidate_answer_set;
|
313
|
-
// }
|
314
|
-
|
315
|
-
// VALUE rb_ary_clear_bang(ary) VALUE ary; {
|
316
|
-
// rb_ary_modify(ary);
|
317
|
-
// ARY_SET_LEN(ary, 0);
|
318
|
-
// // capa stays the same
|
319
|
-
// // if (ARY_DEFAULT_SIZE * 2 < RARRAY(ary)->aux.capa) {
|
320
|
-
// // REALLOC_N(RARRAY(ary)->ptr, VALUE, ARY_DEFAULT_SIZE * 2);
|
321
|
-
// // RARRAY(ary)->aux.capa = ARY_DEFAULT_SIZE * 2;
|
322
|
-
// // }
|
323
|
-
// return ary;
|
324
|
-
// }
|
325
|
-
|
326
87
|
VALUE p_mPerformant, p_cArray;
|
327
88
|
|
328
89
|
void Init_performant() {
|
329
90
|
p_mPerformant = rb_define_module("Performant");
|
330
91
|
p_cArray = rb_define_class_under(p_mPerformant, "Array", rb_cObject);
|
331
|
-
// p_cArray = rb_define_module_under(p_mPerformant, "Array");
|
332
|
-
|
333
|
-
// rb_define_method(rb_cArray, "clear!", rb_ary_clear_bang, 0);
|
334
|
-
|
335
92
|
rb_define_singleton_method(p_cArray, "memory_efficient_intersect", memory_efficient_intersect, 1);
|
336
|
-
// rb_define_singleton_method(p_cArray, "brute_force_intersect", brute_force_intersect, 1);
|
337
|
-
// rb_define_singleton_method(p_cArray, "intersect_multiple_sorted", intersect_multiple_sorted, 1);
|
338
|
-
// rb_define_singleton_method(p_cArray, "intersect_multiple_with_hash", intersect_multiple_sorted_with_hash, 1);
|
339
93
|
}
|
@@ -25,7 +25,7 @@ class PickySearch < Application # The App Constant needs to be identical in appl
|
|
25
25
|
),
|
26
26
|
field(:title, :qualifiers => [:t, :title, :titre], :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title word indexed.
|
27
27
|
field(:author, :qualifiers => [:s, :author, :auteur]),
|
28
|
-
field(:isbn, :qualifiers => [:i, :isbn])
|
28
|
+
field(:isbn, :qualifiers => [:i, :isbn], :partial => Partial::None.new) # Partially searching on an ISBN makes not much sense.
|
29
29
|
end
|
30
30
|
|
31
31
|
queries do
|
@@ -30,6 +30,20 @@ describe Cacher::Partial::Subtoken do
|
|
30
30
|
end
|
31
31
|
end
|
32
32
|
context 'down_to set' do
|
33
|
+
context "large down_to" do
|
34
|
+
before(:each) do
|
35
|
+
@cacher = Cacher::Partial::Subtoken.new :down_to => 10
|
36
|
+
end
|
37
|
+
describe 'generate_from' do
|
38
|
+
it 'should generate the right index' do
|
39
|
+
@cacher.generate_from( :florian => [1], :'01234567890' => [2] ).should == {
|
40
|
+
:florian => [1],
|
41
|
+
:'01234567890' => [2],
|
42
|
+
:'0123456789' => [2]
|
43
|
+
}
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
33
47
|
context 'default starting_at' do
|
34
48
|
before(:each) do
|
35
49
|
@cacher = Cacher::Partial::Subtoken.new :down_to => 4
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
- 2
|
9
|
-
version: 0.0.2
|
8
|
+
- 3
|
9
|
+
version: 0.0.3
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Florian Hanke
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-09-
|
17
|
+
date: 2010-09-30 00:00:00 +02:00
|
18
18
|
default_executable: picky
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|