picky 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,3 @@
|
|
1
|
-
// Note: This is the Ruby 1.9 version.
|
2
|
-
//
|
3
1
|
#include "ruby.h"
|
4
2
|
|
5
3
|
// Copying internal ruby methods.
|
@@ -14,7 +12,6 @@ static inline VALUE rb_ary_elt(ary, offset)
|
|
14
12
|
}
|
15
13
|
return RARRAY_PTR(ary)[offset];
|
16
14
|
}
|
17
|
-
VALUE rb_ary_make_hash(VALUE, VALUE);
|
18
15
|
static VALUE ary_make_hash(ary1, ary2)
|
19
16
|
VALUE ary1, ary2;
|
20
17
|
{
|
@@ -32,51 +29,50 @@ static VALUE ary_make_hash(ary1, ary2)
|
|
32
29
|
return hash;
|
33
30
|
}
|
34
31
|
|
35
|
-
// Comparison functions.
|
36
|
-
//
|
37
|
-
inline int intvaluecmp(VALUE a, VALUE b) {
|
38
|
-
return FIX2INT(a) - FIX2INT(b);
|
39
|
-
}
|
40
|
-
inline int intcmp(const int * a, const int * b) {
|
41
|
-
return (*a - *b);
|
42
|
-
}
|
43
|
-
inline long longcmp(const void * a, const void * b) {
|
44
|
-
return (*(long*) a - *(long*) b);
|
45
|
-
}
|
46
|
-
|
47
32
|
// This version just calls the & consecutively for all arrays.
|
48
|
-
//
|
33
|
+
//
|
34
|
+
// The arrays need to be pre-sorted small to large.
|
35
|
+
//
|
49
36
|
inline VALUE memory_efficient_intersect(VALUE self, VALUE length_sorted_array_of_arrays) {
|
50
|
-
//
|
37
|
+
// Counters.
|
38
|
+
//
|
51
39
|
long i, j;
|
52
|
-
|
53
|
-
//
|
40
|
+
|
41
|
+
// Vars.
|
42
|
+
//
|
54
43
|
struct RArray *rb_array_of_arrays;
|
55
|
-
|
56
|
-
|
44
|
+
VALUE smallest_array;
|
45
|
+
VALUE current_array;
|
57
46
|
VALUE hash;
|
58
|
-
|
59
|
-
//
|
47
|
+
|
48
|
+
// Temps.
|
49
|
+
//
|
60
50
|
VALUE v, vv;
|
61
|
-
|
62
|
-
//
|
51
|
+
|
52
|
+
// Conversions.
|
53
|
+
//
|
63
54
|
rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
|
64
|
-
smallest_array = RARRAY(rb_ary_dup(RARRAY_PTR(rb_array_of_arrays)[0]));
|
65
|
-
|
66
|
-
//
|
55
|
+
smallest_array = (VALUE) RARRAY(rb_ary_dup(RARRAY_PTR(rb_array_of_arrays)[0]));
|
56
|
+
|
57
|
+
// Iterate through all arrays.
|
58
|
+
//
|
67
59
|
for (i = 1; i < RARRAY_LEN(rb_array_of_arrays); i++) {
|
68
60
|
// Break if the smallest array is empty
|
69
61
|
if (RARRAY_LEN(smallest_array) == 0) {
|
70
62
|
break;
|
71
63
|
}
|
72
|
-
|
73
|
-
//
|
64
|
+
|
65
|
+
// Make a hash from the currently smallest version.
|
66
|
+
//
|
74
67
|
hash = ary_make_hash(smallest_array, 0);
|
75
|
-
|
68
|
+
|
69
|
+
// Clear for use as temp array.
|
70
|
+
//
|
76
71
|
rb_ary_clear(smallest_array);
|
77
|
-
|
72
|
+
|
73
|
+
// Iterate through all array elements.
|
74
|
+
//
|
78
75
|
current_array = RARRAY_PTR(rb_array_of_arrays)[i];
|
79
|
-
// iterate through all array elements
|
80
76
|
for (j = 0; j < RARRAY_LEN(current_array); j++) {
|
81
77
|
v = vv = rb_ary_elt(current_array, j);
|
82
78
|
if (st_delete(RHASH_TBL(hash), (unsigned long*)&vv, 0)) {
|
@@ -84,256 +80,14 @@ inline VALUE memory_efficient_intersect(VALUE self, VALUE length_sorted_array_of
|
|
84
80
|
}
|
85
81
|
}
|
86
82
|
}
|
87
|
-
|
83
|
+
|
88
84
|
return smallest_array;
|
89
85
|
}
|
90
86
|
|
91
|
-
// Brute force algorithm to find the intersection of an array of length sorted, unsorted arrays.
|
92
|
-
// This algorithm can be faster than others for small arrays.
|
93
|
-
//
|
94
|
-
// inline VALUE brute_force_intersect(VALUE self, VALUE length_sorted_array_of_arrays) {
|
95
|
-
// // counters
|
96
|
-
// long i, j, k;
|
97
|
-
//
|
98
|
-
// // structs
|
99
|
-
// struct RArray *rb_array_of_arrays;
|
100
|
-
// struct RArray *candidate_answer_set;
|
101
|
-
// struct RArray *current_set;
|
102
|
-
//
|
103
|
-
// // conversions
|
104
|
-
// rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
|
105
|
-
//
|
106
|
-
// // temps
|
107
|
-
// VALUE e;
|
108
|
-
// unsigned char found;
|
109
|
-
//
|
110
|
-
// // Let the smallest set s[0] be the candidate answer set
|
111
|
-
// // Note: Need a duplicate
|
112
|
-
// candidate_answer_set = RARRAY(rb_ary_dup(rb_array_of_arrays->ptr[0]));
|
113
|
-
//
|
114
|
-
// // For each entry in candidate anser set
|
115
|
-
// // Get current value
|
116
|
-
// for(i = 0; i < candidate_answer_set->len; i++) {
|
117
|
-
// e = candidate_answer_set->ptr[i];
|
118
|
-
//
|
119
|
-
// // Find the current value in other arrays
|
120
|
-
// // if not found, break
|
121
|
-
// for(j = 1; j < rb_array_of_arrays->len; j++) {
|
122
|
-
// current_set = RARRAY(rb_array_of_arrays->ptr[j]);
|
123
|
-
// found = 0;
|
124
|
-
//
|
125
|
-
// // Find with a linear search
|
126
|
-
// for(k = 0; k < current_set->len; k++) {
|
127
|
-
// if (e == current_set->ptr[k]) {
|
128
|
-
// found = 1;
|
129
|
-
// break;
|
130
|
-
// }
|
131
|
-
// }
|
132
|
-
//
|
133
|
-
// // break if not found
|
134
|
-
// if (!found) {
|
135
|
-
// break;
|
136
|
-
// }
|
137
|
-
// }
|
138
|
-
//
|
139
|
-
// // remove from candidate answer set if not found
|
140
|
-
// if (!found) {
|
141
|
-
// candidate_answer_set->ptr[i] = Qnil;
|
142
|
-
// }
|
143
|
-
// }
|
144
|
-
//
|
145
|
-
// // compact the candidate answer set
|
146
|
-
// // rb_ary_compact_bang(candidate_answer_set);
|
147
|
-
// rb_funcall(candidate_answer_set, rb_intern("compact!"), 0);
|
148
|
-
//
|
149
|
-
// return candidate_answer_set;
|
150
|
-
// }
|
151
|
-
|
152
|
-
// inline VALUE intersect_unique(VALUE self, VALUE length_sorted_array_of_arrays) {
|
153
|
-
// // VALUE length_sorted_array_of_arrays = (_length_sorted_array_of_arrays);
|
154
|
-
//
|
155
|
-
// // structs
|
156
|
-
// struct RArray *result;
|
157
|
-
// struct RArray *rb_array_of_arrays;
|
158
|
-
//
|
159
|
-
// // conversions
|
160
|
-
// rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
|
161
|
-
//
|
162
|
-
// // TODO
|
163
|
-
//
|
164
|
-
// return result;
|
165
|
-
// }
|
166
|
-
|
167
|
-
// Generates the intersection of multiple
|
168
|
-
//
|
169
|
-
// inline VALUE sorting_intersect_multiple(VALUE self, VALUE length_sorted_array_of_arrays) {
|
170
|
-
// // TODO
|
171
|
-
// }
|
172
|
-
|
173
|
-
// Generates the intersection of multiple length sorted, sorted arrays
|
174
|
-
//
|
175
|
-
// inline VALUE intersect_multiple_sorted(VALUE self, VALUE _length_sorted_array_of_arrays) {
|
176
|
-
// VALUE length_sorted_array_of_arrays = (_length_sorted_array_of_arrays);
|
177
|
-
//
|
178
|
-
// // counters
|
179
|
-
// long i, j;
|
180
|
-
// long current_set_position, current_answer_set_position;
|
181
|
-
//
|
182
|
-
// // structs
|
183
|
-
// struct RArray *rb_array_of_arrays;
|
184
|
-
// struct RArray *candidate_answer_set;
|
185
|
-
// struct RArray *current_set;
|
186
|
-
//
|
187
|
-
// // temps
|
188
|
-
// long e;
|
189
|
-
//
|
190
|
-
// // conversions
|
191
|
-
// rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
|
192
|
-
//
|
193
|
-
// // Let the smallest set s[0] be the candidate answer set
|
194
|
-
// // Note: Need a duplicate
|
195
|
-
// candidate_answer_set = RARRAY(rb_ary_dup(rb_array_of_arrays->ptr[0]));
|
196
|
-
//
|
197
|
-
// // For each set s[i], i = 1 .. k do
|
198
|
-
// for(i = 1; i < rb_array_of_arrays->len; i++) {
|
199
|
-
// current_set = RARRAY(rb_array_of_arrays->ptr[i]);
|
200
|
-
// current_set_position = 0;
|
201
|
-
//
|
202
|
-
// // for each element e in the candidate answer set
|
203
|
-
// for(j = 0; j < candidate_answer_set->len; j++) {
|
204
|
-
// e = candidate_answer_set->ptr[j];
|
205
|
-
//
|
206
|
-
// // search for e in the range l[i] to size(s[i])
|
207
|
-
// // and update l[i] to the last position probed in the previous step
|
208
|
-
// // if e was not found then
|
209
|
-
// if (bsearch(
|
210
|
-
// &e,
|
211
|
-
// &current_set->ptr[current_set_position],
|
212
|
-
// (current_set->len - current_set_position),
|
213
|
-
// sizeof(VALUE), //sizeof(current_set->ptr[0]),
|
214
|
-
// intcmp //longcmp
|
215
|
-
// ) == NULL) {
|
216
|
-
//
|
217
|
-
// // remove e from the candidate answer set
|
218
|
-
// // and advance e to the next element in the answer set
|
219
|
-
// // rb_ary_delete_at(candidate_answer_set, j);
|
220
|
-
// candidate_answer_set->ptr[j] = Qnil;
|
221
|
-
// }
|
222
|
-
// current_set_position = j - 1;
|
223
|
-
// }
|
224
|
-
//
|
225
|
-
// // compact the candidate answer set
|
226
|
-
// // rb_ary_compact_bang(candidate_answer_set);
|
227
|
-
// rb_funcall(candidate_answer_set, rb_intern("compact!"), 0);
|
228
|
-
// }
|
229
|
-
//
|
230
|
-
// return candidate_answer_set;
|
231
|
-
// }
|
232
|
-
|
233
|
-
// Trying to make a custom version of Matz' ary &
|
234
|
-
//
|
235
|
-
// Differences:
|
236
|
-
// * Multiple arrays
|
237
|
-
// * No to_ary
|
238
|
-
// * Smallest array is used to make hash
|
239
|
-
// Note: Assumes that whatever is given in as array of arrays is sorted by array sizes.
|
240
|
-
//
|
241
|
-
// static VALUE rb_ary_and(ary1, ary2) VALUE ary1, ary2; {
|
242
|
-
// static VALUE intersect_multiple_with_hash(VALUE self, VALUE _length_sorted_array_of_arrays) {
|
243
|
-
// // VALUE hash, ary3, v, vv;
|
244
|
-
// // long i;
|
245
|
-
// //
|
246
|
-
// // ary2 = to_ary(ary2);
|
247
|
-
// // ary3 = rb_ary_new2(RARRAY(ary1)->len < RARRAY(ary2)->len ?
|
248
|
-
// // RARRAY(ary1)->len : RARRAY(ary2)->len);
|
249
|
-
// // hash = ary_make_hash(ary2, 0);
|
250
|
-
// //
|
251
|
-
// // for (i=0; i<RARRAY(ary1)->len; i++) {
|
252
|
-
// // v = vv = rb_ary_elt(ary1, i);
|
253
|
-
// // if (st_delete(RHASH(hash)->tbl, (st_data_t*)&vv, 0)) {
|
254
|
-
// // rb_ary_push(ary3, v);
|
255
|
-
// // }
|
256
|
-
// // }
|
257
|
-
// //
|
258
|
-
// // return ary3;
|
259
|
-
// VALUE length_sorted_array_of_arrays = (_length_sorted_array_of_arrays);
|
260
|
-
//
|
261
|
-
// // structs
|
262
|
-
// struct RArray *candidate_answer_set;
|
263
|
-
// struct RArray *current_set;
|
264
|
-
//
|
265
|
-
// // temps
|
266
|
-
// VALUE hash, v, vv;
|
267
|
-
// long i, j, k;
|
268
|
-
//
|
269
|
-
// // Get smallest array size
|
270
|
-
// candidate_answer_set = rb_ary_new2((RARRAY(rb_array_of_arrays->ptr[0])->len);
|
271
|
-
//
|
272
|
-
// hash = ary_make_hash(RARRAY(rb_array_of_arrays->ptr[0]), 0);
|
273
|
-
//
|
274
|
-
// // For each entry in candidate answer set
|
275
|
-
// // Get current value
|
276
|
-
// for(i = 0; i < candidate_answer_set->len; i++) {
|
277
|
-
// // e = candidate_answer_set->ptr[i];
|
278
|
-
// v = vv = rb_ary_elt(candidate_answer_set, i);
|
279
|
-
//
|
280
|
-
// // Find the current value in other arrays
|
281
|
-
// // if not found, break
|
282
|
-
// for(j = 1; j < rb_array_of_arrays->len; j++) {
|
283
|
-
// current_set = RARRAY(rb_array_of_arrays->ptr[j]);
|
284
|
-
// found = 0;
|
285
|
-
//
|
286
|
-
// // Find with a linear search
|
287
|
-
// for(k = 0; k < current_set->len; k++) {
|
288
|
-
// // if (e == current_set->ptr[k]) {
|
289
|
-
// if (st_delete(RHASH(hash)->tbl, (unsigned long*)&vv, 0))
|
290
|
-
// found = 1;
|
291
|
-
// break;
|
292
|
-
// }
|
293
|
-
// }
|
294
|
-
//
|
295
|
-
// // break if not found
|
296
|
-
// if (!found) {
|
297
|
-
// break;
|
298
|
-
// }
|
299
|
-
// }
|
300
|
-
//
|
301
|
-
// // remove from candidate answer set if not found
|
302
|
-
// if (!found) {
|
303
|
-
// rb_ary_push(result, v);
|
304
|
-
// // candidate_answer_set->ptr[i] = Qnil;
|
305
|
-
// }
|
306
|
-
// }
|
307
|
-
//
|
308
|
-
// // compact the candidate answer set
|
309
|
-
// // rb_ary_compact_bang(candidate_answer_set);
|
310
|
-
// rb_funcall(candidate_answer_set, rb_intern("compact!"), 0);
|
311
|
-
//
|
312
|
-
// return candidate_answer_set;
|
313
|
-
// }
|
314
|
-
|
315
|
-
// VALUE rb_ary_clear_bang(ary) VALUE ary; {
|
316
|
-
// rb_ary_modify(ary);
|
317
|
-
// ARY_SET_LEN(ary, 0);
|
318
|
-
// // capa stays the same
|
319
|
-
// // if (ARY_DEFAULT_SIZE * 2 < RARRAY(ary)->aux.capa) {
|
320
|
-
// // REALLOC_N(RARRAY(ary)->ptr, VALUE, ARY_DEFAULT_SIZE * 2);
|
321
|
-
// // RARRAY(ary)->aux.capa = ARY_DEFAULT_SIZE * 2;
|
322
|
-
// // }
|
323
|
-
// return ary;
|
324
|
-
// }
|
325
|
-
|
326
87
|
VALUE p_mPerformant, p_cArray;
|
327
88
|
|
328
89
|
void Init_performant() {
|
329
90
|
p_mPerformant = rb_define_module("Performant");
|
330
91
|
p_cArray = rb_define_class_under(p_mPerformant, "Array", rb_cObject);
|
331
|
-
// p_cArray = rb_define_module_under(p_mPerformant, "Array");
|
332
|
-
|
333
|
-
// rb_define_method(rb_cArray, "clear!", rb_ary_clear_bang, 0);
|
334
|
-
|
335
92
|
rb_define_singleton_method(p_cArray, "memory_efficient_intersect", memory_efficient_intersect, 1);
|
336
|
-
// rb_define_singleton_method(p_cArray, "brute_force_intersect", brute_force_intersect, 1);
|
337
|
-
// rb_define_singleton_method(p_cArray, "intersect_multiple_sorted", intersect_multiple_sorted, 1);
|
338
|
-
// rb_define_singleton_method(p_cArray, "intersect_multiple_with_hash", intersect_multiple_sorted_with_hash, 1);
|
339
93
|
}
|
@@ -25,7 +25,7 @@ class PickySearch < Application # The App Constant needs to be identical in appl
|
|
25
25
|
),
|
26
26
|
field(:title, :qualifiers => [:t, :title, :titre], :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title word indexed.
|
27
27
|
field(:author, :qualifiers => [:s, :author, :auteur]),
|
28
|
-
field(:isbn, :qualifiers => [:i, :isbn])
|
28
|
+
field(:isbn, :qualifiers => [:i, :isbn], :partial => Partial::None.new) # Partially searching on an ISBN makes not much sense.
|
29
29
|
end
|
30
30
|
|
31
31
|
queries do
|
@@ -30,6 +30,20 @@ describe Cacher::Partial::Subtoken do
|
|
30
30
|
end
|
31
31
|
end
|
32
32
|
context 'down_to set' do
|
33
|
+
context "large down_to" do
|
34
|
+
before(:each) do
|
35
|
+
@cacher = Cacher::Partial::Subtoken.new :down_to => 10
|
36
|
+
end
|
37
|
+
describe 'generate_from' do
|
38
|
+
it 'should generate the right index' do
|
39
|
+
@cacher.generate_from( :florian => [1], :'01234567890' => [2] ).should == {
|
40
|
+
:florian => [1],
|
41
|
+
:'01234567890' => [2],
|
42
|
+
:'0123456789' => [2]
|
43
|
+
}
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
33
47
|
context 'default starting_at' do
|
34
48
|
before(:each) do
|
35
49
|
@cacher = Cacher::Partial::Subtoken.new :down_to => 4
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
- 2
|
9
|
-
version: 0.0.2
|
8
|
+
- 3
|
9
|
+
version: 0.0.3
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Florian Hanke
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-09-
|
17
|
+
date: 2010-09-30 00:00:00 +02:00
|
18
18
|
default_executable: picky
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|