picky 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,3 @@
1
- // Note: This is the Ruby 1.9 version.
2
- //
3
1
  #include "ruby.h"
4
2
 
5
3
  // Copying internal ruby methods.
@@ -14,7 +12,6 @@ static inline VALUE rb_ary_elt(ary, offset)
14
12
  }
15
13
  return RARRAY_PTR(ary)[offset];
16
14
  }
17
- VALUE rb_ary_make_hash(VALUE, VALUE);
18
15
  static VALUE ary_make_hash(ary1, ary2)
19
16
  VALUE ary1, ary2;
20
17
  {
@@ -32,51 +29,50 @@ static VALUE ary_make_hash(ary1, ary2)
32
29
  return hash;
33
30
  }
34
31
 
35
- // Comparison functions.
36
- //
37
- inline int intvaluecmp(VALUE a, VALUE b) {
38
- return FIX2INT(a) - FIX2INT(b);
39
- }
40
- inline int intcmp(const int * a, const int * b) {
41
- return (*a - *b);
42
- }
43
- inline long longcmp(const void * a, const void * b) {
44
- return (*(long*) a - *(long*) b);
45
- }
46
-
47
32
  // This version just calls the & consecutively for all arrays.
48
- //
33
+ //
34
+ // The arrays need to be pre-sorted by length, smallest first.
35
+ //
49
36
  inline VALUE memory_efficient_intersect(VALUE self, VALUE length_sorted_array_of_arrays) {
50
- // counters
37
+ // Counters.
38
+ //
51
39
  long i, j;
52
-
53
- // structs
40
+
41
+ // Vars.
42
+ //
54
43
  struct RArray *rb_array_of_arrays;
55
- struct RArray *smallest_array;
56
- struct RArray *current_array;
44
+ VALUE smallest_array;
45
+ VALUE current_array;
57
46
  VALUE hash;
58
-
59
- // temps
47
+
48
+ // Temps.
49
+ //
60
50
  VALUE v, vv;
61
-
62
- // conversions
51
+
52
+ // Conversions.
53
+ //
63
54
  rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
64
- smallest_array = RARRAY(rb_ary_dup(RARRAY_PTR(rb_array_of_arrays)[0]));
65
-
66
- // iterate through all arrays
55
+ smallest_array = (VALUE) RARRAY(rb_ary_dup(RARRAY_PTR(rb_array_of_arrays)[0]));
56
+
57
+ // Iterate through all arrays.
58
+ //
67
59
  for (i = 1; i < RARRAY_LEN(rb_array_of_arrays); i++) {
68
60
  // Break if the smallest array is empty
69
61
  if (RARRAY_LEN(smallest_array) == 0) {
70
62
  break;
71
63
  }
72
-
73
- // make a hash from the currently smallest version
64
+
65
+ // Make a hash from the currently smallest version.
66
+ //
74
67
  hash = ary_make_hash(smallest_array, 0);
75
- // clear for use as temp array
68
+
69
+ // Clear for use as temp array.
70
+ //
76
71
  rb_ary_clear(smallest_array);
77
-
72
+
73
+ // Iterate through all array elements.
74
+ //
78
75
  current_array = RARRAY_PTR(rb_array_of_arrays)[i];
79
- // iterate through all array elements
80
76
  for (j = 0; j < RARRAY_LEN(current_array); j++) {
81
77
  v = vv = rb_ary_elt(current_array, j);
82
78
  if (st_delete(RHASH_TBL(hash), (unsigned long*)&vv, 0)) {
@@ -84,256 +80,14 @@ inline VALUE memory_efficient_intersect(VALUE self, VALUE length_sorted_array_of
84
80
  }
85
81
  }
86
82
  }
87
-
83
+
88
84
  return smallest_array;
89
85
  }
90
86
 
91
- // Brute force algorithm to find the intersection of an array of length sorted, unsorted arrays.
92
- // This algorithm can be faster than others for small arrays.
93
- //
94
- // inline VALUE brute_force_intersect(VALUE self, VALUE length_sorted_array_of_arrays) {
95
- // // counters
96
- // long i, j, k;
97
- //
98
- // // structs
99
- // struct RArray *rb_array_of_arrays;
100
- // struct RArray *candidate_answer_set;
101
- // struct RArray *current_set;
102
- //
103
- // // conversions
104
- // rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
105
- //
106
- // // temps
107
- // VALUE e;
108
- // unsigned char found;
109
- //
110
- // // Let the smallest set s[0] be the candidate answer set
111
- // // Note: Need a duplicate
112
- // candidate_answer_set = RARRAY(rb_ary_dup(rb_array_of_arrays->ptr[0]));
113
- //
114
- // // For each entry in candidate answer set
115
- // // Get current value
116
- // for(i = 0; i < candidate_answer_set->len; i++) {
117
- // e = candidate_answer_set->ptr[i];
118
- //
119
- // // Find the current value in other arrays
120
- // // if not found, break
121
- // for(j = 1; j < rb_array_of_arrays->len; j++) {
122
- // current_set = RARRAY(rb_array_of_arrays->ptr[j]);
123
- // found = 0;
124
- //
125
- // // Find with a linear search
126
- // for(k = 0; k < current_set->len; k++) {
127
- // if (e == current_set->ptr[k]) {
128
- // found = 1;
129
- // break;
130
- // }
131
- // }
132
- //
133
- // // break if not found
134
- // if (!found) {
135
- // break;
136
- // }
137
- // }
138
- //
139
- // // remove from candidate answer set if not found
140
- // if (!found) {
141
- // candidate_answer_set->ptr[i] = Qnil;
142
- // }
143
- // }
144
- //
145
- // // compact the candidate answer set
146
- // // rb_ary_compact_bang(candidate_answer_set);
147
- // rb_funcall(candidate_answer_set, rb_intern("compact!"), 0);
148
- //
149
- // return candidate_answer_set;
150
- // }
151
-
152
- // inline VALUE intersect_unique(VALUE self, VALUE length_sorted_array_of_arrays) {
153
- // // VALUE length_sorted_array_of_arrays = (_length_sorted_array_of_arrays);
154
- //
155
- // // structs
156
- // struct RArray *result;
157
- // struct RArray *rb_array_of_arrays;
158
- //
159
- // // conversions
160
- // rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
161
- //
162
- // // TODO
163
- //
164
- // return result;
165
- // }
166
-
167
- // Generates the intersection of multiple
168
- //
169
- // inline VALUE sorting_intersect_multiple(VALUE self, VALUE length_sorted_array_of_arrays) {
170
- // // TODO
171
- // }
172
-
173
- // Generates the intersection of multiple length sorted, sorted arrays
174
- //
175
- // inline VALUE intersect_multiple_sorted(VALUE self, VALUE _length_sorted_array_of_arrays) {
176
- // VALUE length_sorted_array_of_arrays = (_length_sorted_array_of_arrays);
177
- //
178
- // // counters
179
- // long i, j;
180
- // long current_set_position, current_answer_set_position;
181
- //
182
- // // structs
183
- // struct RArray *rb_array_of_arrays;
184
- // struct RArray *candidate_answer_set;
185
- // struct RArray *current_set;
186
- //
187
- // // temps
188
- // long e;
189
- //
190
- // // conversions
191
- // rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
192
- //
193
- // // Let the smallest set s[0] be the candidate answer set
194
- // // Note: Need a duplicate
195
- // candidate_answer_set = RARRAY(rb_ary_dup(rb_array_of_arrays->ptr[0]));
196
- //
197
- // // For each set s[i], i = 1 .. k do
198
- // for(i = 1; i < rb_array_of_arrays->len; i++) {
199
- // current_set = RARRAY(rb_array_of_arrays->ptr[i]);
200
- // current_set_position = 0;
201
- //
202
- // // for each element e in the candidate answer set
203
- // for(j = 0; j < candidate_answer_set->len; j++) {
204
- // e = candidate_answer_set->ptr[j];
205
- //
206
- // // search for e in the range l[i] to size(s[i])
207
- // // and update l[i] to the last position probed in the previous step
208
- // // if e was not found then
209
- // if (bsearch(
210
- // &e,
211
- // &current_set->ptr[current_set_position],
212
- // (current_set->len - current_set_position),
213
- // sizeof(VALUE), //sizeof(current_set->ptr[0]),
214
- // intcmp //longcmp
215
- // ) == NULL) {
216
- //
217
- // // remove e from the candidate answer set
218
- // // and advance e to the next element in the answer set
219
- // // rb_ary_delete_at(candidate_answer_set, j);
220
- // candidate_answer_set->ptr[j] = Qnil;
221
- // }
222
- // current_set_position = j - 1;
223
- // }
224
- //
225
- // // compact the candidate answer set
226
- // // rb_ary_compact_bang(candidate_answer_set);
227
- // rb_funcall(candidate_answer_set, rb_intern("compact!"), 0);
228
- // }
229
- //
230
- // return candidate_answer_set;
231
- // }
232
-
233
- // Trying to make a custom version of Matz' ary &
234
- //
235
- // Differences:
236
- // * Multiple arrays
237
- // * No to_ary
238
- // * Smallest array is used to make hash
239
- // Note: Assumes that whatever is given in as array of arrays is sorted by array sizes.
240
- //
241
- // static VALUE rb_ary_and(ary1, ary2) VALUE ary1, ary2; {
242
- // static VALUE intersect_multiple_with_hash(VALUE self, VALUE _length_sorted_array_of_arrays) {
243
- // // VALUE hash, ary3, v, vv;
244
- // // long i;
245
- // //
246
- // // ary2 = to_ary(ary2);
247
- // // ary3 = rb_ary_new2(RARRAY(ary1)->len < RARRAY(ary2)->len ?
248
- // // RARRAY(ary1)->len : RARRAY(ary2)->len);
249
- // // hash = ary_make_hash(ary2, 0);
250
- // //
251
- // // for (i=0; i<RARRAY(ary1)->len; i++) {
252
- // // v = vv = rb_ary_elt(ary1, i);
253
- // // if (st_delete(RHASH(hash)->tbl, (st_data_t*)&vv, 0)) {
254
- // // rb_ary_push(ary3, v);
255
- // // }
256
- // // }
257
- // //
258
- // // return ary3;
259
- // VALUE length_sorted_array_of_arrays = (_length_sorted_array_of_arrays);
260
- //
261
- // // structs
262
- // struct RArray *candidate_answer_set;
263
- // struct RArray *current_set;
264
- //
265
- // // temps
266
- // VALUE hash, v, vv;
267
- // long i, j, k;
268
- //
269
- // // Get smallest array size
270
- // candidate_answer_set = rb_ary_new2((RARRAY(rb_array_of_arrays->ptr[0])->len);
271
- //
272
- // hash = ary_make_hash(RARRAY(rb_array_of_arrays->ptr[0]), 0);
273
- //
274
- // // For each entry in candidate answer set
275
- // // Get current value
276
- // for(i = 0; i < candidate_answer_set->len; i++) {
277
- // // e = candidate_answer_set->ptr[i];
278
- // v = vv = rb_ary_elt(candidate_answer_set, i);
279
- //
280
- // // Find the current value in other arrays
281
- // // if not found, break
282
- // for(j = 1; j < rb_array_of_arrays->len; j++) {
283
- // current_set = RARRAY(rb_array_of_arrays->ptr[j]);
284
- // found = 0;
285
- //
286
- // // Find with a linear search
287
- // for(k = 0; k < current_set->len; k++) {
288
- // // if (e == current_set->ptr[k]) {
289
- // if (st_delete(RHASH(hash)->tbl, (unsigned long*)&vv, 0))
290
- // found = 1;
291
- // break;
292
- // }
293
- // }
294
- //
295
- // // break if not found
296
- // if (!found) {
297
- // break;
298
- // }
299
- // }
300
- //
301
- // // remove from candidate answer set if not found
302
- // if (!found) {
303
- // rb_ary_push(result, v);
304
- // // candidate_answer_set->ptr[i] = Qnil;
305
- // }
306
- // }
307
- //
308
- // // compact the candidate answer set
309
- // // rb_ary_compact_bang(candidate_answer_set);
310
- // rb_funcall(candidate_answer_set, rb_intern("compact!"), 0);
311
- //
312
- // return candidate_answer_set;
313
- // }
314
-
315
- // VALUE rb_ary_clear_bang(ary) VALUE ary; {
316
- // rb_ary_modify(ary);
317
- // ARY_SET_LEN(ary, 0);
318
- // // capa stays the same
319
- // // if (ARY_DEFAULT_SIZE * 2 < RARRAY(ary)->aux.capa) {
320
- // // REALLOC_N(RARRAY(ary)->ptr, VALUE, ARY_DEFAULT_SIZE * 2);
321
- // // RARRAY(ary)->aux.capa = ARY_DEFAULT_SIZE * 2;
322
- // // }
323
- // return ary;
324
- // }
325
-
326
87
  VALUE p_mPerformant, p_cArray;
327
88
 
328
89
  void Init_performant() {
329
90
  p_mPerformant = rb_define_module("Performant");
330
91
  p_cArray = rb_define_class_under(p_mPerformant, "Array", rb_cObject);
331
- // p_cArray = rb_define_module_under(p_mPerformant, "Array");
332
-
333
- // rb_define_method(rb_cArray, "clear!", rb_ary_clear_bang, 0);
334
-
335
92
  rb_define_singleton_method(p_cArray, "memory_efficient_intersect", memory_efficient_intersect, 1);
336
- // rb_define_singleton_method(p_cArray, "brute_force_intersect", brute_force_intersect, 1);
337
- // rb_define_singleton_method(p_cArray, "intersect_multiple_sorted", intersect_multiple_sorted, 1);
338
- // rb_define_singleton_method(p_cArray, "intersect_multiple_with_hash", intersect_multiple_sorted_with_hash, 1);
339
93
  }
@@ -25,7 +25,7 @@ class PickySearch < Application # The App Constant needs to be identical in appl
25
25
  ),
26
26
  field(:title, :qualifiers => [:t, :title, :titre], :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title words are indexed.
27
27
  field(:author, :qualifiers => [:s, :author, :auteur]),
28
- field(:isbn, :qualifiers => [:i, :isbn])
28
+ field(:isbn, :qualifiers => [:i, :isbn], :partial => Partial::None.new) # Partial searching on an ISBN does not make much sense.
29
29
  end
30
30
 
31
31
  queries do
@@ -30,6 +30,20 @@ describe Cacher::Partial::Subtoken do
30
30
  end
31
31
  end
32
32
  context 'down_to set' do
33
+ context "large down_to" do
34
+ before(:each) do
35
+ @cacher = Cacher::Partial::Subtoken.new :down_to => 10
36
+ end
37
+ describe 'generate_from' do
38
+ it 'should generate the right index' do
39
+ @cacher.generate_from( :florian => [1], :'01234567890' => [2] ).should == {
40
+ :florian => [1],
41
+ :'01234567890' => [2],
42
+ :'0123456789' => [2]
43
+ }
44
+ end
45
+ end
46
+ end
33
47
  context 'default starting_at' do
34
48
  before(:each) do
35
49
  @cacher = Cacher::Partial::Subtoken.new :down_to => 4
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 2
9
- version: 0.0.2
8
+ - 3
9
+ version: 0.0.3
10
10
  platform: ruby
11
11
  authors:
12
12
  - Florian Hanke
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-09-29 00:00:00 +02:00
17
+ date: 2010-09-30 00:00:00 +02:00
18
18
  default_executable: picky
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency