picky 0.0.2 → 0.0.3

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,3 @@
1
- // Note: This is the Ruby 1.9 version.
2
- //
3
1
  #include "ruby.h"
4
2
 
5
3
  // Copying internal ruby methods.
@@ -14,7 +12,6 @@ static inline VALUE rb_ary_elt(ary, offset)
14
12
  }
15
13
  return RARRAY_PTR(ary)[offset];
16
14
  }
17
- VALUE rb_ary_make_hash(VALUE, VALUE);
18
15
  static VALUE ary_make_hash(ary1, ary2)
19
16
  VALUE ary1, ary2;
20
17
  {
@@ -32,51 +29,50 @@ static VALUE ary_make_hash(ary1, ary2)
32
29
  return hash;
33
30
  }
34
31
 
35
- // Comparison functions.
36
- //
37
- inline int intvaluecmp(VALUE a, VALUE b) {
38
- return FIX2INT(a) - FIX2INT(b);
39
- }
40
- inline int intcmp(const int * a, const int * b) {
41
- return (*a - *b);
42
- }
43
- inline long longcmp(const void * a, const void * b) {
44
- return (*(long*) a - *(long*) b);
45
- }
46
-
47
32
  // This version just calls the & consecutively for all arrays.
48
- //
33
+ //
34
+ // The arrays need to be pre-sorted small to large.
35
+ //
49
36
  inline VALUE memory_efficient_intersect(VALUE self, VALUE length_sorted_array_of_arrays) {
50
- // counters
37
+ // Counters.
38
+ //
51
39
  long i, j;
52
-
53
- // structs
40
+
41
+ // Vars.
42
+ //
54
43
  struct RArray *rb_array_of_arrays;
55
- struct RArray *smallest_array;
56
- struct RArray *current_array;
44
+ VALUE smallest_array;
45
+ VALUE current_array;
57
46
  VALUE hash;
58
-
59
- // temps
47
+
48
+ // Temps.
49
+ //
60
50
  VALUE v, vv;
61
-
62
- // conversions
51
+
52
+ // Conversions.
53
+ //
63
54
  rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
64
- smallest_array = RARRAY(rb_ary_dup(RARRAY_PTR(rb_array_of_arrays)[0]));
65
-
66
- // iterate through all arrays
55
+ smallest_array = (VALUE) RARRAY(rb_ary_dup(RARRAY_PTR(rb_array_of_arrays)[0]));
56
+
57
+ // Iterate through all arrays.
58
+ //
67
59
  for (i = 1; i < RARRAY_LEN(rb_array_of_arrays); i++) {
68
60
  // Break if the smallest array is empty
69
61
  if (RARRAY_LEN(smallest_array) == 0) {
70
62
  break;
71
63
  }
72
-
73
- // make a hash from the currently smallest version
64
+
65
+ // Make a hash from the currently smallest version.
66
+ //
74
67
  hash = ary_make_hash(smallest_array, 0);
75
- // clear for use as temp array
68
+
69
+ // Clear for use as temp array.
70
+ //
76
71
  rb_ary_clear(smallest_array);
77
-
72
+
73
+ // Iterate through all array elements.
74
+ //
78
75
  current_array = RARRAY_PTR(rb_array_of_arrays)[i];
79
- // iterate through all array elements
80
76
  for (j = 0; j < RARRAY_LEN(current_array); j++) {
81
77
  v = vv = rb_ary_elt(current_array, j);
82
78
  if (st_delete(RHASH_TBL(hash), (unsigned long*)&vv, 0)) {
@@ -84,256 +80,14 @@ inline VALUE memory_efficient_intersect(VALUE self, VALUE length_sorted_array_of
84
80
  }
85
81
  }
86
82
  }
87
-
83
+
88
84
  return smallest_array;
89
85
  }
90
86
 
91
- // Brute force algorithm to find the intersection of an array of length sorted, unsorted arrays.
92
- // This algorithm can be faster than others for small arrays.
93
- //
94
- // inline VALUE brute_force_intersect(VALUE self, VALUE length_sorted_array_of_arrays) {
95
- // // counters
96
- // long i, j, k;
97
- //
98
- // // structs
99
- // struct RArray *rb_array_of_arrays;
100
- // struct RArray *candidate_answer_set;
101
- // struct RArray *current_set;
102
- //
103
- // // conversions
104
- // rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
105
- //
106
- // // temps
107
- // VALUE e;
108
- // unsigned char found;
109
- //
110
- // // Let the smallest set s[0] be the candidate answer set
111
- // // Note: Need a duplicate
112
- // candidate_answer_set = RARRAY(rb_ary_dup(rb_array_of_arrays->ptr[0]));
113
- //
114
- // // For each entry in candidate anser set
115
- // // Get current value
116
- // for(i = 0; i < candidate_answer_set->len; i++) {
117
- // e = candidate_answer_set->ptr[i];
118
- //
119
- // // Find the current value in other arrays
120
- // // if not found, break
121
- // for(j = 1; j < rb_array_of_arrays->len; j++) {
122
- // current_set = RARRAY(rb_array_of_arrays->ptr[j]);
123
- // found = 0;
124
- //
125
- // // Find with a linear search
126
- // for(k = 0; k < current_set->len; k++) {
127
- // if (e == current_set->ptr[k]) {
128
- // found = 1;
129
- // break;
130
- // }
131
- // }
132
- //
133
- // // break if not found
134
- // if (!found) {
135
- // break;
136
- // }
137
- // }
138
- //
139
- // // remove from candidate answer set if not found
140
- // if (!found) {
141
- // candidate_answer_set->ptr[i] = Qnil;
142
- // }
143
- // }
144
- //
145
- // // compact the candidate answer set
146
- // // rb_ary_compact_bang(candidate_answer_set);
147
- // rb_funcall(candidate_answer_set, rb_intern("compact!"), 0);
148
- //
149
- // return candidate_answer_set;
150
- // }
151
-
152
- // inline VALUE intersect_unique(VALUE self, VALUE length_sorted_array_of_arrays) {
153
- // // VALUE length_sorted_array_of_arrays = (_length_sorted_array_of_arrays);
154
- //
155
- // // structs
156
- // struct RArray *result;
157
- // struct RArray *rb_array_of_arrays;
158
- //
159
- // // conversions
160
- // rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
161
- //
162
- // // TODO
163
- //
164
- // return result;
165
- // }
166
-
167
- // Generates the intersection of multiple
168
- //
169
- // inline VALUE sorting_intersect_multiple(VALUE self, VALUE length_sorted_array_of_arrays) {
170
- // // TODO
171
- // }
172
-
173
- // Generates the intersection of multiple length sorted, sorted arrays
174
- //
175
- // inline VALUE intersect_multiple_sorted(VALUE self, VALUE _length_sorted_array_of_arrays) {
176
- // VALUE length_sorted_array_of_arrays = (_length_sorted_array_of_arrays);
177
- //
178
- // // counters
179
- // long i, j;
180
- // long current_set_position, current_answer_set_position;
181
- //
182
- // // structs
183
- // struct RArray *rb_array_of_arrays;
184
- // struct RArray *candidate_answer_set;
185
- // struct RArray *current_set;
186
- //
187
- // // temps
188
- // long e;
189
- //
190
- // // conversions
191
- // rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
192
- //
193
- // // Let the smallest set s[0] be the candidate answer set
194
- // // Note: Need a duplicate
195
- // candidate_answer_set = RARRAY(rb_ary_dup(rb_array_of_arrays->ptr[0]));
196
- //
197
- // // For each set s[i], i = 1 .. k do
198
- // for(i = 1; i < rb_array_of_arrays->len; i++) {
199
- // current_set = RARRAY(rb_array_of_arrays->ptr[i]);
200
- // current_set_position = 0;
201
- //
202
- // // for each element e in the candidate answer set
203
- // for(j = 0; j < candidate_answer_set->len; j++) {
204
- // e = candidate_answer_set->ptr[j];
205
- //
206
- // // search for e in the range l[i] to size(s[i])
207
- // // and update l[i] to the last position probed in the previous step
208
- // // if e was not found then
209
- // if (bsearch(
210
- // &e,
211
- // &current_set->ptr[current_set_position],
212
- // (current_set->len - current_set_position),
213
- // sizeof(VALUE), //sizeof(current_set->ptr[0]),
214
- // intcmp //longcmp
215
- // ) == NULL) {
216
- //
217
- // // remove e from the candidate answer set
218
- // // and advance e to the next element in the answer set
219
- // // rb_ary_delete_at(candidate_answer_set, j);
220
- // candidate_answer_set->ptr[j] = Qnil;
221
- // }
222
- // current_set_position = j - 1;
223
- // }
224
- //
225
- // // compact the candidate answer set
226
- // // rb_ary_compact_bang(candidate_answer_set);
227
- // rb_funcall(candidate_answer_set, rb_intern("compact!"), 0);
228
- // }
229
- //
230
- // return candidate_answer_set;
231
- // }
232
-
233
- // Trying to make a custom version of Matz' ary &
234
- //
235
- // Differences:
236
- // * Multiple arrays
237
- // * No to_ary
238
- // * Smallest array is used to make hash
239
- // Note: Assumes that whatever is given in as array of arrays is sorted by array sizes.
240
- //
241
- // static VALUE rb_ary_and(ary1, ary2) VALUE ary1, ary2; {
242
- // static VALUE intersect_multiple_with_hash(VALUE self, VALUE _length_sorted_array_of_arrays) {
243
- // // VALUE hash, ary3, v, vv;
244
- // // long i;
245
- // //
246
- // // ary2 = to_ary(ary2);
247
- // // ary3 = rb_ary_new2(RARRAY(ary1)->len < RARRAY(ary2)->len ?
248
- // // RARRAY(ary1)->len : RARRAY(ary2)->len);
249
- // // hash = ary_make_hash(ary2, 0);
250
- // //
251
- // // for (i=0; i<RARRAY(ary1)->len; i++) {
252
- // // v = vv = rb_ary_elt(ary1, i);
253
- // // if (st_delete(RHASH(hash)->tbl, (st_data_t*)&vv, 0)) {
254
- // // rb_ary_push(ary3, v);
255
- // // }
256
- // // }
257
- // //
258
- // // return ary3;
259
- // VALUE length_sorted_array_of_arrays = (_length_sorted_array_of_arrays);
260
- //
261
- // // structs
262
- // struct RArray *candidate_answer_set;
263
- // struct RArray *current_set;
264
- //
265
- // // temps
266
- // VALUE hash, v, vv;
267
- // long i, j, k;
268
- //
269
- // // Get smallest array size
270
- // candidate_answer_set = rb_ary_new2((RARRAY(rb_array_of_arrays->ptr[0])->len);
271
- //
272
- // hash = ary_make_hash(RARRAY(rb_array_of_arrays->ptr[0]), 0);
273
- //
274
- // // For each entry in candidate answer set
275
- // // Get current value
276
- // for(i = 0; i < candidate_answer_set->len; i++) {
277
- // // e = candidate_answer_set->ptr[i];
278
- // v = vv = rb_ary_elt(candidate_answer_set, i);
279
- //
280
- // // Find the current value in other arrays
281
- // // if not found, break
282
- // for(j = 1; j < rb_array_of_arrays->len; j++) {
283
- // current_set = RARRAY(rb_array_of_arrays->ptr[j]);
284
- // found = 0;
285
- //
286
- // // Find with a linear search
287
- // for(k = 0; k < current_set->len; k++) {
288
- // // if (e == current_set->ptr[k]) {
289
- // if (st_delete(RHASH(hash)->tbl, (unsigned long*)&vv, 0))
290
- // found = 1;
291
- // break;
292
- // }
293
- // }
294
- //
295
- // // break if not found
296
- // if (!found) {
297
- // break;
298
- // }
299
- // }
300
- //
301
- // // remove from candidate answer set if not found
302
- // if (!found) {
303
- // rb_ary_push(result, v);
304
- // // candidate_answer_set->ptr[i] = Qnil;
305
- // }
306
- // }
307
- //
308
- // // compact the candidate answer set
309
- // // rb_ary_compact_bang(candidate_answer_set);
310
- // rb_funcall(candidate_answer_set, rb_intern("compact!"), 0);
311
- //
312
- // return candidate_answer_set;
313
- // }
314
-
315
- // VALUE rb_ary_clear_bang(ary) VALUE ary; {
316
- // rb_ary_modify(ary);
317
- // ARY_SET_LEN(ary, 0);
318
- // // capa stays the same
319
- // // if (ARY_DEFAULT_SIZE * 2 < RARRAY(ary)->aux.capa) {
320
- // // REALLOC_N(RARRAY(ary)->ptr, VALUE, ARY_DEFAULT_SIZE * 2);
321
- // // RARRAY(ary)->aux.capa = ARY_DEFAULT_SIZE * 2;
322
- // // }
323
- // return ary;
324
- // }
325
-
326
87
  VALUE p_mPerformant, p_cArray;
327
88
 
328
89
  void Init_performant() {
329
90
  p_mPerformant = rb_define_module("Performant");
330
91
  p_cArray = rb_define_class_under(p_mPerformant, "Array", rb_cObject);
331
- // p_cArray = rb_define_module_under(p_mPerformant, "Array");
332
-
333
- // rb_define_method(rb_cArray, "clear!", rb_ary_clear_bang, 0);
334
-
335
92
  rb_define_singleton_method(p_cArray, "memory_efficient_intersect", memory_efficient_intersect, 1);
336
- // rb_define_singleton_method(p_cArray, "brute_force_intersect", brute_force_intersect, 1);
337
- // rb_define_singleton_method(p_cArray, "intersect_multiple_sorted", intersect_multiple_sorted, 1);
338
- // rb_define_singleton_method(p_cArray, "intersect_multiple_with_hash", intersect_multiple_sorted_with_hash, 1);
339
93
  }
@@ -25,7 +25,7 @@ class PickySearch < Application # The App Constant needs to be identical in appl
25
25
  ),
26
26
  field(:title, :qualifiers => [:t, :title, :titre], :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title word indexed.
27
27
  field(:author, :qualifiers => [:s, :author, :auteur]),
28
- field(:isbn, :qualifiers => [:i, :isbn])
28
+ field(:isbn, :qualifiers => [:i, :isbn], :partial => Partial::None.new) # Partially searching on an ISBN makes not much sense.
29
29
  end
30
30
 
31
31
  queries do
@@ -30,6 +30,20 @@ describe Cacher::Partial::Subtoken do
30
30
  end
31
31
  end
32
32
  context 'down_to set' do
33
+ context "large down_to" do
34
+ before(:each) do
35
+ @cacher = Cacher::Partial::Subtoken.new :down_to => 10
36
+ end
37
+ describe 'generate_from' do
38
+ it 'should generate the right index' do
39
+ @cacher.generate_from( :florian => [1], :'01234567890' => [2] ).should == {
40
+ :florian => [1],
41
+ :'01234567890' => [2],
42
+ :'0123456789' => [2]
43
+ }
44
+ end
45
+ end
46
+ end
33
47
  context 'default starting_at' do
34
48
  before(:each) do
35
49
  @cacher = Cacher::Partial::Subtoken.new :down_to => 4
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 2
9
- version: 0.0.2
8
+ - 3
9
+ version: 0.0.3
10
10
  platform: ruby
11
11
  authors:
12
12
  - Florian Hanke
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-09-29 00:00:00 +02:00
17
+ date: 2010-09-30 00:00:00 +02:00
18
18
  default_executable: picky
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency