rroonga 4.0.0 → 4.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -444,6 +444,13 @@ rb_grn_object_assign (VALUE klass, VALUE self, VALUE rb_context,
444
444
  context, object);
445
445
  rb_grn_index_column_bind(RB_GRN_INDEX_COLUMN(rb_grn_object),
446
446
  context, object);
447
+ } else if (RVAL2CBOOL(rb_obj_is_kind_of(self, rb_cGrnVariableSizeColumn))) {
448
+ rb_grn_object = ALLOC(RbGrnVariableSizeColumn);
449
+ rb_grn_object_bind_common(klass, self, rb_context, rb_grn_object,
450
+ context, object);
451
+ rb_grn_variable_size_column_bind(RB_GRN_VARIABLE_SIZE_COLUMN(rb_grn_object),
452
+ context,
453
+ object);
447
454
  } else if (RVAL2CBOOL(rb_obj_is_kind_of(self, rb_cGrnColumn))) {
448
455
  rb_grn_object = ALLOC(RbGrnColumn);
449
456
  rb_grn_object_bind_common(klass, self, rb_context, rb_grn_object,
@@ -226,6 +226,11 @@ rb_grn_table_inspect (VALUE self)
226
226
  *
227
227
  * - +:scalar+ := スカラ値(単独の値)を格納する。
228
228
  * - +:vector+ := 値の配列を格納する。
229
+ * @option options [Boolean] :with_weight (false)
230
+ * It specifies whether to make the column a weight vector column or not.
231
+ * Weight vector column can store weight for each element.
232
+ *
233
+ * You can't use this option for scalar column.
229
234
  * @option options :compress
230
235
  * 値の圧縮方法を指定する。省略した場合は、圧縮しない。
231
236
  *
@@ -244,7 +249,7 @@ rb_grn_table_define_column (int argc, VALUE *argv, VALUE self)
244
249
  unsigned name_size = 0;
245
250
  grn_obj_flags flags = 0;
246
251
  VALUE rb_name, rb_value_type;
247
- VALUE options, rb_path, rb_persistent, rb_compress, rb_type;
252
+ VALUE options, rb_path, rb_persistent, rb_compress, rb_type, rb_with_weight;
248
253
  VALUE columns;
249
254
  VALUE rb_column;
250
255
 
@@ -262,6 +267,7 @@ rb_grn_table_define_column (int argc, VALUE *argv, VALUE self)
262
267
  "path", &rb_path,
263
268
  "persistent", &rb_persistent,
264
269
  "type", &rb_type,
270
+ "with_weight", &rb_with_weight,
265
271
  "compress", &rb_compress,
266
272
  NULL);
267
273
 
@@ -294,6 +300,15 @@ rb_grn_table_define_column (int argc, VALUE *argv, VALUE self)
294
300
  rb_grn_inspect(rb_type));
295
301
  }
296
302
 
303
+ if (RVAL2CBOOL(rb_with_weight)) {
304
+ if (flags & GRN_OBJ_COLUMN_VECTOR) {
305
+ flags |= GRN_OBJ_WITH_WEIGHT;
306
+ } else {
307
+ rb_raise(rb_eArgError,
308
+ "can't use weight for scalar column");
309
+ }
310
+ }
311
+
297
312
  if (NIL_P(rb_compress)) {
298
313
  } else if (rb_grn_equal_option(rb_compress, "zlib")) {
299
314
  flags |= GRN_OBJ_COMPRESS_ZLIB;
@@ -1268,7 +1283,8 @@ rb_grn_table_group (int argc, VALUE *argv, VALUE self)
1268
1283
  grn_obj *table;
1269
1284
  grn_table_sort_key *keys;
1270
1285
  grn_table_group_result *results;
1271
- int i, n_keys, n_results, max_n_sub_records = 0;
1286
+ int i, n_keys, n_results;
1287
+ unsigned int max_n_sub_records = 0;
1272
1288
  grn_rc rc;
1273
1289
  VALUE rb_keys, rb_options, rb_max_n_sub_records;
1274
1290
  VALUE *rb_group_keys;
@@ -1294,7 +1310,7 @@ rb_grn_table_group (int argc, VALUE *argv, VALUE self)
1294
1310
  NULL);
1295
1311
 
1296
1312
  if (!NIL_P(rb_max_n_sub_records))
1297
- max_n_sub_records = NUM2INT(rb_max_n_sub_records);
1313
+ max_n_sub_records = NUM2UINT(rb_max_n_sub_records);
1298
1314
 
1299
1315
  keys = ALLOCA_N(grn_table_sort_key, n_keys);
1300
1316
  for (i = 0; i < n_keys; i++) {
@@ -123,6 +123,10 @@ rb_grn_scan_options (VALUE options, ...)
123
123
  rb_key = RB_GRN_INTERN(key);
124
124
  rb_ary_push(available_keys, rb_key);
125
125
  *value = rb_funcall(options, rb_intern("delete"), 1, rb_key);
126
+ if (NIL_P(*value)) {
127
+ rb_key = rb_str_new_cstr(key);
128
+ *value = rb_funcall(options, rb_intern("delete"), 1, rb_key);
129
+ }
126
130
 
127
131
  key = va_arg(args, const char *);
128
132
  }
@@ -652,7 +656,6 @@ rb_grn_vector_to_ruby_object (grn_ctx *context, grn_obj *vector)
652
656
  grn_obj_reinit(context, &value, domain, 0);
653
657
  grn_bulk_write(context, &value, _value, length);
654
658
  rb_ary_push(array, GRNOBJ2RVAL(Qnil, context, &value, Qnil));
655
- /* UINT2NUM(weight); */ /* TODO: How handle weight? */
656
659
  }
657
660
  GRN_OBJ_FIN(context, &value);
658
661
 
@@ -1,6 +1,6 @@
1
1
  /* -*- coding: utf-8; mode: C; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
2
  /*
3
- Copyright (C) 2009-2011 Kouhei Sutou <kou@clear-code.com>
3
+ Copyright (C) 2009-2014 Kouhei Sutou <kou@clear-code.com>
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -18,16 +18,411 @@
18
18
 
19
19
  #include "rb-grn.h"
20
20
 
21
- #define SELF(object) ((RbGrnColumn *)DATA_PTR(object))
21
+ #define SELF(object) ((RbGrnVariableSizeColumn *)DATA_PTR(object))
22
22
 
23
23
  VALUE rb_cGrnVariableSizeColumn;
24
24
 
25
+ void
26
+ rb_grn_variable_size_column_bind (RbGrnVariableSizeColumn *rb_column,
27
+ grn_ctx *context, grn_obj *column)
28
+ {
29
+ RbGrnObject *rb_grn_object;
30
+ int column_type;
31
+ unsigned char value_type;
32
+
33
+ rb_grn_object = RB_GRN_OBJECT(rb_column);
34
+ rb_grn_column_bind(RB_GRN_COLUMN(rb_column), context, column);
35
+
36
+ rb_column->element_value = NULL;
37
+ column_type = (column->header.flags & GRN_OBJ_COLUMN_TYPE_MASK);
38
+ if (column_type != GRN_OBJ_COLUMN_VECTOR) {
39
+ return;
40
+ }
41
+
42
+ switch (rb_grn_object->range->header.type) {
43
+ case GRN_TABLE_HASH_KEY:
44
+ case GRN_TABLE_PAT_KEY:
45
+ case GRN_TABLE_DAT_KEY:
46
+ case GRN_TABLE_NO_KEY:
47
+ value_type = GRN_UVECTOR;
48
+ break;
49
+ default:
50
+ value_type = GRN_VECTOR;
51
+ break;
52
+ }
53
+ if (column->header.flags & GRN_OBJ_WITH_WEIGHT) {
54
+ rb_column->element_value = grn_obj_open(context, value_type, 0,
55
+ rb_grn_object->range_id);
56
+ }
57
+ }
58
+
59
+ void
60
+ rb_grn_variable_size_column_finalizer (grn_ctx *context, grn_obj *grn_object,
61
+ RbGrnVariableSizeColumn *rb_column)
62
+ {
63
+ rb_grn_column_finalizer(context, grn_object,
64
+ RB_GRN_COLUMN(rb_column));
65
+ if (context && rb_column->element_value)
66
+ grn_obj_unlink(context, rb_column->element_value);
67
+ rb_column->element_value = NULL;
68
+ }
69
+
70
+ static void
71
+ rb_grn_variable_size_column_deconstruct (RbGrnVariableSizeColumn *rb_column,
72
+ grn_obj **column,
73
+ grn_ctx **context,
74
+ grn_id *domain_id,
75
+ grn_obj **domain,
76
+ grn_obj **value,
77
+ grn_obj **element_value,
78
+ grn_id *range_id,
79
+ grn_obj **range)
80
+ {
81
+ RbGrnColumn *rb_grn_column;
82
+
83
+ rb_grn_column = RB_GRN_COLUMN(rb_column);
84
+ rb_grn_column_deconstruct(rb_grn_column, column, context,
85
+ domain_id, domain, value,
86
+ range_id, range);
87
+
88
+ if (element_value)
89
+ *element_value = rb_column->element_value;
90
+ }
91
+
25
92
  /*
26
93
  * Document-class: Groonga::VariableSizeColumn < Groonga::Column
27
94
  *
28
- * 可変長データ用のカラム。
95
+ * A column for variable size data like text family types and vector
96
+ * column.
29
97
  */
30
98
 
99
+ /*
100
+ * It gets the value of the variable size column for the record
101
+ * whose ID is _id_.
102
+ *
103
+ * @example Gets weight vector value
104
+ * Groonga::Schema.define do |schema|
105
+ * schema.create_table("Products",
106
+ * :type => :patricia_trie,
107
+ * :key_type => "ShortText") do |table|
108
+ * # This is weight vector.
109
+ * # ":with_weight => true" is important to store weight value.
110
+ * table.short_text("tags",
111
+ * :type => :vector,
112
+ * :with_weight => true)
113
+ * end
114
+ * end
115
+ *
116
+ * products = Groonga["Products"]
117
+ * rroonga = products.add("Rroonga")
118
+ * rroonga.tags = [
119
+ * {
120
+ * :value => "ruby",
121
+ * :weight => 100,
122
+ * },
123
+ * {
124
+ * :value => "groonga",
125
+ * :weight => 10,
126
+ * },
127
+ * ]
128
+ *
129
+ * p rroonga.tags
130
+ * # => [
131
+ * # {:value => "ruby", :weight => 100},
132
+ * # {:value => "groonga", :weight => 10}
133
+ * # ]
134
+ *
135
+ * @overload [](id)
136
+ * @param [Integer, Record] id The record ID.
137
+ * @return [Array<Hash<Symbol, String>>] An array of values if the column
138
+ * is a weight vector column.
139
+ * Each value is a Hash like the following form:
140
+ *
141
+ * <pre>
142
+ * {
143
+ * :value => [KEY],
144
+ * :weight => [WEIGHT],
145
+ * }
146
+ * </pre>
147
+ *
148
+ * @[KEY]@ is the key of the table that is specified as range on
149
+ * creating the weight vector.
150
+ *
151
+ * @[WEIGHT]@ is a positive integer.
152
+ *
153
+ * @return [::Object] See {Groonga::Object#[]} for columns except
154
+ * weight vector column.
155
+ *
156
+ * @since 4.0.1.
157
+ */
158
+ static VALUE
159
+ rb_grn_variable_size_column_array_reference (VALUE self, VALUE rb_id)
160
+ {
161
+ grn_ctx *context = NULL;
162
+ grn_obj *column, *range;
163
+ grn_id id;
164
+ grn_obj *value;
165
+ VALUE rb_value;
166
+ unsigned int i, n;
167
+
168
+ rb_grn_variable_size_column_deconstruct(SELF(self), &column, &context,
169
+ NULL, NULL, &value, NULL,
170
+ NULL, &range);
171
+
172
+ if (!(column->header.flags & GRN_OBJ_WITH_WEIGHT)) {
173
+ return rb_call_super(1, &rb_id);
174
+ }
175
+
176
+ id = RVAL2GRNID(rb_id, context, range, self);
177
+
178
+ grn_obj_reinit(context, value,
179
+ value->header.domain,
180
+ value->header.flags | GRN_OBJ_VECTOR);
181
+ grn_obj_get_value(context, column, id, value);
182
+ rb_grn_context_check(context, self);
183
+
184
+ n = grn_vector_size(context, value);
185
+ rb_value = rb_ary_new2(n);
186
+ for (i = 0; i < n; i++) {
187
+ const char *element_value;
188
+ unsigned int element_value_length;
189
+ unsigned int weight = 0;
190
+ grn_id domain;
191
+ VALUE rb_element;
192
+
193
+ element_value_length = grn_vector_get_element(context,
194
+ value,
195
+ i,
196
+ &element_value,
197
+ &weight,
198
+ &domain);
199
+ rb_element = rb_hash_new();
200
+ rb_hash_aset(rb_element,
201
+ ID2SYM(rb_intern("value")),
202
+ rb_str_new(element_value, element_value_length));
203
+ rb_hash_aset(rb_element,
204
+ ID2SYM(rb_intern("weight")),
205
+ UINT2NUM(weight));
206
+
207
+ rb_ary_push(rb_value, rb_element);
208
+ }
209
+
210
+ return rb_value;
211
+ }
212
+
213
+ typedef struct {
214
+ grn_ctx *context;
215
+ grn_obj *vector;
216
+ grn_obj *element_value;
217
+ } HashElementToVectorElementData;
218
+
219
+ static int
220
+ hash_element_to_vector_element(VALUE key, VALUE value, VALUE user_data)
221
+ {
222
+ HashElementToVectorElementData *data =
223
+ (HashElementToVectorElementData *)user_data;
224
+ unsigned int weight;
225
+
226
+ GRN_BULK_REWIND(data->element_value);
227
+ RVAL2GRNBULK(key, data->context, data->element_value);
228
+
229
+ weight = NUM2UINT(value);
230
+ grn_vector_add_element(data->context, data->vector,
231
+ GRN_BULK_HEAD(data->element_value),
232
+ GRN_BULK_VSIZE(data->element_value),
233
+ weight,
234
+ data->element_value->header.domain);
235
+
236
+ return ST_CONTINUE;
237
+ }
238
+
239
+ /*
240
+ * It updates the value of the variable size column for the record
241
+ * whose ID is _id_.
242
+ *
243
+ * A weight vector column is a special variable size column. This
244
+ * description covers only weight vector columns. Other variable
245
+ * size columns work as you would expect.
246
+ *
247
+ * @example Use weight vector as matrix search result weight
248
+ * Groonga::Schema.define do |schema|
249
+ * schema.create_table("Products",
250
+ * :type => :patricia_trie,
251
+ * :key_type => "ShortText") do |table|
252
+ * # This is weight vector.
253
+ * # ":with_weight => true" is important for matrix search result weight.
254
+ * table.short_text("tags",
255
+ * :type => :vector,
256
+ * :with_weight => true)
257
+ * end
258
+ *
259
+ * schema.create_table("Tags",
260
+ * :type => :hash,
261
+ * :key_type => "ShortText") do |table|
262
+ * # This is inverted index. It also needs ":with_weight => true".
263
+ * table.index("Products.tags", :with_weight => true)
264
+ * end
265
+ * end
266
+ *
267
+ * products = Groonga["Products"]
268
+ * groonga = products.add("Groonga")
269
+ * groonga.tags = [
270
+ * {
271
+ * :value => "groonga",
272
+ * :weight => 100,
273
+ * },
274
+ * ]
275
+ * rroonga = products.add("Rroonga")
276
+ * rroonga.tags = [
277
+ * {
278
+ * :value => "ruby",
279
+ * :weight => 100,
280
+ * },
281
+ * {
282
+ * :value => "groonga",
283
+ * :weight => 10,
284
+ * },
285
+ * ]
286
+ *
287
+ * result = products.select do |record|
288
+ * # Search by "groonga"
289
+ * record.match("groonga") do |match_target|
290
+ * match_target.tags
291
+ * end
292
+ * end
293
+ *
294
+ * result.each do |record|
295
+ * p [record.key.key, record.score]
296
+ * end
297
+ * # Matches all records with weight.
298
+ * # => ["Groonga", 101]
299
+ * # ["Rroonga", 11]
300
+ *
301
+ * # Increases score for "ruby" 10 times
302
+ * products.select(# The previous search result. Required.
303
+ * :result => result,
304
+ * # It just adds score to existing records in the result. Required.
305
+ * :operator => Groonga::Operator::ADJUST) do |record|
306
+ * record.match("ruby") do |target|
307
+ * target.tags * 10 # 10 times
308
+ * end
309
+ * end
310
+ *
311
+ * result.each do |record|
312
+ * p [record.key.key, record.score]
313
+ * end
314
+ * # Weight is used for increasing score.
315
+ * # => ["Groonga", 101] <- Not changed.
316
+ * # ["Rroonga", 1021] <- 1021 (= 101 * 10 + 1) increased.
317
+ *
318
+ * @overload []=(id, elements)
319
+ * This description is for weight vector column.
320
+ *
321
+ * @param [Integer, Record] id The record ID.
322
+ * @param [Array<Hash<Symbol, String>>] elements An array of values
323
+ * for weight vector.
324
+ * Each value is a Hash like the following form:
325
+ *
326
+ * <pre>
327
+ * {
328
+ * :value => [KEY],
329
+ * :weight => [WEIGHT],
330
+ * }
331
+ * </pre>
332
+ *
333
+ * @[KEY]@ must be the same type of the key of the table that is
334
+ * specified as range on creating the weight vector.
335
+ *
336
+ * @[WEIGHT]@ must be a positive integer. Note that the search score
337
+ * becomes @weight + 1@. It means that if you want to get 10 as the
338
+ * score, you should set 9 as the weight.
339
+ *
340
+ * @overload []=(id, value)
341
+ * This description is for variable size columns except weight
342
+ * vector column.
343
+ *
344
+ * @param [Integer, Record] id The record ID.
345
+ * @param [::Object] value A new value.
346
+ * @see Groonga::Object#[]=
347
+ *
348
+ * @since 4.0.1
349
+ */
350
+ static VALUE
351
+ rb_grn_variable_size_column_array_set (VALUE self, VALUE rb_id, VALUE rb_value)
352
+ {
353
+ grn_ctx *context = NULL;
354
+ grn_obj *column, *range;
355
+ grn_rc rc;
356
+ grn_id id;
357
+ grn_obj *value, *element_value;
358
+ int flags = GRN_OBJ_SET;
359
+
360
+ rb_grn_variable_size_column_deconstruct(SELF(self), &column, &context,
361
+ NULL, NULL, &value, &element_value,
362
+ NULL, &range);
363
+
364
+ if (!(column->header.flags & GRN_OBJ_WITH_WEIGHT)) {
365
+ VALUE args[2];
366
+ args[0] = rb_id;
367
+ args[1] = rb_value;
368
+ return rb_call_super(2, args);
369
+ }
370
+
371
+ id = RVAL2GRNID(rb_id, context, range, self);
372
+
373
+ grn_obj_reinit(context, value,
374
+ value->header.domain,
375
+ value->header.flags | GRN_OBJ_VECTOR);
376
+ if (RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cArray))) {
377
+ int i, n;
378
+ n = RARRAY_LEN(rb_value);
379
+ for (i = 0; i < n; i++) {
380
+ unsigned int weight = 0;
381
+ VALUE rb_element_value, rb_weight;
382
+
383
+ rb_grn_scan_options(RARRAY_PTR(rb_value)[i],
384
+ "value", &rb_element_value,
385
+ "weight", &rb_weight,
386
+ NULL);
387
+
388
+ if (!NIL_P(rb_weight)) {
389
+ weight = NUM2UINT(rb_weight);
390
+ }
391
+
392
+ GRN_BULK_REWIND(element_value);
393
+ if (!NIL_P(rb_element_value)) {
394
+ RVAL2GRNBULK(rb_element_value, context, element_value);
395
+ }
396
+
397
+ grn_vector_add_element(context, value,
398
+ GRN_BULK_HEAD(element_value),
399
+ GRN_BULK_VSIZE(element_value),
400
+ weight,
401
+ element_value->header.domain);
402
+ }
403
+ } else if (RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cHash))) {
404
+ HashElementToVectorElementData data;
405
+ data.context = context;
406
+ data.vector = value;
407
+ data.element_value = element_value;
408
+ rb_hash_foreach(rb_value, hash_element_to_vector_element, (VALUE)&data);
409
+ } else {
410
+ rb_raise(rb_eArgError,
411
+ "<%s>: "
412
+ "weight vector value must be an array of index value or "
413
+ "a hash that key is vector value and value is vector weight: "
414
+ "<%s>",
415
+ rb_grn_inspect(self),
416
+ rb_grn_inspect(rb_value));
417
+ }
418
+
419
+ rc = grn_obj_set_value(context, column, id, value, flags);
420
+ rb_grn_context_check(context, self);
421
+ rb_grn_rc_check(rc, self);
422
+
423
+ return rb_value;
424
+ }
425
+
31
426
  /*
32
427
  * Returns whether the column is compressed or not. If
33
428
  * @type@ is specified, it returns whether the column is
@@ -42,7 +437,7 @@ VALUE rb_cGrnVariableSizeColumn;
42
437
  static VALUE
43
438
  rb_grn_variable_size_column_compressed_p (int argc, VALUE *argv, VALUE self)
44
439
  {
45
- RbGrnColumn *rb_grn_column;
440
+ RbGrnVariableSizeColumn *rb_grn_column;
46
441
  grn_ctx *context = NULL;
47
442
  grn_obj *column;
48
443
  grn_obj_flags flags;
@@ -112,7 +507,7 @@ rb_grn_variable_size_column_compressed_p (int argc, VALUE *argv, VALUE self)
112
507
  static VALUE
113
508
  rb_grn_variable_size_column_defrag (int argc, VALUE *argv, VALUE self)
114
509
  {
115
- RbGrnColumn *rb_grn_column;
510
+ RbGrnVariableSizeColumn *rb_grn_column;
116
511
  grn_ctx *context = NULL;
117
512
  grn_obj *column;
118
513
  int n_segments;
@@ -143,6 +538,11 @@ rb_grn_init_variable_size_column (VALUE mGrn)
143
538
  rb_cGrnVariableSizeColumn =
144
539
  rb_define_class_under(mGrn, "VariableSizeColumn", rb_cGrnColumn);
145
540
 
541
+ rb_define_method(rb_cGrnVariableSizeColumn, "[]",
542
+ rb_grn_variable_size_column_array_reference, 1);
543
+ rb_define_method(rb_cGrnVariableSizeColumn, "[]=",
544
+ rb_grn_variable_size_column_array_set, 2);
545
+
146
546
  rb_define_method(rb_cGrnVariableSizeColumn, "compressed?",
147
547
  rb_grn_variable_size_column_compressed_p, -1);
148
548
  rb_define_method(rb_cGrnVariableSizeColumn, "defrag",