rroonga 4.0.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -444,6 +444,13 @@ rb_grn_object_assign (VALUE klass, VALUE self, VALUE rb_context,
444
444
  context, object);
445
445
  rb_grn_index_column_bind(RB_GRN_INDEX_COLUMN(rb_grn_object),
446
446
  context, object);
447
+ } else if (RVAL2CBOOL(rb_obj_is_kind_of(self, rb_cGrnVariableSizeColumn))) {
448
+ rb_grn_object = ALLOC(RbGrnVariableSizeColumn);
449
+ rb_grn_object_bind_common(klass, self, rb_context, rb_grn_object,
450
+ context, object);
451
+ rb_grn_variable_size_column_bind(RB_GRN_VARIABLE_SIZE_COLUMN(rb_grn_object),
452
+ context,
453
+ object);
447
454
  } else if (RVAL2CBOOL(rb_obj_is_kind_of(self, rb_cGrnColumn))) {
448
455
  rb_grn_object = ALLOC(RbGrnColumn);
449
456
  rb_grn_object_bind_common(klass, self, rb_context, rb_grn_object,
@@ -226,6 +226,11 @@ rb_grn_table_inspect (VALUE self)
226
226
  *
227
227
  * - +:scalar+ := スカラ値(単独の値)を格納する。
228
228
  * - +:vector+ := 値の配列を格納する。
229
+ * @option options [Boolean] :with_weight (false)
230
+ * It specifies whether to make the column a weight vector column or not.
231
+ * Weight vector column can store weight for each element.
232
+ *
233
+ * You can't use this option for scalar column.
229
234
  * @option options :compress
230
235
  * 値の圧縮方法を指定する。省略した場合は、圧縮しない。
231
236
  *
@@ -244,7 +249,7 @@ rb_grn_table_define_column (int argc, VALUE *argv, VALUE self)
244
249
  unsigned name_size = 0;
245
250
  grn_obj_flags flags = 0;
246
251
  VALUE rb_name, rb_value_type;
247
- VALUE options, rb_path, rb_persistent, rb_compress, rb_type;
252
+ VALUE options, rb_path, rb_persistent, rb_compress, rb_type, rb_with_weight;
248
253
  VALUE columns;
249
254
  VALUE rb_column;
250
255
 
@@ -262,6 +267,7 @@ rb_grn_table_define_column (int argc, VALUE *argv, VALUE self)
262
267
  "path", &rb_path,
263
268
  "persistent", &rb_persistent,
264
269
  "type", &rb_type,
270
+ "with_weight", &rb_with_weight,
265
271
  "compress", &rb_compress,
266
272
  NULL);
267
273
 
@@ -294,6 +300,15 @@ rb_grn_table_define_column (int argc, VALUE *argv, VALUE self)
294
300
  rb_grn_inspect(rb_type));
295
301
  }
296
302
 
303
+ if (RVAL2CBOOL(rb_with_weight)) {
304
+ if (flags & GRN_OBJ_COLUMN_VECTOR) {
305
+ flags |= GRN_OBJ_WITH_WEIGHT;
306
+ } else {
307
+ rb_raise(rb_eArgError,
308
+ "can't use weight for scalar column");
309
+ }
310
+ }
311
+
297
312
  if (NIL_P(rb_compress)) {
298
313
  } else if (rb_grn_equal_option(rb_compress, "zlib")) {
299
314
  flags |= GRN_OBJ_COMPRESS_ZLIB;
@@ -1268,7 +1283,8 @@ rb_grn_table_group (int argc, VALUE *argv, VALUE self)
1268
1283
  grn_obj *table;
1269
1284
  grn_table_sort_key *keys;
1270
1285
  grn_table_group_result *results;
1271
- int i, n_keys, n_results, max_n_sub_records = 0;
1286
+ int i, n_keys, n_results;
1287
+ unsigned int max_n_sub_records = 0;
1272
1288
  grn_rc rc;
1273
1289
  VALUE rb_keys, rb_options, rb_max_n_sub_records;
1274
1290
  VALUE *rb_group_keys;
@@ -1294,7 +1310,7 @@ rb_grn_table_group (int argc, VALUE *argv, VALUE self)
1294
1310
  NULL);
1295
1311
 
1296
1312
  if (!NIL_P(rb_max_n_sub_records))
1297
- max_n_sub_records = NUM2INT(rb_max_n_sub_records);
1313
+ max_n_sub_records = NUM2UINT(rb_max_n_sub_records);
1298
1314
 
1299
1315
  keys = ALLOCA_N(grn_table_sort_key, n_keys);
1300
1316
  for (i = 0; i < n_keys; i++) {
@@ -123,6 +123,10 @@ rb_grn_scan_options (VALUE options, ...)
123
123
  rb_key = RB_GRN_INTERN(key);
124
124
  rb_ary_push(available_keys, rb_key);
125
125
  *value = rb_funcall(options, rb_intern("delete"), 1, rb_key);
126
+ if (NIL_P(*value)) {
127
+ rb_key = rb_str_new_cstr(key);
128
+ *value = rb_funcall(options, rb_intern("delete"), 1, rb_key);
129
+ }
126
130
 
127
131
  key = va_arg(args, const char *);
128
132
  }
@@ -652,7 +656,6 @@ rb_grn_vector_to_ruby_object (grn_ctx *context, grn_obj *vector)
652
656
  grn_obj_reinit(context, &value, domain, 0);
653
657
  grn_bulk_write(context, &value, _value, length);
654
658
  rb_ary_push(array, GRNOBJ2RVAL(Qnil, context, &value, Qnil));
655
- /* UINT2NUM(weight); */ /* TODO: How handle weight? */
656
659
  }
657
660
  GRN_OBJ_FIN(context, &value);
658
661
 
@@ -1,6 +1,6 @@
1
1
  /* -*- coding: utf-8; mode: C; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
2
  /*
3
- Copyright (C) 2009-2011 Kouhei Sutou <kou@clear-code.com>
3
+ Copyright (C) 2009-2014 Kouhei Sutou <kou@clear-code.com>
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -18,16 +18,411 @@
18
18
 
19
19
  #include "rb-grn.h"
20
20
 
21
- #define SELF(object) ((RbGrnColumn *)DATA_PTR(object))
21
+ #define SELF(object) ((RbGrnVariableSizeColumn *)DATA_PTR(object))
22
22
 
23
23
  VALUE rb_cGrnVariableSizeColumn;
24
24
 
25
+ void
26
+ rb_grn_variable_size_column_bind (RbGrnVariableSizeColumn *rb_column,
27
+ grn_ctx *context, grn_obj *column)
28
+ {
29
+ RbGrnObject *rb_grn_object;
30
+ int column_type;
31
+ unsigned char value_type;
32
+
33
+ rb_grn_object = RB_GRN_OBJECT(rb_column);
34
+ rb_grn_column_bind(RB_GRN_COLUMN(rb_column), context, column);
35
+
36
+ rb_column->element_value = NULL;
37
+ column_type = (column->header.flags & GRN_OBJ_COLUMN_TYPE_MASK);
38
+ if (column_type != GRN_OBJ_COLUMN_VECTOR) {
39
+ return;
40
+ }
41
+
42
+ switch (rb_grn_object->range->header.type) {
43
+ case GRN_TABLE_HASH_KEY:
44
+ case GRN_TABLE_PAT_KEY:
45
+ case GRN_TABLE_DAT_KEY:
46
+ case GRN_TABLE_NO_KEY:
47
+ value_type = GRN_UVECTOR;
48
+ break;
49
+ default:
50
+ value_type = GRN_VECTOR;
51
+ break;
52
+ }
53
+ if (column->header.flags & GRN_OBJ_WITH_WEIGHT) {
54
+ rb_column->element_value = grn_obj_open(context, value_type, 0,
55
+ rb_grn_object->range_id);
56
+ }
57
+ }
58
+
59
+ void
60
+ rb_grn_variable_size_column_finalizer (grn_ctx *context, grn_obj *grn_object,
61
+ RbGrnVariableSizeColumn *rb_column)
62
+ {
63
+ rb_grn_column_finalizer(context, grn_object,
64
+ RB_GRN_COLUMN(rb_column));
65
+ if (context && rb_column->element_value)
66
+ grn_obj_unlink(context, rb_column->element_value);
67
+ rb_column->element_value = NULL;
68
+ }
69
+
70
+ static void
71
+ rb_grn_variable_size_column_deconstruct (RbGrnVariableSizeColumn *rb_column,
72
+ grn_obj **column,
73
+ grn_ctx **context,
74
+ grn_id *domain_id,
75
+ grn_obj **domain,
76
+ grn_obj **value,
77
+ grn_obj **element_value,
78
+ grn_id *range_id,
79
+ grn_obj **range)
80
+ {
81
+ RbGrnColumn *rb_grn_column;
82
+
83
+ rb_grn_column = RB_GRN_COLUMN(rb_column);
84
+ rb_grn_column_deconstruct(rb_grn_column, column, context,
85
+ domain_id, domain, value,
86
+ range_id, range);
87
+
88
+ if (element_value)
89
+ *element_value = rb_column->element_value;
90
+ }
91
+
25
92
  /*
26
93
  * Document-class: Groonga::VariableSizeColumn < Groonga::Column
27
94
  *
28
- * 可変長データ用のカラム。
95
+ * A column for variable size data like text family types and vector
96
+ * column.
29
97
  */
30
98
 
99
+ /*
100
+ * It gets the value of a variable size column for the record whose
101
+ * ID is _id_.
102
+ *
103
+ * @example Gets weight vector value
104
+ * Groonga::Schema.define do |schema|
105
+ * schema.create_table("Products",
106
+ * :type => :patricia_trie,
107
+ * :key_type => "ShortText") do |table|
108
+ * # This is weight vector.
109
+ * # ":with_weight => true" is important to store weight value.
110
+ * table.short_text("tags",
111
+ * :type => :vector,
112
+ * :with_weight => true)
113
+ * end
114
+ * end
115
+ *
116
+ * products = Groonga["Products"]
117
+ * rroonga = products.add("Rroonga")
118
+ * rroonga.tags = [
119
+ * {
120
+ * :value => "ruby",
121
+ * :weight => 100,
122
+ * },
123
+ * {
124
+ * :value => "groonga",
125
+ * :weight => 10,
126
+ * },
127
+ * ]
128
+ *
129
+ * p rroonga.tags
130
+ * # => [
131
+ * # {:value => "ruby", :weight => 100},
132
+ * # {:value => "groonga", :weight => 10}
133
+ * # ]
134
+ *
135
+ * @overload [](id)
136
+ * @param [Integer, Record] id The record ID.
137
+ * @return [Array<Hash<Symbol, String>>] An array of values if the column
138
+ * is a weight vector column.
139
+ * Each value is a Hash like the following form:
140
+ *
141
+ * <pre>
142
+ * {
143
+ * :value => [KEY],
144
+ * :weight => [WEIGHT],
145
+ * }
146
+ * </pre>
147
+ *
148
+ * @[KEY]@ is the key of the table that is specified as range on
149
+ * creating the weight vector.
150
+ *
151
+ * @[WEIGHT]@ is a positive integer.
152
+ *
153
+ * @return [::Object] See {Groonga::Object#[]} for columns except
154
+ * weight vector column.
155
+ *
156
+ * @since 4.0.1
157
+ */
158
+ static VALUE
159
+ rb_grn_variable_size_column_array_reference (VALUE self, VALUE rb_id)
160
+ {
161
+ grn_ctx *context = NULL;
162
+ grn_obj *column, *range;
163
+ grn_id id;
164
+ grn_obj *value;
165
+ VALUE rb_value;
166
+ unsigned int i, n;
167
+
168
+ rb_grn_variable_size_column_deconstruct(SELF(self), &column, &context,
169
+ NULL, NULL, &value, NULL,
170
+ NULL, &range);
171
+
172
+ if (!(column->header.flags & GRN_OBJ_WITH_WEIGHT)) {
173
+ return rb_call_super(1, &rb_id);
174
+ }
175
+
176
+ id = RVAL2GRNID(rb_id, context, range, self);
177
+
178
+ grn_obj_reinit(context, value,
179
+ value->header.domain,
180
+ value->header.flags | GRN_OBJ_VECTOR);
181
+ grn_obj_get_value(context, column, id, value);
182
+ rb_grn_context_check(context, self);
183
+
184
+ n = grn_vector_size(context, value);
185
+ rb_value = rb_ary_new2(n);
186
+ for (i = 0; i < n; i++) {
187
+ const char *element_value;
188
+ unsigned int element_value_length;
189
+ unsigned int weight = 0;
190
+ grn_id domain;
191
+ VALUE rb_element;
192
+
193
+ element_value_length = grn_vector_get_element(context,
194
+ value,
195
+ i,
196
+ &element_value,
197
+ &weight,
198
+ &domain);
199
+ rb_element = rb_hash_new();
200
+ rb_hash_aset(rb_element,
201
+ ID2SYM(rb_intern("value")),
202
+ rb_str_new(element_value, element_value_length));
203
+ rb_hash_aset(rb_element,
204
+ ID2SYM(rb_intern("weight")),
205
+ UINT2NUM(weight));
206
+
207
+ rb_ary_push(rb_value, rb_element);
208
+ }
209
+
210
+ return rb_value;
211
+ }
212
+
213
+ typedef struct {
214
+ grn_ctx *context;
215
+ grn_obj *vector;
216
+ grn_obj *element_value;
217
+ } HashElementToVectorElementData;
218
+
219
+ static int
220
+ hash_element_to_vector_element(VALUE key, VALUE value, VALUE user_data)
221
+ {
222
+ HashElementToVectorElementData *data =
223
+ (HashElementToVectorElementData *)user_data;
224
+ unsigned int weight;
225
+
226
+ GRN_BULK_REWIND(data->element_value);
227
+ RVAL2GRNBULK(key, data->context, data->element_value);
228
+
229
+ weight = NUM2UINT(value);
230
+ grn_vector_add_element(data->context, data->vector,
231
+ GRN_BULK_HEAD(data->element_value),
232
+ GRN_BULK_VSIZE(data->element_value),
233
+ weight,
234
+ data->element_value->header.domain);
235
+
236
+ return ST_CONTINUE;
237
+ }
238
+
239
+ /*
240
+ * It updates the value of a variable size column for the record
241
+ * whose ID is _id_.
242
+ *
243
+ * A weight vector column is a special variable size column. This
244
+ * description covers only weight vector columns. Other variable
245
+ * size columns work as you would expect.
246
+ *
247
+ * @example Use weight vector as matrix search result weight
248
+ * Groonga::Schema.define do |schema|
249
+ * schema.create_table("Products",
250
+ * :type => :patricia_trie,
251
+ * :key_type => "ShortText") do |table|
252
+ * # This is weight vector.
253
+ * # ":with_weight => true" is important for matrix search result weight.
254
+ * table.short_text("tags",
255
+ * :type => :vector,
256
+ * :with_weight => true)
257
+ * end
258
+ *
259
+ * schema.create_table("Tags",
260
+ * :type => :hash,
261
+ * :key_type => "ShortText") do |table|
262
+ * # This is inverted index. It also needs ":with_weight => true".
263
+ * table.index("Products.tags", :with_weight => true)
264
+ * end
265
+ * end
266
+ *
267
+ * products = Groonga["Products"]
268
+ * groonga = products.add("Groonga")
269
+ * groonga.tags = [
270
+ * {
271
+ * :value => "groonga",
272
+ * :weight => 100,
273
+ * },
274
+ * ]
275
+ * rroonga = products.add("Rroonga")
276
+ * rroonga.tags = [
277
+ * {
278
+ * :value => "ruby",
279
+ * :weight => 100,
280
+ * },
281
+ * {
282
+ * :value => "groonga",
283
+ * :weight => 10,
284
+ * },
285
+ * ]
286
+ *
287
+ * result = products.select do |record|
288
+ * # Search by "groonga"
289
+ * record.match("groonga") do |match_target|
290
+ * match_target.tags
291
+ * end
292
+ * end
293
+ *
294
+ * result.each do |record|
295
+ * p [record.key.key, record.score]
296
+ * end
297
+ * # Matches all records with weight.
298
+ * # => ["Groonga", 101]
299
+ * # ["Rroonga", 11]
300
+ *
301
+ * # Increases score for "ruby" 10 times
302
+ * products.select(# The previous search result. Required.
303
+ * :result => result,
304
+ * # It just adds score to existing records in the result. Required.
305
+ * :operator => Groonga::Operator::ADJUST) do |record|
306
+ * record.match("ruby") do |target|
307
+ * target.tags * 10 # 10 times
308
+ * end
309
+ * end
310
+ *
311
+ * result.each do |record|
312
+ * p [record.key.key, record.score]
313
+ * end
314
+ * # Weight is used for increasing score.
315
+ * # => ["Groonga", 101] <- Not changed.
316
+ * # ["Rroonga", 1021] <- Increased to 1021 (= 11 + 101 * 10).
317
+ *
318
+ * @overload []=(id, elements)
319
+ * This description is for weight vector column.
320
+ *
321
+ * @param [Integer, Record] id The record ID.
322
+ * @param [Array<Hash<Symbol, String>>] elements An array of values
323
+ * for weight vector.
324
+ * Each value is a Hash like the following form:
325
+ *
326
+ * <pre>
327
+ * {
328
+ * :value => [KEY],
329
+ * :weight => [WEIGHT],
330
+ * }
331
+ * </pre>
332
+ *
333
+ * @[KEY]@ must be the same type of the key of the table that is
334
+ * specified as range on creating the weight vector.
335
+ *
336
+ * @[WEIGHT]@ must be a positive integer. Note that the search score
337
+ * becomes @weight + 1@. It means that if you want to get 10 as
338
+ * score, you should set 9 as weight.
339
+ *
340
+ * @overload []=(id, value)
341
+ * This description is for variable size columns except weight
342
+ * vector column.
343
+ *
344
+ * @param [Integer, Record] id The record ID.
345
+ * @param [::Object] value A new value.
346
+ * @see Groonga::Object#[]=
347
+ *
348
+ * @since 4.0.1
349
+ */
350
+ static VALUE
351
+ rb_grn_variable_size_column_array_set (VALUE self, VALUE rb_id, VALUE rb_value)
352
+ {
353
+ grn_ctx *context = NULL;
354
+ grn_obj *column, *range;
355
+ grn_rc rc;
356
+ grn_id id;
357
+ grn_obj *value, *element_value;
358
+ int flags = GRN_OBJ_SET;
359
+
360
+ rb_grn_variable_size_column_deconstruct(SELF(self), &column, &context,
361
+ NULL, NULL, &value, &element_value,
362
+ NULL, &range);
363
+
364
+ if (!(column->header.flags & GRN_OBJ_WITH_WEIGHT)) {
365
+ VALUE args[2];
366
+ args[0] = rb_id;
367
+ args[1] = rb_value;
368
+ return rb_call_super(2, args);
369
+ }
370
+
371
+ id = RVAL2GRNID(rb_id, context, range, self);
372
+
373
+ grn_obj_reinit(context, value,
374
+ value->header.domain,
375
+ value->header.flags | GRN_OBJ_VECTOR);
376
+ if (RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cArray))) {
377
+ int i, n;
378
+ n = RARRAY_LEN(rb_value);
379
+ for (i = 0; i < n; i++) {
380
+ unsigned int weight = 0;
381
+ VALUE rb_element_value, rb_weight;
382
+
383
+ rb_grn_scan_options(RARRAY_PTR(rb_value)[i],
384
+ "value", &rb_element_value,
385
+ "weight", &rb_weight,
386
+ NULL);
387
+
388
+ if (!NIL_P(rb_weight)) {
389
+ weight = NUM2UINT(rb_weight);
390
+ }
391
+
392
+ GRN_BULK_REWIND(element_value);
393
+ if (!NIL_P(rb_element_value)) {
394
+ RVAL2GRNBULK(rb_element_value, context, element_value);
395
+ }
396
+
397
+ grn_vector_add_element(context, value,
398
+ GRN_BULK_HEAD(element_value),
399
+ GRN_BULK_VSIZE(element_value),
400
+ weight,
401
+ element_value->header.domain);
402
+ }
403
+ } else if (RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cHash))) {
404
+ HashElementToVectorElementData data;
405
+ data.context = context;
406
+ data.vector = value;
407
+ data.element_value = element_value;
408
+ rb_hash_foreach(rb_value, hash_element_to_vector_element, (VALUE)&data);
409
+ } else {
410
+ rb_raise(rb_eArgError,
411
+ "<%s>: "
412
+ "weight vector value must be an array of index value or "
413
+ "a hash that key is vector value and value is vector weight: "
414
+ "<%s>",
415
+ rb_grn_inspect(self),
416
+ rb_grn_inspect(rb_value));
417
+ }
418
+
419
+ rc = grn_obj_set_value(context, column, id, value, flags);
420
+ rb_grn_context_check(context, self);
421
+ rb_grn_rc_check(rc, self);
422
+
423
+ return rb_value;
424
+ }
425
+
31
426
  /*
32
427
  * Returns whether the column is compressed or not. If
33
428
  * @type@ is specified, it returns whether the column is
@@ -42,7 +437,7 @@ VALUE rb_cGrnVariableSizeColumn;
42
437
  static VALUE
43
438
  rb_grn_variable_size_column_compressed_p (int argc, VALUE *argv, VALUE self)
44
439
  {
45
- RbGrnColumn *rb_grn_column;
440
+ RbGrnVariableSizeColumn *rb_grn_column;
46
441
  grn_ctx *context = NULL;
47
442
  grn_obj *column;
48
443
  grn_obj_flags flags;
@@ -112,7 +507,7 @@ rb_grn_variable_size_column_compressed_p (int argc, VALUE *argv, VALUE self)
112
507
  static VALUE
113
508
  rb_grn_variable_size_column_defrag (int argc, VALUE *argv, VALUE self)
114
509
  {
115
- RbGrnColumn *rb_grn_column;
510
+ RbGrnVariableSizeColumn *rb_grn_column;
116
511
  grn_ctx *context = NULL;
117
512
  grn_obj *column;
118
513
  int n_segments;
@@ -143,6 +538,11 @@ rb_grn_init_variable_size_column (VALUE mGrn)
143
538
  rb_cGrnVariableSizeColumn =
144
539
  rb_define_class_under(mGrn, "VariableSizeColumn", rb_cGrnColumn);
145
540
 
541
+ rb_define_method(rb_cGrnVariableSizeColumn, "[]",
542
+ rb_grn_variable_size_column_array_reference, 1);
543
+ rb_define_method(rb_cGrnVariableSizeColumn, "[]=",
544
+ rb_grn_variable_size_column_array_set, 2);
545
+
146
546
  rb_define_method(rb_cGrnVariableSizeColumn, "compressed?",
147
547
  rb_grn_variable_size_column_compressed_p, -1);
148
548
  rb_define_method(rb_cGrnVariableSizeColumn, "defrag",