groonga 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. data/AUTHORS +4 -0
  2. data/NEWS.ja.rdoc +10 -0
  3. data/NEWS.rdoc +10 -0
  4. data/README.ja.rdoc +9 -3
  5. data/README.rdoc +10 -4
  6. data/Rakefile +1 -1
  7. data/TUTORIAL.ja.rdoc +3 -6
  8. data/example/bookmark.rb +1 -1
  9. data/example/search/config.ru +52 -28
  10. data/ext/rb-grn-column.c +24 -18
  11. data/ext/rb-grn-context.c +165 -17
  12. data/ext/rb-grn-encoding.c +37 -0
  13. data/ext/rb-grn-expression.c +286 -51
  14. data/ext/rb-grn-object.c +27 -8
  15. data/ext/rb-grn-operation.c +128 -22
  16. data/ext/rb-grn-patricia-trie.c +62 -0
  17. data/ext/rb-grn-snippet.c +7 -17
  18. data/ext/rb-grn-table.c +101 -31
  19. data/ext/rb-grn-utils.c +87 -22
  20. data/ext/rb-grn-variable-size-column.c +1 -1
  21. data/ext/rb-grn.h +27 -4
  22. data/ext/rb-groonga.c +12 -2
  23. data/extconf.rb +2 -1
  24. data/html/index.html +2 -2
  25. data/lib/groonga.rb +1 -0
  26. data/lib/groonga/expression-builder.rb +47 -12
  27. data/lib/groonga/patricia-trie.rb +40 -0
  28. data/lib/groonga/record.rb +17 -13
  29. data/misc/grnop2ruby.rb +49 -0
  30. data/pkg-config.rb +1 -1
  31. data/test-unit/lib/test/unit/assertions.rb +5 -2
  32. data/test-unit/lib/test/unit/autorunner.rb +19 -4
  33. data/test-unit/lib/test/unit/collector/load.rb +3 -1
  34. data/test-unit/lib/test/unit/color-scheme.rb +5 -1
  35. data/test-unit/lib/test/unit/error.rb +7 -5
  36. data/test-unit/lib/test/unit/runner/tap.rb +8 -0
  37. data/test-unit/lib/test/unit/ui/console/testrunner.rb +63 -8
  38. data/test-unit/lib/test/unit/ui/tap/testrunner.rb +92 -0
  39. data/test-unit/test/collector/test-load.rb +1 -5
  40. data/test-unit/test/test-color-scheme.rb +4 -0
  41. data/test/groonga-test-utils.rb +10 -0
  42. data/test/run-test.rb +5 -1
  43. data/test/test-column.rb +58 -0
  44. data/test/test-database.rb +8 -1
  45. data/test/test-expression.rb +48 -6
  46. data/test/test-hash.rb +7 -0
  47. data/test/test-patricia-trie.rb +39 -0
  48. data/test/test-record.rb +2 -2
  49. data/test/test-remote.rb +52 -0
  50. data/test/test-schema.rb +1 -1
  51. data/test/test-table-select-normalize.rb +48 -0
  52. data/test/test-table-select.rb +101 -0
  53. data/test/test-table.rb +0 -9
  54. data/test/test-variable-size-column.rb +28 -0
  55. metadata +16 -5
@@ -148,6 +148,43 @@ rb_grn_encoding_to_ruby_object (grn_encoding encoding)
148
148
  return rb_encoding;
149
149
  }
150
150
 
151
+ #ifdef HAVE_RUBY_ENCODING_H
152
+ rb_encoding *
153
+ rb_grn_encoding_to_ruby_encoding (grn_encoding encoding)
154
+ {
155
+ rb_encoding *rb_encoding;
156
+
157
+ if (encoding == GRN_ENC_DEFAULT)
158
+ encoding = grn_get_default_encoding();
159
+
160
+ switch (encoding) {
161
+ case GRN_ENC_NONE:
162
+ rb_encoding = rb_ascii8bit_encoding();
163
+ break;
164
+ case GRN_ENC_EUC_JP:
165
+ rb_encoding = rb_enc_find("euc-jp");
166
+ break;
167
+ case GRN_ENC_UTF8:
168
+ rb_encoding = rb_utf8_encoding();
169
+ break;
170
+ case GRN_ENC_SJIS:
171
+ rb_encoding = rb_enc_find("CP932");
172
+ break;
173
+ case GRN_ENC_LATIN1:
174
+ rb_encoding = rb_enc_find("ISO-8859-1");
175
+ break;
176
+ case GRN_ENC_KOI8R:
177
+ rb_encoding = rb_enc_find("KOI8-R");
178
+ break;
179
+ default:
180
+ rb_raise(rb_eArgError, "unknown encoding: %d", encoding);
181
+ break;
182
+ }
183
+
184
+ return rb_encoding;
185
+ }
186
+ #endif
187
+
151
188
  /*
152
189
  * call-seq:
153
190
  * Groonga::Encoding.default -> エンコーディング
@@ -70,17 +70,14 @@ rb_grn_expression_initialize (int argc, VALUE *argv, VALUE self)
70
70
  {
71
71
  grn_ctx *context = NULL;
72
72
  grn_obj *expression;
73
- VALUE options, rb_context, rb_name, rb_query, rb_table, rb_default_column;
74
- char *name = NULL, *query = NULL;
75
- unsigned name_size = 0, query_size = 0;
73
+ VALUE options, rb_context, rb_name;
74
+ char *name = NULL;
75
+ unsigned name_size = 0;
76
76
 
77
77
  rb_scan_args(argc, argv, "01", &options);
78
78
  rb_grn_scan_options(options,
79
79
  "context", &rb_context,
80
80
  "name", &rb_name,
81
- "query", &rb_query,
82
- "table", &rb_table,
83
- "default_column", &rb_default_column,
84
81
  NULL);
85
82
 
86
83
  context = rb_grn_context_ensure(&rb_context);
@@ -90,23 +87,7 @@ rb_grn_expression_initialize (int argc, VALUE *argv, VALUE self)
90
87
  name_size = RSTRING_LEN(rb_name);
91
88
  }
92
89
 
93
- if (!NIL_P(rb_query)) {
94
- query = StringValuePtr(rb_query);
95
- query_size = RSTRING_LEN(rb_query);
96
- }
97
-
98
- if (query) {
99
- grn_obj *table;
100
- grn_obj *default_column = NULL;
101
-
102
- table = RVAL2GRNOBJECT(rb_table, &context);
103
- default_column = RVAL2GRNBULK(rb_default_column, context, default_column);
104
- expression = grn_expr_create_from_str(context, name, name_size,
105
- query, query_size,
106
- table, default_column);
107
- } else {
108
- expression = grn_expr_create(context, name, name_size);
109
- }
90
+ expression = grn_expr_create(context, name, name_size);
110
91
  rb_grn_object_assign(Qnil, self, rb_context, context, expression);
111
92
  rb_grn_context_check(context, self);
112
93
 
@@ -151,49 +132,52 @@ rb_grn_expression_define_variable (int argc, VALUE *argv, VALUE self)
151
132
  }
152
133
 
153
134
  static VALUE
154
- rb_grn_expression_get_value (VALUE self, VALUE rb_offset)
155
- {
156
- grn_ctx *context = NULL;
157
- grn_obj *value, *expression;
158
- int offset;
159
-
160
- rb_grn_expression_deconstruct(SELF(self), &expression, &context,
161
- NULL, NULL,
162
- NULL, NULL, NULL);
163
-
164
- offset = NUM2INT(rb_offset);
165
- value = grn_expr_get_value(context, expression, offset);
166
- return GRNBULK2RVAL(context, value, self);
167
- }
168
-
169
- static VALUE
170
- rb_grn_expression_append_object (VALUE self, VALUE rb_object)
135
+ rb_grn_expression_append_object (int argc, VALUE *argv, VALUE self)
171
136
  {
137
+ VALUE rb_object, rb_operation, rb_n_arguments;
172
138
  grn_ctx *context = NULL;
173
139
  grn_obj *expression, *object;
140
+ grn_operator operation = GRN_OP_PUSH;
141
+ int n_arguments = 1;
142
+
143
+ rb_scan_args(argc, argv, "12", &rb_object, &rb_operation, &rb_n_arguments);
144
+ if (!NIL_P(rb_operation))
145
+ operation = NUM2INT(rb_operation);
146
+ if (!NIL_P(rb_n_arguments))
147
+ n_arguments = NUM2INT(rb_n_arguments);
174
148
 
175
149
  rb_grn_expression_deconstruct(SELF(self), &expression, &context,
176
150
  NULL, NULL,
177
151
  NULL, NULL, NULL);
178
152
 
179
153
  object = RVAL2GRNOBJECT(rb_object, &context);
180
- grn_expr_append_obj(context, expression, object);
154
+ grn_expr_append_obj(context, expression, object,
155
+ operation, n_arguments);
181
156
  rb_grn_context_check(context, self);
182
157
  return self;
183
158
  }
184
159
 
185
160
  static VALUE
186
- rb_grn_expression_append_constant (VALUE self, VALUE rb_constant)
161
+ rb_grn_expression_append_constant (int argc, VALUE *argv, VALUE self)
187
162
  {
163
+ VALUE rb_constant, rb_operator, rb_n_arguments;
188
164
  grn_ctx *context = NULL;
189
165
  grn_obj *expression, *constant = NULL;
166
+ grn_operator operator = GRN_OP_PUSH;
167
+ int n_arguments = 1;
168
+
169
+ rb_scan_args(argc, argv, "12", &rb_constant, &rb_operator, &rb_n_arguments);
170
+ if (!NIL_P(rb_operator))
171
+ operator = NUM2INT(rb_operator);
172
+ if (!NIL_P(rb_n_arguments))
173
+ n_arguments = NUM2INT(rb_n_arguments);
190
174
 
191
175
  rb_grn_expression_deconstruct(SELF(self), &expression, &context,
192
176
  NULL, NULL, NULL,
193
177
  NULL, NULL);
194
178
 
195
179
  RVAL2GRNOBJ(rb_constant, context, &constant);
196
- grn_expr_append_const(context, expression, constant);
180
+ grn_expr_append_const(context, expression, constant, operator, n_arguments);
197
181
  grn_obj_close(context, constant);
198
182
  rb_grn_context_check(context, self);
199
183
  return self;
@@ -219,18 +203,94 @@ rb_grn_expression_append_operation (VALUE self, VALUE rb_operation,
219
203
  return Qnil;
220
204
  }
221
205
 
206
+ static VALUE
207
+ rb_grn_expression_parse (int argc, VALUE *argv, VALUE self)
208
+ {
209
+ grn_ctx *context = NULL;
210
+ grn_obj *expression, *default_column = NULL;
211
+ grn_operator default_operator = GRN_OP_AND;
212
+ grn_operator default_mode = GRN_OP_MATCH;
213
+ grn_rc rc;
214
+ char *query = NULL;
215
+ unsigned query_size = 0;
216
+ int parse_level = 0;
217
+ VALUE options, rb_query, rb_default_column, rb_default_operator;
218
+ VALUE rb_default_mode, rb_parser;
219
+ VALUE exception = Qnil;
220
+
221
+ rb_scan_args(argc, argv, "11", &rb_query, &options);
222
+ rb_grn_scan_options(options,
223
+ "default_column", &rb_default_column,
224
+ "default_operator", &rb_default_operator,
225
+ "default_mode", &rb_default_mode,
226
+ "parser", &rb_parser,
227
+ NULL);
228
+
229
+ query = StringValuePtr(rb_query);
230
+ query_size = RSTRING_LEN(rb_query);
231
+
232
+ rb_grn_expression_deconstruct(SELF(self), &expression, &context,
233
+ NULL, NULL,
234
+ NULL, NULL, NULL);
235
+
236
+ default_column = RVAL2GRNBULK(rb_default_column, context, default_column);
237
+ if (!NIL_P(rb_default_mode))
238
+ default_mode = RVAL2GRNOPERATOR(rb_default_mode);
239
+ if (!NIL_P(rb_default_operator))
240
+ default_operator = RVAL2GRNOPERATOR(rb_default_operator);
241
+ if (NIL_P(rb_parser) ||
242
+ rb_grn_equal_option(rb_parser, "column") ||
243
+ rb_grn_equal_option(rb_parser, "column-query") ||
244
+ rb_grn_equal_option(rb_parser, "column_query")) {
245
+ parse_level = 0;
246
+ } else if (rb_grn_equal_option(rb_parser, "table") ||
247
+ rb_grn_equal_option(rb_parser, "table-query") ||
248
+ rb_grn_equal_option(rb_parser, "table_query")) {
249
+ parse_level = 2;
250
+ } else if (rb_grn_equal_option(rb_parser, "expression") ||
251
+ rb_grn_equal_option(rb_parser, "language")) {
252
+ parse_level = 4;
253
+ } else {
254
+ rb_raise(rb_eArgError,
255
+ "parser should be one of "
256
+ "[nil, :column, :column_query, :table, :table_query, "
257
+ ":expression, :language]: %s",
258
+ rb_grn_inspect(rb_parser));
259
+ }
260
+
261
+ rc = grn_expr_parse(context, expression, query, query_size,
262
+ default_column, default_mode, default_operator,
263
+ parse_level);
264
+ if (rc != GRN_SUCCESS) {
265
+ VALUE related_object;
266
+
267
+ related_object = rb_ary_new3(2, self, rb_ary_new4(argc, argv));
268
+ exception = rb_grn_context_to_exception(context, related_object);
269
+ }
270
+ grn_obj_close(context, default_column);
271
+
272
+ if (!NIL_P(exception))
273
+ rb_exc_raise(exception);
274
+
275
+ return Qnil;
276
+ }
277
+
222
278
  static VALUE
223
279
  rb_grn_expression_execute (VALUE self)
224
280
  {
225
281
  grn_ctx *context = NULL;
226
- grn_obj *expression, *result;
282
+ grn_obj *expression;
283
+ grn_rc rc;
227
284
 
228
285
  rb_grn_expression_deconstruct(SELF(self), &expression, &context,
229
286
  NULL, NULL,
230
287
  NULL, NULL, NULL);
231
288
 
232
- result = grn_expr_exec(context, expression);
233
- return GRNOBJ2RVAL(Qnil, context, result, self);
289
+ rc = grn_expr_exec(context, expression, 0);
290
+ rb_grn_context_check(context, self);
291
+ rb_grn_rc_check(rc, self);
292
+
293
+ return Qnil;
234
294
  }
235
295
 
236
296
  static VALUE
@@ -284,6 +344,175 @@ rb_grn_expression_array_reference (VALUE self, VALUE rb_name_or_offset)
284
344
  return Qnil;
285
345
  }
286
346
 
347
+ /* REMOVE ME */
348
+ grn_rc grn_expr_inspect(grn_ctx *ctx, grn_obj *buf, grn_obj *expr);
349
+
350
+ static VALUE
351
+ rb_grn_expression_inspect (VALUE self)
352
+ {
353
+ grn_rc rc;
354
+ grn_ctx *context = NULL;
355
+ grn_obj inspected;
356
+ grn_obj *expression;
357
+ VALUE rb_inspected;
358
+
359
+ rb_grn_expression_deconstruct(SELF(self), &expression, &context,
360
+ NULL, NULL,
361
+ NULL, NULL, NULL);
362
+
363
+ GRN_TEXT_INIT(&inspected, 0);
364
+ GRN_TEXT_PUTS(context, &inspected, "#<Groonga::Expression ");
365
+ rc = grn_expr_inspect(context, &inspected, expression);
366
+ GRN_TEXT_PUTS(context, &inspected, ">");
367
+ rb_inspected = rb_str_new(GRN_TEXT_VALUE(&inspected),
368
+ GRN_TEXT_LEN(&inspected));
369
+ GRN_OBJ_FIN(context, &inspected);
370
+
371
+ return rb_inspected;
372
+ }
373
+
374
+ /*
375
+ * call-seq:
376
+ * expression.snippet(tags, options) -> Groonga::Snippet
377
+ *
378
+ * _expression_からGroonga::Snippetを生成する。_tags_にはキー
379
+ * ワードの前後に挿入するタグの配列を以下のような形式で指定
380
+ * する。
381
+ *
382
+ * [
383
+ * ["キーワード前に挿入する文字列1", "キーワード後に挿入する文字列1"],
384
+ * ["キーワード前に挿入する文字列2", "キーワード後に挿入する文字列2"],
385
+ * ...,
386
+ * ]
387
+ *
388
+ * もし、1つのスニペットの中に_tags_で指定したタグより多くの
389
+ * キーワードが含まれている場合は、以下のように、また、先頭
390
+ * のタグから順番に使われる。
391
+ *
392
+ * expression.parse("Ruby groonga 検索")
393
+ * tags = [["<tag1>", "</tag1>"], ["<tag2>", "</tag2>"]]
394
+ * snippet = expression.snippet(tags)
395
+ * p snippet.execute("Rubyでgroonga使って全文検索、高速検索。")
396
+ * # => ["<tag1>Ruby</tag1>で<tag2>groonga</tag2>"
397
+ * # "使って全文<tag1>検索</tag1>、高速<tag2>検索</tag2>。"]
398
+ *
399
+ * _options_に指定可能な値は以下の通り。
400
+ *
401
+ * [+:normalize+]
402
+ * キーワード文字列・スニペット元の文字列を正規化するかど
403
+ * うか。省略した場合は+false+で正規化しない。
404
+ *
405
+ * [+:skip_leading_spaces+]
406
+ * 先頭の空白を無視するかどうか。省略した場合は+false+で無
407
+ * 視しない。
408
+ *
409
+ * [+:width+]
410
+ * スニペット文字列の長さ。省略した場合は100文字。
411
+ *
412
+ * [+:max_results+]
413
+ * 生成するスニペットの最大数。省略した場合は3。
414
+ *
415
+ * [+:html_escape+]
416
+ * スニペット内の+<+, +>+, +&+, +"+をHTMLエスケープするか
417
+ * どうか。省略した場合は+false+で、HTMLエスケープしない。
418
+ */
419
+ static VALUE
420
+ rb_grn_expression_snippet (int argc, VALUE *argv, VALUE self)
421
+ {
422
+ grn_ctx *context = NULL;
423
+ grn_obj *expression;
424
+ grn_snip *snippet;
425
+ VALUE options;
426
+ VALUE rb_normalize, rb_skip_leading_spaces;
427
+ VALUE rb_width, rb_max_results, rb_tags;
428
+ VALUE rb_html_escape;
429
+ VALUE *rb_tag_values;
430
+ unsigned int i;
431
+ int flags = GRN_SNIP_COPY_TAG;
432
+ unsigned int width = 100;
433
+ unsigned int max_results = 3;
434
+ unsigned int n_tags = 0;
435
+ char **open_tags = NULL;
436
+ unsigned int *open_tag_lengths = NULL;
437
+ char **close_tags = NULL;
438
+ unsigned int *close_tag_lengths = NULL;
439
+ grn_snip_mapping *mapping = NULL;
440
+
441
+ rb_grn_expression_deconstruct(SELF(self), &expression, &context,
442
+ NULL, NULL,
443
+ NULL, NULL, NULL);
444
+
445
+ rb_scan_args(argc, argv, "11", &rb_tags, &options);
446
+
447
+ rb_grn_scan_options(options,
448
+ "normalize", &rb_normalize,
449
+ "skip_leading_spaces", &rb_skip_leading_spaces,
450
+ "width", &rb_width,
451
+ "max_results", &rb_max_results,
452
+ "html_escape", &rb_html_escape,
453
+ NULL);
454
+
455
+ if (TYPE(rb_tags) != T_ARRAY) {
456
+ rb_raise(rb_eArgError,
457
+ "tags should be "
458
+ "[\"open_tag\", \"close_tag\"] or"
459
+ "[[\"open_tag1\", \"close_tag1\"], ...]: %s",
460
+ rb_grn_inspect(rb_tags));
461
+ }
462
+
463
+ if (TYPE(RARRAY_PTR(rb_tags)[0]) == T_STRING) {
464
+ rb_tags = rb_ary_new3(1, rb_tags);
465
+ }
466
+
467
+ rb_tag_values = RARRAY_PTR(rb_tags);
468
+ n_tags = RARRAY_LEN(rb_tags);
469
+ open_tags = ALLOCA_N(char *, n_tags);
470
+ open_tag_lengths = ALLOCA_N(unsigned int, n_tags);
471
+ close_tags = ALLOCA_N(char *, n_tags);
472
+ close_tag_lengths = ALLOCA_N(unsigned int, n_tags);
473
+ for (i = 0; i < n_tags; i++) {
474
+ VALUE *tag_pair;
475
+
476
+ if (TYPE(rb_tag_values[i]) != T_ARRAY ||
477
+ RARRAY_LEN(rb_tag_values[i]) != 2) {
478
+ rb_raise(rb_eArgError,
479
+ "tags should be "
480
+ "[\"open_tag\", \"close_tag\"] or"
481
+ "[[\"open_tag1\", \"close_tag1\"], ...]: %s",
482
+ rb_grn_inspect(rb_tags));
483
+ }
484
+ tag_pair = RARRAY_PTR(rb_tag_values[i]);
485
+ open_tags[i] = StringValuePtr(tag_pair[0]);
486
+ open_tag_lengths[i] = RSTRING_LEN(tag_pair[0]);
487
+ close_tags[i] = StringValuePtr(tag_pair[1]);
488
+ close_tag_lengths[i] = RSTRING_LEN(tag_pair[1]);
489
+ }
490
+
491
+ if (RVAL2CBOOL(rb_normalize))
492
+ flags |= GRN_SNIP_NORMALIZE;
493
+ if (RVAL2CBOOL(rb_skip_leading_spaces))
494
+ flags |= GRN_SNIP_SKIP_LEADING_SPACES;
495
+
496
+ if (!NIL_P(rb_width))
497
+ width = NUM2UINT(rb_width);
498
+
499
+ if (!NIL_P(rb_max_results))
500
+ max_results = NUM2UINT(rb_max_results);
501
+
502
+ if (RVAL2CBOOL(rb_html_escape))
503
+ mapping = (grn_snip_mapping *)-1;
504
+
505
+ snippet = grn_expr_snip(context, expression, flags, width, max_results,
506
+ n_tags,
507
+ (const char **)open_tags, open_tag_lengths,
508
+ (const char **)close_tags, close_tag_lengths,
509
+ mapping);
510
+ rb_grn_context_check(context,
511
+ rb_ary_new3(2, self, rb_ary_new4(argc, argv)));
512
+
513
+ return GRNSNIPPET2RVAL(context, snippet, RB_GRN_TRUE);
514
+ }
515
+
287
516
  void
288
517
  rb_grn_init_expression (VALUE mGrn)
289
518
  {
@@ -295,20 +524,26 @@ rb_grn_init_expression (VALUE mGrn)
295
524
  rb_define_method(rb_cGrnExpression, "define_variable",
296
525
  rb_grn_expression_define_variable, -1);
297
526
  rb_define_method(rb_cGrnExpression, "append_object",
298
- rb_grn_expression_append_object, 1);
527
+ rb_grn_expression_append_object, -1);
299
528
  rb_define_method(rb_cGrnExpression, "append_constant",
300
- rb_grn_expression_append_constant, 1);
529
+ rb_grn_expression_append_constant, -1);
301
530
  rb_define_method(rb_cGrnExpression, "append_operation",
302
531
  rb_grn_expression_append_operation, 2);
303
532
 
533
+ rb_define_method(rb_cGrnExpression, "parse",
534
+ rb_grn_expression_parse, -1);
535
+
304
536
  rb_define_method(rb_cGrnExpression, "execute",
305
537
  rb_grn_expression_execute, 0);
306
538
  rb_define_method(rb_cGrnExpression, "compile",
307
539
  rb_grn_expression_compile, 0);
308
540
 
309
- rb_define_method(rb_cGrnExpression, "value",
310
- rb_grn_expression_get_value, 1);
311
-
312
541
  rb_define_method(rb_cGrnExpression, "[]",
313
542
  rb_grn_expression_array_reference, 1);
543
+
544
+ rb_define_method(rb_cGrnExpression, "snippet",
545
+ rb_grn_expression_snippet, -1);
546
+
547
+ rb_define_method(rb_cGrnExpression, "inspect",
548
+ rb_grn_expression_inspect, 0);
314
549
  }