groonga 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55)
  1. data/AUTHORS +4 -0
  2. data/NEWS.ja.rdoc +10 -0
  3. data/NEWS.rdoc +10 -0
  4. data/README.ja.rdoc +9 -3
  5. data/README.rdoc +10 -4
  6. data/Rakefile +1 -1
  7. data/TUTORIAL.ja.rdoc +3 -6
  8. data/example/bookmark.rb +1 -1
  9. data/example/search/config.ru +52 -28
  10. data/ext/rb-grn-column.c +24 -18
  11. data/ext/rb-grn-context.c +165 -17
  12. data/ext/rb-grn-encoding.c +37 -0
  13. data/ext/rb-grn-expression.c +286 -51
  14. data/ext/rb-grn-object.c +27 -8
  15. data/ext/rb-grn-operation.c +128 -22
  16. data/ext/rb-grn-patricia-trie.c +62 -0
  17. data/ext/rb-grn-snippet.c +7 -17
  18. data/ext/rb-grn-table.c +101 -31
  19. data/ext/rb-grn-utils.c +87 -22
  20. data/ext/rb-grn-variable-size-column.c +1 -1
  21. data/ext/rb-grn.h +27 -4
  22. data/ext/rb-groonga.c +12 -2
  23. data/extconf.rb +2 -1
  24. data/html/index.html +2 -2
  25. data/lib/groonga.rb +1 -0
  26. data/lib/groonga/expression-builder.rb +47 -12
  27. data/lib/groonga/patricia-trie.rb +40 -0
  28. data/lib/groonga/record.rb +17 -13
  29. data/misc/grnop2ruby.rb +49 -0
  30. data/pkg-config.rb +1 -1
  31. data/test-unit/lib/test/unit/assertions.rb +5 -2
  32. data/test-unit/lib/test/unit/autorunner.rb +19 -4
  33. data/test-unit/lib/test/unit/collector/load.rb +3 -1
  34. data/test-unit/lib/test/unit/color-scheme.rb +5 -1
  35. data/test-unit/lib/test/unit/error.rb +7 -5
  36. data/test-unit/lib/test/unit/runner/tap.rb +8 -0
  37. data/test-unit/lib/test/unit/ui/console/testrunner.rb +63 -8
  38. data/test-unit/lib/test/unit/ui/tap/testrunner.rb +92 -0
  39. data/test-unit/test/collector/test-load.rb +1 -5
  40. data/test-unit/test/test-color-scheme.rb +4 -0
  41. data/test/groonga-test-utils.rb +10 -0
  42. data/test/run-test.rb +5 -1
  43. data/test/test-column.rb +58 -0
  44. data/test/test-database.rb +8 -1
  45. data/test/test-expression.rb +48 -6
  46. data/test/test-hash.rb +7 -0
  47. data/test/test-patricia-trie.rb +39 -0
  48. data/test/test-record.rb +2 -2
  49. data/test/test-remote.rb +52 -0
  50. data/test/test-schema.rb +1 -1
  51. data/test/test-table-select-normalize.rb +48 -0
  52. data/test/test-table-select.rb +101 -0
  53. data/test/test-table.rb +0 -9
  54. data/test/test-variable-size-column.rb +28 -0
  55. metadata +16 -5
@@ -148,6 +148,43 @@ rb_grn_encoding_to_ruby_object (grn_encoding encoding)
148
148
  return rb_encoding;
149
149
  }
150
150
 
151
+ #ifdef HAVE_RUBY_ENCODING_H
152
+ rb_encoding *
153
+ rb_grn_encoding_to_ruby_encoding (grn_encoding encoding)
154
+ {
155
+ rb_encoding *rb_encoding;
156
+
157
+ if (encoding == GRN_ENC_DEFAULT)
158
+ encoding = grn_get_default_encoding();
159
+
160
+ switch (encoding) {
161
+ case GRN_ENC_NONE:
162
+ rb_encoding = rb_ascii8bit_encoding();
163
+ break;
164
+ case GRN_ENC_EUC_JP:
165
+ rb_encoding = rb_enc_find("euc-jp");
166
+ break;
167
+ case GRN_ENC_UTF8:
168
+ rb_encoding = rb_utf8_encoding();
169
+ break;
170
+ case GRN_ENC_SJIS:
171
+ rb_encoding = rb_enc_find("CP932");
172
+ break;
173
+ case GRN_ENC_LATIN1:
174
+ rb_encoding = rb_enc_find("ISO-8859-1");
175
+ break;
176
+ case GRN_ENC_KOI8R:
177
+ rb_encoding = rb_enc_find("KOI8-R");
178
+ break;
179
+ default:
180
+ rb_raise(rb_eArgError, "unknown encoding: %d", encoding);
181
+ break;
182
+ }
183
+
184
+ return rb_encoding;
185
+ }
186
+ #endif
187
+
151
188
  /*
152
189
  * call-seq:
153
190
  * Groonga::Encoding.default -> エンコーディング
@@ -70,17 +70,14 @@ rb_grn_expression_initialize (int argc, VALUE *argv, VALUE self)
70
70
  {
71
71
  grn_ctx *context = NULL;
72
72
  grn_obj *expression;
73
- VALUE options, rb_context, rb_name, rb_query, rb_table, rb_default_column;
74
- char *name = NULL, *query = NULL;
75
- unsigned name_size = 0, query_size = 0;
73
+ VALUE options, rb_context, rb_name;
74
+ char *name = NULL;
75
+ unsigned name_size = 0;
76
76
 
77
77
  rb_scan_args(argc, argv, "01", &options);
78
78
  rb_grn_scan_options(options,
79
79
  "context", &rb_context,
80
80
  "name", &rb_name,
81
- "query", &rb_query,
82
- "table", &rb_table,
83
- "default_column", &rb_default_column,
84
81
  NULL);
85
82
 
86
83
  context = rb_grn_context_ensure(&rb_context);
@@ -90,23 +87,7 @@ rb_grn_expression_initialize (int argc, VALUE *argv, VALUE self)
90
87
  name_size = RSTRING_LEN(rb_name);
91
88
  }
92
89
 
93
- if (!NIL_P(rb_query)) {
94
- query = StringValuePtr(rb_query);
95
- query_size = RSTRING_LEN(rb_query);
96
- }
97
-
98
- if (query) {
99
- grn_obj *table;
100
- grn_obj *default_column = NULL;
101
-
102
- table = RVAL2GRNOBJECT(rb_table, &context);
103
- default_column = RVAL2GRNBULK(rb_default_column, context, default_column);
104
- expression = grn_expr_create_from_str(context, name, name_size,
105
- query, query_size,
106
- table, default_column);
107
- } else {
108
- expression = grn_expr_create(context, name, name_size);
109
- }
90
+ expression = grn_expr_create(context, name, name_size);
110
91
  rb_grn_object_assign(Qnil, self, rb_context, context, expression);
111
92
  rb_grn_context_check(context, self);
112
93
 
@@ -151,49 +132,52 @@ rb_grn_expression_define_variable (int argc, VALUE *argv, VALUE self)
151
132
  }
152
133
 
153
134
  static VALUE
154
- rb_grn_expression_get_value (VALUE self, VALUE rb_offset)
155
- {
156
- grn_ctx *context = NULL;
157
- grn_obj *value, *expression;
158
- int offset;
159
-
160
- rb_grn_expression_deconstruct(SELF(self), &expression, &context,
161
- NULL, NULL,
162
- NULL, NULL, NULL);
163
-
164
- offset = NUM2INT(rb_offset);
165
- value = grn_expr_get_value(context, expression, offset);
166
- return GRNBULK2RVAL(context, value, self);
167
- }
168
-
169
- static VALUE
170
- rb_grn_expression_append_object (VALUE self, VALUE rb_object)
135
+ rb_grn_expression_append_object (int argc, VALUE *argv, VALUE self)
171
136
  {
137
+ VALUE rb_object, rb_operation, rb_n_arguments;
172
138
  grn_ctx *context = NULL;
173
139
  grn_obj *expression, *object;
140
+ grn_operator operation = GRN_OP_PUSH;
141
+ int n_arguments = 1;
142
+
143
+ rb_scan_args(argc, argv, "12", &rb_object, &rb_operation, &rb_n_arguments);
144
+ if (!NIL_P(rb_operation))
145
+ operation = NUM2INT(rb_operation);
146
+ if (!NIL_P(rb_n_arguments))
147
+ n_arguments = NUM2INT(rb_n_arguments);
174
148
 
175
149
  rb_grn_expression_deconstruct(SELF(self), &expression, &context,
176
150
  NULL, NULL,
177
151
  NULL, NULL, NULL);
178
152
 
179
153
  object = RVAL2GRNOBJECT(rb_object, &context);
180
- grn_expr_append_obj(context, expression, object);
154
+ grn_expr_append_obj(context, expression, object,
155
+ operation, n_arguments);
181
156
  rb_grn_context_check(context, self);
182
157
  return self;
183
158
  }
184
159
 
185
160
  static VALUE
186
- rb_grn_expression_append_constant (VALUE self, VALUE rb_constant)
161
+ rb_grn_expression_append_constant (int argc, VALUE *argv, VALUE self)
187
162
  {
163
+ VALUE rb_constant, rb_operator, rb_n_arguments;
188
164
  grn_ctx *context = NULL;
189
165
  grn_obj *expression, *constant = NULL;
166
+ grn_operator operator = GRN_OP_PUSH;
167
+ int n_arguments = 1;
168
+
169
+ rb_scan_args(argc, argv, "12", &rb_constant, &rb_operator, &rb_n_arguments);
170
+ if (!NIL_P(rb_operator))
171
+ operator = NUM2INT(rb_operator);
172
+ if (!NIL_P(rb_n_arguments))
173
+ n_arguments = NUM2INT(rb_n_arguments);
190
174
 
191
175
  rb_grn_expression_deconstruct(SELF(self), &expression, &context,
192
176
  NULL, NULL, NULL,
193
177
  NULL, NULL);
194
178
 
195
179
  RVAL2GRNOBJ(rb_constant, context, &constant);
196
- grn_expr_append_const(context, expression, constant);
180
+ grn_expr_append_const(context, expression, constant, operator, n_arguments);
197
181
  grn_obj_close(context, constant);
198
182
  rb_grn_context_check(context, self);
199
183
  return self;
@@ -219,18 +203,94 @@ rb_grn_expression_append_operation (VALUE self, VALUE rb_operation,
219
203
  return Qnil;
220
204
  }
221
205
 
206
+ static VALUE
207
+ rb_grn_expression_parse (int argc, VALUE *argv, VALUE self)
208
+ {
209
+ grn_ctx *context = NULL;
210
+ grn_obj *expression, *default_column = NULL;
211
+ grn_operator default_operator = GRN_OP_AND;
212
+ grn_operator default_mode = GRN_OP_MATCH;
213
+ grn_rc rc;
214
+ char *query = NULL;
215
+ unsigned query_size = 0;
216
+ int parse_level = 0;
217
+ VALUE options, rb_query, rb_default_column, rb_default_operator;
218
+ VALUE rb_default_mode, rb_parser;
219
+ VALUE exception = Qnil;
220
+
221
+ rb_scan_args(argc, argv, "11", &rb_query, &options);
222
+ rb_grn_scan_options(options,
223
+ "default_column", &rb_default_column,
224
+ "default_operator", &rb_default_operator,
225
+ "default_mode", &rb_default_mode,
226
+ "parser", &rb_parser,
227
+ NULL);
228
+
229
+ query = StringValuePtr(rb_query);
230
+ query_size = RSTRING_LEN(rb_query);
231
+
232
+ rb_grn_expression_deconstruct(SELF(self), &expression, &context,
233
+ NULL, NULL,
234
+ NULL, NULL, NULL);
235
+
236
+ default_column = RVAL2GRNBULK(rb_default_column, context, default_column);
237
+ if (!NIL_P(rb_default_mode))
238
+ default_mode = RVAL2GRNOPERATOR(rb_default_mode);
239
+ if (!NIL_P(rb_default_operator))
240
+ default_operator = RVAL2GRNOPERATOR(rb_default_operator);
241
+ if (NIL_P(rb_parser) ||
242
+ rb_grn_equal_option(rb_parser, "column") ||
243
+ rb_grn_equal_option(rb_parser, "column-query") ||
244
+ rb_grn_equal_option(rb_parser, "column_query")) {
245
+ parse_level = 0;
246
+ } else if (rb_grn_equal_option(rb_parser, "table") ||
247
+ rb_grn_equal_option(rb_parser, "table-query") ||
248
+ rb_grn_equal_option(rb_parser, "table_query")) {
249
+ parse_level = 2;
250
+ } else if (rb_grn_equal_option(rb_parser, "expression") ||
251
+ rb_grn_equal_option(rb_parser, "language")) {
252
+ parse_level = 4;
253
+ } else {
254
+ rb_raise(rb_eArgError,
255
+ "parser should be one of "
256
+ "[nil, :column, :column_query, :table, :table_query, "
257
+ ":expression, :language]: %s",
258
+ rb_grn_inspect(rb_parser));
259
+ }
260
+
261
+ rc = grn_expr_parse(context, expression, query, query_size,
262
+ default_column, default_mode, default_operator,
263
+ parse_level);
264
+ if (rc != GRN_SUCCESS) {
265
+ VALUE related_object;
266
+
267
+ related_object = rb_ary_new3(2, self, rb_ary_new4(argc, argv));
268
+ exception = rb_grn_context_to_exception(context, related_object);
269
+ }
270
+ grn_obj_close(context, default_column);
271
+
272
+ if (!NIL_P(exception))
273
+ rb_exc_raise(exception);
274
+
275
+ return Qnil;
276
+ }
277
+
222
278
  static VALUE
223
279
  rb_grn_expression_execute (VALUE self)
224
280
  {
225
281
  grn_ctx *context = NULL;
226
- grn_obj *expression, *result;
282
+ grn_obj *expression;
283
+ grn_rc rc;
227
284
 
228
285
  rb_grn_expression_deconstruct(SELF(self), &expression, &context,
229
286
  NULL, NULL,
230
287
  NULL, NULL, NULL);
231
288
 
232
- result = grn_expr_exec(context, expression);
233
- return GRNOBJ2RVAL(Qnil, context, result, self);
289
+ rc = grn_expr_exec(context, expression, 0);
290
+ rb_grn_context_check(context, self);
291
+ rb_grn_rc_check(rc, self);
292
+
293
+ return Qnil;
234
294
  }
235
295
 
236
296
  static VALUE
@@ -284,6 +344,175 @@ rb_grn_expression_array_reference (VALUE self, VALUE rb_name_or_offset)
284
344
  return Qnil;
285
345
  }
286
346
 
347
+ /* REMOVE ME */
348
+ grn_rc grn_expr_inspect(grn_ctx *ctx, grn_obj *buf, grn_obj *expr);
349
+
350
+ static VALUE
351
+ rb_grn_expression_inspect (VALUE self)
352
+ {
353
+ grn_rc rc;
354
+ grn_ctx *context = NULL;
355
+ grn_obj inspected;
356
+ grn_obj *expression;
357
+ VALUE rb_inspected;
358
+
359
+ rb_grn_expression_deconstruct(SELF(self), &expression, &context,
360
+ NULL, NULL,
361
+ NULL, NULL, NULL);
362
+
363
+ GRN_TEXT_INIT(&inspected, 0);
364
+ GRN_TEXT_PUTS(context, &inspected, "#<Groonga::Expression ");
365
+ rc = grn_expr_inspect(context, &inspected, expression);
366
+ GRN_TEXT_PUTS(context, &inspected, ">");
367
+ rb_inspected = rb_str_new(GRN_TEXT_VALUE(&inspected),
368
+ GRN_TEXT_LEN(&inspected));
369
+ GRN_OBJ_FIN(context, &inspected);
370
+
371
+ return rb_inspected;
372
+ }
373
+
374
+ /*
375
+ * call-seq:
376
+ * expression.snippet(tags, options) -> Groonga::Snippet
377
+ *
378
+ * _expression_からGroonga::Snippetを生成する。_tags_にはキー
379
+ * ワードの前後に挿入するタグの配列を以下のような形式で指定
380
+ * する。
381
+ *
382
+ * [
383
+ * ["キーワード前に挿入する文字列1", "キーワード後に挿入する文字列1"],
384
+ * ["キーワード前に挿入する文字列2", "キーワード後に挿入する文字列2"],
385
+ * ...,
386
+ * ]
387
+ *
388
+ * もし、1つのスニペットの中に_tags_で指定したタグより多くの
389
+ * キーワードが含まれている場合は、以下のように、また、先頭
390
+ * のタグから順番に使われる。
391
+ *
392
+ * expression.parse("Ruby groonga 検索")
393
+ * tags = [["<tag1>", "</tag1>"], ["<tag2>", "</tag2>"]]
394
+ * snippet = expression.snippet(tags)
395
+ * p snippet.execute("Rubyでgroonga使って全文検索、高速検索。")
396
+ * # => ["<tag1>Ruby</tag1>で<tag2>groonga</tag2>"
397
+ * # "使って全文<tag1>検索</tag1>、高速<tag2>検索</tag2>。"]
398
+ *
399
+ * _options_に指定可能な値は以下の通り。
400
+ *
401
+ * [+:normalize+]
402
+ * キーワード文字列・スニペット元の文字列を正規化するかど
403
+ * うか。省略した場合は+false+で正規化しない。
404
+ *
405
+ * [+:skip_leading_spaces+]
406
+ * 先頭の空白を無視するかどうか。省略した場合は+false+で無
407
+ * 視しない。
408
+ *
409
+ * [+:width+]
410
+ * スニペット文字列の長さ。省略した場合は100文字。
411
+ *
412
+ * [+:max_results+]
413
+ * 生成するスニペットの最大数。省略した場合は3。
414
+ *
415
+ * [+:html_escape+]
416
+ * スニペット内の+<+, +>+, +&+, +"+をHTMLエスケープするか
417
+ * どうか。省略した場合は+false+で、HTMLエスケープしない。
418
+ */
419
+ static VALUE
420
+ rb_grn_expression_snippet (int argc, VALUE *argv, VALUE self)
421
+ {
422
+ grn_ctx *context = NULL;
423
+ grn_obj *expression;
424
+ grn_snip *snippet;
425
+ VALUE options;
426
+ VALUE rb_normalize, rb_skip_leading_spaces;
427
+ VALUE rb_width, rb_max_results, rb_tags;
428
+ VALUE rb_html_escape;
429
+ VALUE *rb_tag_values;
430
+ unsigned int i;
431
+ int flags = GRN_SNIP_COPY_TAG;
432
+ unsigned int width = 100;
433
+ unsigned int max_results = 3;
434
+ unsigned int n_tags = 0;
435
+ char **open_tags = NULL;
436
+ unsigned int *open_tag_lengths = NULL;
437
+ char **close_tags = NULL;
438
+ unsigned int *close_tag_lengths = NULL;
439
+ grn_snip_mapping *mapping = NULL;
440
+
441
+ rb_grn_expression_deconstruct(SELF(self), &expression, &context,
442
+ NULL, NULL,
443
+ NULL, NULL, NULL);
444
+
445
+ rb_scan_args(argc, argv, "11", &rb_tags, &options);
446
+
447
+ rb_grn_scan_options(options,
448
+ "normalize", &rb_normalize,
449
+ "skip_leading_spaces", &rb_skip_leading_spaces,
450
+ "width", &rb_width,
451
+ "max_results", &rb_max_results,
452
+ "html_escape", &rb_html_escape,
453
+ NULL);
454
+
455
+ if (TYPE(rb_tags) != T_ARRAY) {
456
+ rb_raise(rb_eArgError,
457
+ "tags should be "
458
+ "[\"open_tag\", \"close_tag\"] or"
459
+ "[[\"open_tag1\", \"close_tag1\"], ...]: %s",
460
+ rb_grn_inspect(rb_tags));
461
+ }
462
+
463
+ if (TYPE(RARRAY_PTR(rb_tags)[0]) == T_STRING) {
464
+ rb_tags = rb_ary_new3(1, rb_tags);
465
+ }
466
+
467
+ rb_tag_values = RARRAY_PTR(rb_tags);
468
+ n_tags = RARRAY_LEN(rb_tags);
469
+ open_tags = ALLOCA_N(char *, n_tags);
470
+ open_tag_lengths = ALLOCA_N(unsigned int, n_tags);
471
+ close_tags = ALLOCA_N(char *, n_tags);
472
+ close_tag_lengths = ALLOCA_N(unsigned int, n_tags);
473
+ for (i = 0; i < n_tags; i++) {
474
+ VALUE *tag_pair;
475
+
476
+ if (TYPE(rb_tag_values[i]) != T_ARRAY ||
477
+ RARRAY_LEN(rb_tag_values[i]) != 2) {
478
+ rb_raise(rb_eArgError,
479
+ "tags should be "
480
+ "[\"open_tag\", \"close_tag\"] or"
481
+ "[[\"open_tag1\", \"close_tag1\"], ...]: %s",
482
+ rb_grn_inspect(rb_tags));
483
+ }
484
+ tag_pair = RARRAY_PTR(rb_tag_values[i]);
485
+ open_tags[i] = StringValuePtr(tag_pair[0]);
486
+ open_tag_lengths[i] = RSTRING_LEN(tag_pair[0]);
487
+ close_tags[i] = StringValuePtr(tag_pair[1]);
488
+ close_tag_lengths[i] = RSTRING_LEN(tag_pair[1]);
489
+ }
490
+
491
+ if (RVAL2CBOOL(rb_normalize))
492
+ flags |= GRN_SNIP_NORMALIZE;
493
+ if (RVAL2CBOOL(rb_skip_leading_spaces))
494
+ flags |= GRN_SNIP_SKIP_LEADING_SPACES;
495
+
496
+ if (!NIL_P(rb_width))
497
+ width = NUM2UINT(rb_width);
498
+
499
+ if (!NIL_P(rb_max_results))
500
+ max_results = NUM2UINT(rb_max_results);
501
+
502
+ if (RVAL2CBOOL(rb_html_escape))
503
+ mapping = (grn_snip_mapping *)-1;
504
+
505
+ snippet = grn_expr_snip(context, expression, flags, width, max_results,
506
+ n_tags,
507
+ (const char **)open_tags, open_tag_lengths,
508
+ (const char **)close_tags, close_tag_lengths,
509
+ mapping);
510
+ rb_grn_context_check(context,
511
+ rb_ary_new3(2, self, rb_ary_new4(argc, argv)));
512
+
513
+ return GRNSNIPPET2RVAL(context, snippet, RB_GRN_TRUE);
514
+ }
515
+
287
516
  void
288
517
  rb_grn_init_expression (VALUE mGrn)
289
518
  {
@@ -295,20 +524,26 @@ rb_grn_init_expression (VALUE mGrn)
295
524
  rb_define_method(rb_cGrnExpression, "define_variable",
296
525
  rb_grn_expression_define_variable, -1);
297
526
  rb_define_method(rb_cGrnExpression, "append_object",
298
- rb_grn_expression_append_object, 1);
527
+ rb_grn_expression_append_object, -1);
299
528
  rb_define_method(rb_cGrnExpression, "append_constant",
300
- rb_grn_expression_append_constant, 1);
529
+ rb_grn_expression_append_constant, -1);
301
530
  rb_define_method(rb_cGrnExpression, "append_operation",
302
531
  rb_grn_expression_append_operation, 2);
303
532
 
533
+ rb_define_method(rb_cGrnExpression, "parse",
534
+ rb_grn_expression_parse, -1);
535
+
304
536
  rb_define_method(rb_cGrnExpression, "execute",
305
537
  rb_grn_expression_execute, 0);
306
538
  rb_define_method(rb_cGrnExpression, "compile",
307
539
  rb_grn_expression_compile, 0);
308
540
 
309
- rb_define_method(rb_cGrnExpression, "value",
310
- rb_grn_expression_get_value, 1);
311
-
312
541
  rb_define_method(rb_cGrnExpression, "[]",
313
542
  rb_grn_expression_array_reference, 1);
543
+
544
+ rb_define_method(rb_cGrnExpression, "snippet",
545
+ rb_grn_expression_snippet, -1);
546
+
547
+ rb_define_method(rb_cGrnExpression, "inspect",
548
+ rb_grn_expression_inspect, 0);
314
549
  }