rroonga 4.0.5 → 4.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +1 -1
  3. data/benchmark/common.rb +4 -4
  4. data/benchmark/create-wikipedia-database.rb +5 -5
  5. data/benchmark/read-write-many-small-items.rb +8 -8
  6. data/benchmark/repeat-load.rb +4 -4
  7. data/benchmark/select.rb +9 -9
  8. data/benchmark/write-many-small-items.rb +8 -8
  9. data/doc/images/sample-schema.png +0 -0
  10. data/doc/text/install.textile +2 -2
  11. data/doc/text/news.textile +41 -0
  12. data/doc/text/tutorial.textile +4 -4
  13. data/example/bookmark.rb +6 -7
  14. data/example/index-html.rb +6 -6
  15. data/ext/groonga/extconf.rb +23 -1
  16. data/ext/groonga/rb-grn-column.c +3 -2
  17. data/ext/groonga/rb-grn-context.c +20 -3
  18. data/ext/groonga/rb-grn-double-array-trie.c +11 -2
  19. data/ext/groonga/rb-grn-exception.c +28 -10
  20. data/ext/groonga/rb-grn-expression.c +78 -0
  21. data/ext/groonga/rb-grn-hash.c +10 -0
  22. data/ext/groonga/rb-grn-logger.c +7 -3
  23. data/ext/groonga/rb-grn-object.c +3 -2
  24. data/ext/groonga/rb-grn-patricia-trie.c +23 -20
  25. data/ext/groonga/rb-grn-table-key-support.c +92 -1
  26. data/ext/groonga/rb-grn-table.c +60 -17
  27. data/ext/groonga/rb-grn-utils.c +51 -2
  28. data/ext/groonga/rb-grn-variable-size-column.c +11 -7
  29. data/ext/groonga/rb-grn.h +11 -1
  30. data/lib/groonga/dumper.rb +23 -1
  31. data/lib/groonga/patricia-trie.rb +1 -1
  32. data/lib/groonga/schema.rb +190 -205
  33. data/misc/grnop2ruby.rb +1 -1
  34. data/rroonga-build.rb +3 -3
  35. data/rroonga.gemspec +1 -0
  36. data/test/groonga-test-utils.rb +2 -2
  37. data/test/test-column.rb +19 -0
  38. data/test/test-context.rb +5 -1
  39. data/test/test-double-array-trie.rb +19 -0
  40. data/test/test-exception.rb +7 -2
  41. data/test/test-expression.rb +19 -0
  42. data/test/test-fix-size-column.rb +49 -36
  43. data/test/test-hash.rb +22 -0
  44. data/test/test-patricia-trie.rb +26 -7
  45. data/test/test-schema-dumper.rb +65 -1
  46. data/test/test-schema.rb +13 -2
  47. data/test/test-variable-size-column.rb +6 -5
  48. metadata +47 -46
@@ -1,7 +1,7 @@
1
1
  /* -*- coding: utf-8; mode: C; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
2
  /*
3
3
  Copyright (C) 2014 Masafumi Yokoyama <myokoym@gmail.com>
4
- Copyright (C) 2009-2013 Kouhei Sutou <kou@clear-code.com>
4
+ Copyright (C) 2009-2014 Kouhei Sutou <kou@clear-code.com>
5
5
 
6
6
  This library is free software; you can redistribute it and/or
7
7
  modify it under the terms of the GNU Lesser General Public
@@ -144,6 +144,29 @@ rb_grn_table_key_support_inspect_content (VALUE self, VALUE inspected)
144
144
  }
145
145
  }
146
146
 
147
+ {
148
+ int i, n;
149
+ grn_obj token_filters;
150
+
151
+ rb_str_cat2(inspected, ", ");
152
+ rb_str_cat2(inspected, "token_filters: [");
153
+
154
+ GRN_PTR_INIT(&token_filters, GRN_OBJ_VECTOR, GRN_ID_NIL);
155
+ grn_obj_get_info(context, table,
156
+ GRN_INFO_TOKEN_FILTERS,
157
+ &token_filters);
158
+ n = GRN_BULK_VSIZE(&token_filters) / sizeof(grn_obj *);
159
+ for (i = 0; i < n; i++) {
160
+ grn_obj *token_filter = GRN_PTR_VALUE_AT(&token_filters, i);
161
+ if (i > 0) {
162
+ rb_str_cat2(inspected, ", ");
163
+ }
164
+ rb_grn_object_inspect_object_content_name(inspected, context,
165
+ token_filter);
166
+ }
167
+ rb_str_cat2(inspected, "]");
168
+ }
169
+
147
170
  {
148
171
  grn_obj *normalizer;
149
172
 
@@ -784,6 +807,69 @@ rb_grn_table_key_support_set_default_tokenizer (VALUE self, VALUE rb_tokenizer)
784
807
  return Qnil;
785
808
  }
786
809
 
810
+ /*
811
+ * Returns the token filters that are used by {Groonga::IndexColumn}.
812
+ *
813
+ * @overload token_filters
814
+ * @return [::Array<Groonga::Procedure>]
815
+ */
816
+ static VALUE
817
+ rb_grn_table_key_support_get_token_filters (VALUE self)
818
+ {
819
+ grn_ctx *context = NULL;
820
+ grn_obj *table;
821
+ grn_obj token_filters;
822
+ VALUE rb_token_filters;
823
+
824
+ rb_grn_table_key_support_deconstruct(SELF(self), &table, &context,
825
+ NULL, NULL, NULL,
826
+ NULL, NULL, NULL,
827
+ NULL);
828
+
829
+ GRN_PTR_INIT(&token_filters, GRN_VECTOR, GRN_ID_NIL);
830
+ grn_obj_get_info(context, table, GRN_INFO_TOKEN_FILTERS,
831
+ &token_filters);
832
+ rb_token_filters = GRNPVECTOR2RVAL(context, &token_filters);
833
+ rb_grn_context_check(context, self);
834
+
835
+ return rb_token_filters;
836
+ }
837
+
838
+ /*
839
+ * Sets the token filters that are used by {Groonga::IndexColumn}.
840
+ *
841
+ * @example
842
+ * # Use "TokenFilterStem" and "TokenFilterStopWord"
843
+ * table.token_filters = ["TokenFilterStem", "TokenFilterStopWord"]
844
+ *
845
+ * @overload token_filters=(token_filters)
846
+ * @param token_filters [::Array<String>] Token filter names.
847
+ */
848
+ static VALUE
849
+ rb_grn_table_key_support_set_token_filters (VALUE self,
850
+ VALUE rb_token_filters)
851
+ {
852
+ grn_ctx *context;
853
+ grn_obj *table;
854
+ grn_obj token_filters;
855
+ grn_rc rc;
856
+
857
+ rb_grn_table_key_support_deconstruct(SELF(self), &table, &context,
858
+ NULL, NULL, NULL,
859
+ NULL, NULL, NULL,
860
+ NULL);
861
+
862
+ GRN_PTR_INIT(&token_filters, GRN_OBJ_VECTOR, GRN_ID_NIL);
863
+ RVAL2GRNPVECTOR(rb_token_filters, context, &token_filters);
864
+ rc = grn_obj_set_info(context, table,
865
+ GRN_INFO_TOKEN_FILTERS, &token_filters);
866
+ grn_obj_unlink(context, &token_filters);
867
+ rb_grn_context_check(context, self);
868
+ rb_grn_rc_check(rc, self);
869
+
870
+ return Qnil;
871
+ }
872
+
787
873
  /*
788
874
  * Returns the normalizer that is used by {Groonga::IndexColumn}.
789
875
  *
@@ -976,6 +1062,11 @@ rb_grn_init_table_key_support (VALUE mGrn)
976
1062
  rb_define_method(rb_mGrnTableKeySupport, "default_tokenizer=",
977
1063
  rb_grn_table_key_support_set_default_tokenizer, 1);
978
1064
 
1065
+ rb_define_method(rb_mGrnTableKeySupport, "token_filters",
1066
+ rb_grn_table_key_support_get_token_filters, 0);
1067
+ rb_define_method(rb_mGrnTableKeySupport, "token_filters=",
1068
+ rb_grn_table_key_support_set_token_filters, 1);
1069
+
979
1070
  rb_define_method(rb_mGrnTableKeySupport, "normalizer",
980
1071
  rb_grn_table_key_support_get_normalizer, 0);
981
1072
  rb_define_method(rb_mGrnTableKeySupport, "normalizer=",
@@ -1,6 +1,7 @@
1
1
  /* -*- coding: utf-8; mode: C; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
2
  /*
3
3
  Copyright (C) 2009-2014 Kouhei Sutou <kou@clear-code.com>
4
+ Copyright (C) 2014 Masafumi Yokoyama <myokoym@gmail.com>
4
5
 
5
6
  This library is free software; you can redistribute it and/or
6
7
  modify it under the terms of the GNU Lesser General Public
@@ -235,7 +236,7 @@ rb_grn_table_inspect (VALUE self)
235
236
  * 値の圧縮方法を指定する。省略した場合は、圧縮しない。
236
237
  *
237
238
  * - +:zlib+ := 値をzlib圧縮して格納する。
238
- * - +:lzo+ := 値をlzo圧縮して格納する。
239
+ * - +:lz4+ := 値をLZ4圧縮して格納する。
239
240
  *
240
241
  * @return [Groonga::FixSizeColumn or Groonga::VariableSizeColumn]
241
242
  */
@@ -313,11 +314,14 @@ rb_grn_table_define_column (int argc, VALUE *argv, VALUE self)
313
314
  } else if (rb_grn_equal_option(rb_compress, "zlib")) {
314
315
  flags |= GRN_OBJ_COMPRESS_ZLIB;
315
316
  } else if (rb_grn_equal_option(rb_compress, "lzo")) {
316
- flags |= GRN_OBJ_COMPRESS_LZO;
317
+ /* TODO: for backward compatibility */
318
+ flags |= GRN_OBJ_COMPRESS_LZ4;
319
+ } else if (rb_grn_equal_option(rb_compress, "lz4")) {
320
+ flags |= GRN_OBJ_COMPRESS_LZ4;
317
321
  } else {
318
322
  rb_raise(rb_eArgError,
319
323
  "invalid compress type: %s: "
320
- "available types: [:zlib, :lzo, nil]",
324
+ "available types: [:zlib, :lz4, nil]",
321
325
  rb_grn_inspect(rb_compress));
322
326
  }
323
327
 
@@ -924,6 +928,50 @@ rb_grn_table_truncate (VALUE self)
924
928
  return Qnil;
925
929
  }
926
930
 
931
+ typedef struct {
932
+ grn_ctx *context;
933
+ grn_table_cursor *cursor;
934
+ VALUE self;
935
+ } EachData;
936
+
937
+ static VALUE
938
+ rb_grn_table_each_body (VALUE user_data)
939
+ {
940
+ EachData *data = (EachData *)user_data;
941
+ grn_ctx *context = data->context;
942
+ grn_table_cursor *cursor = data->cursor;
943
+ VALUE self = data->self;
944
+ RbGrnObject *rb_grn_object;
945
+
946
+ rb_grn_object = RB_GRN_OBJECT(SELF(self));
947
+ while (GRN_TRUE) {
948
+ grn_id id;
949
+
950
+ if (!rb_grn_object->object) {
951
+ break;
952
+ }
953
+
954
+ id = grn_table_cursor_next(context, cursor);
955
+ if (id == GRN_ID_NIL) {
956
+ break;
957
+ }
958
+
959
+ rb_yield(rb_grn_record_new(self, id, Qnil));
960
+ }
961
+
962
+ return Qnil;
963
+ }
964
+
965
+ static VALUE
966
+ rb_grn_table_each_ensure (VALUE user_data)
967
+ {
968
+ EachData *data = (EachData *)user_data;
969
+
970
+ grn_table_cursor_close(data->context, data->cursor);
971
+
972
+ return Qnil;
973
+ }
974
+
927
975
  /*
928
976
  * テーブルに登録されているレコードを順番にブロックに渡す。
929
977
  *
@@ -940,24 +988,19 @@ rb_grn_table_truncate (VALUE self)
940
988
  static VALUE
941
989
  rb_grn_table_each (int argc, VALUE *argv, VALUE self)
942
990
  {
943
- RbGrnTable *rb_table;
944
- RbGrnObject *rb_grn_object;
945
- grn_ctx *context = NULL;
946
- grn_table_cursor *cursor;
947
- VALUE rb_cursor;
948
- grn_id id;
991
+ EachData data;
949
992
 
950
993
  RETURN_ENUMERATOR(self, argc, argv);
951
994
 
952
- cursor = rb_grn_table_open_grn_cursor(argc, argv, self, &context);
953
- rb_cursor = GRNTABLECURSOR2RVAL(Qnil, context, cursor);
954
- rb_table = SELF(self);
955
- rb_grn_object = RB_GRN_OBJECT(rb_table);
956
- while (rb_grn_object->object &&
957
- (id = grn_table_cursor_next(context, cursor)) != GRN_ID_NIL) {
958
- rb_yield(rb_grn_record_new(self, id, Qnil));
995
+ data.cursor = rb_grn_table_open_grn_cursor(argc, argv, self,
996
+ &(data.context));
997
+ if (!data.cursor) {
998
+ return Qnil;
959
999
  }
960
- rb_grn_object_close(rb_cursor);
1000
+
1001
+ data.self = self;
1002
+ rb_ensure(rb_grn_table_each_body, (VALUE)&data,
1003
+ rb_grn_table_each_ensure, (VALUE)&data);
961
1004
 
962
1005
  return Qnil;
963
1006
  }
@@ -624,14 +624,61 @@ rb_grn_bulk_from_ruby_object_with_type (VALUE object, grn_ctx *context,
624
624
  }
625
625
 
626
626
  if (!bulk) {
627
- bulk = grn_obj_open(context, GRN_BULK, flags, GRN_ID_NIL);
627
+ bulk = grn_obj_open(context, GRN_BULK, flags, type_id);
628
628
  rb_grn_context_check(context, object);
629
629
  }
630
+ if (bulk->header.domain != type_id) {
631
+ grn_obj_reinit(context, bulk, type_id, 0);
632
+ }
630
633
  GRN_TEXT_SET(context, bulk, string, size);
631
634
 
632
635
  return bulk;
633
636
  }
634
637
 
638
+ VALUE
639
+ rb_grn_pvector_to_ruby_object (grn_ctx *context, grn_obj *pvector)
640
+ {
641
+ VALUE array;
642
+ unsigned int i, n;
643
+
644
+ if (!pvector)
645
+ return Qnil;
646
+
647
+ n = GRN_BULK_VSIZE(pvector) / sizeof(grn_obj *);
648
+ array = rb_ary_new2(n);
649
+ for (i = 0; i < n; i++) {
650
+ grn_obj *object = GRN_PTR_VALUE_AT(pvector, i);
651
+
652
+ rb_ary_push(array, GRNOBJECT2RVAL(Qnil, context, object, GRN_FALSE));
653
+ }
654
+
655
+ return array;
656
+ }
657
+
658
+ grn_obj *
659
+ rb_grn_pvector_from_ruby_object (VALUE object,
660
+ grn_ctx *context,
661
+ grn_obj *pvector)
662
+ {
663
+ int i, n;
664
+ VALUE array;
665
+
666
+ if (NIL_P(object))
667
+ return pvector;
668
+
669
+ array = rb_grn_convert_to_array(object);
670
+
671
+ n = RARRAY_LEN(array);
672
+ for (i = 0; i < n; i++) {
673
+ VALUE rb_value = RARRAY_PTR(array)[i];
674
+ grn_obj *value;
675
+
676
+ value = RVAL2GRNOBJECT(rb_value, &context);
677
+ GRN_PTR_PUT(context, pvector, value);
678
+ }
679
+
680
+ return pvector;
681
+ }
635
682
 
636
683
  VALUE
637
684
  rb_grn_vector_to_ruby_object (grn_ctx *context, grn_obj *vector)
@@ -1172,7 +1219,9 @@ rb_grn_obj_to_ruby_object (VALUE klass, grn_ctx *context,
1172
1219
  break;
1173
1220
  /* case GRN_PTR: */
1174
1221
  /* case GRN_UVECTOR: */
1175
- /* case GRN_PVECTOR: */
1222
+ case GRN_PVECTOR:
1223
+ return GRNPVECTOR2RVAL(context, obj);
1224
+ break;
1176
1225
  case GRN_VECTOR:
1177
1226
  return GRNVECTOR2RVAL(context, obj);
1178
1227
  break;
@@ -1,6 +1,7 @@
1
1
  /* -*- coding: utf-8; mode: C; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
2
  /*
3
3
  Copyright (C) 2009-2014 Kouhei Sutou <kou@clear-code.com>
4
+ Copyright (C) 2014 Masafumi Yokoyama <myokoym@gmail.com>
4
5
 
5
6
  This library is free software; you can redistribute it and/or
6
7
  modify it under the terms of the GNU Lesser General Public
@@ -458,7 +459,7 @@ rb_grn_variable_size_column_array_set (VALUE self, VALUE rb_id, VALUE rb_value)
458
459
  * @overload compressed?
459
460
  * @return [Boolean] whether the column is compressed or not.
460
461
  * @overload compressed?(type)
461
- * @param [:zlib, :lzo] type (nil)
462
+ * @param [:zlib, :lz4] type (nil)
462
463
  * @return [Boolean] whether specified compressed type is used or not.
463
464
  * @since 1.3.1
464
465
  */
@@ -473,7 +474,7 @@ rb_grn_variable_size_column_compressed_p (int argc, VALUE *argv, VALUE self)
473
474
  grn_bool compressed_p = GRN_FALSE;
474
475
  grn_bool accept_any_type = GRN_FALSE;
475
476
  grn_bool need_zlib_check = GRN_FALSE;
476
- grn_bool need_lzo_check = GRN_FALSE;
477
+ grn_bool need_lz4_check = GRN_FALSE;
477
478
 
478
479
  rb_scan_args(argc, argv, "01", &type);
479
480
 
@@ -483,10 +484,13 @@ rb_grn_variable_size_column_compressed_p (int argc, VALUE *argv, VALUE self)
483
484
  if (rb_grn_equal_option(type, "zlib")) {
484
485
  need_zlib_check = GRN_TRUE;
485
486
  } else if (rb_grn_equal_option(type, "lzo")) {
486
- need_lzo_check = GRN_TRUE;
487
+ /* TODO: for backward compatibility */
488
+ need_lz4_check = GRN_TRUE;
489
+ } else if (rb_grn_equal_option(type, "lz4")) {
490
+ need_lz4_check = GRN_TRUE;
487
491
  } else {
488
492
  rb_raise(rb_eArgError,
489
- "compressed type should be <:zlib> or <:lzo>: <%s>",
493
+ "compressed type should be <:zlib> or <:lz4>: <%s>",
490
494
  rb_grn_inspect(type));
491
495
  }
492
496
  }
@@ -506,11 +510,11 @@ rb_grn_variable_size_column_compressed_p (int argc, VALUE *argv, VALUE self)
506
510
  compressed_p = GRN_BOOL_VALUE(&support_p);
507
511
  }
508
512
  break;
509
- case GRN_OBJ_COMPRESS_LZO:
510
- if (accept_any_type || need_lzo_check) {
513
+ case GRN_OBJ_COMPRESS_LZ4:
514
+ if (accept_any_type || need_lz4_check) {
511
515
  grn_obj support_p;
512
516
  GRN_BOOL_INIT(&support_p, 0);
513
- grn_obj_get_info(context, NULL, GRN_INFO_SUPPORT_LZO, &support_p);
517
+ grn_obj_get_info(context, NULL, GRN_INFO_SUPPORT_LZ4, &support_p);
514
518
  compressed_p = GRN_BOOL_VALUE(&support_p);
515
519
  }
516
520
  break;
@@ -92,7 +92,7 @@ RB_GRN_BEGIN_DECLS
92
92
 
93
93
  #define RB_GRN_MAJOR_VERSION 4
94
94
  #define RB_GRN_MINOR_VERSION 0
95
- #define RB_GRN_MICRO_VERSION 5
95
+ #define RB_GRN_MICRO_VERSION 6
96
96
 
97
97
  #define RB_GRN_QUERY_DEFAULT_MAX_EXPRESSIONS 32
98
98
 
@@ -665,6 +665,11 @@ VALUE rb_grn_column_expression_builder_build
665
665
  #define GRNBULK2RVAL(context, bulk, range, related_object) \
666
666
  (rb_grn_bulk_to_ruby_object(context, bulk, range, related_object))
667
667
 
668
+ #define RVAL2GRNPVECTOR(object, context, vector) \
669
+ (rb_grn_pvector_from_ruby_object(object, context, vector))
670
+ #define GRNPVECTOR2RVAL(context, vector) \
671
+ (rb_grn_pvector_to_ruby_object(context, vector))
672
+
668
673
  #define RVAL2GRNVECTOR(object, context, vector) \
669
674
  (rb_grn_vector_from_ruby_object(object, context, vector))
670
675
  #define GRNVECTOR2RVAL(context, vector) \
@@ -782,6 +787,11 @@ VALUE rb_grn_bulk_to_ruby_object (grn_ctx *context,
782
787
  grn_obj *bulk,
783
788
  grn_obj *range,
784
789
  VALUE related_object);
790
+ grn_obj *rb_grn_pvector_from_ruby_object (VALUE object,
791
+ grn_ctx *context,
792
+ grn_obj *pvector);
793
+ VALUE rb_grn_pvector_to_ruby_object (grn_ctx *context,
794
+ grn_obj *pvector);
785
795
  grn_obj *rb_grn_vector_from_ruby_object (VALUE object,
786
796
  grn_ctx *context,
787
797
  grn_obj *vector);
@@ -1,6 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  # Copyright (C) 2011-2014 Kouhei Sutou <kou@clear-code.com>
4
+ # Copyright (C) 2014 Masafumi Yokoyama <myokoym@gmail.com>
4
5
  #
5
6
  # This library is free software; you can redistribute it and/or
6
7
  # modify it under the terms of the GNU Lesser General Public
@@ -419,6 +420,14 @@ module Groonga
419
420
  if default_tokenizer
420
421
  parameters << ":default_tokenizer => #{default_tokenizer.name.dump}"
421
422
  end
423
+ token_filters = table.token_filters
424
+ unless token_filters.empty?
425
+ dumped_token_filter_names = token_filters.collect do |token_filter|
426
+ token_filter.name.dump
427
+ end
428
+ dumped_token_filters = "[#{dumped_token_filter_names.join(', ')}]"
429
+ parameters << ":token_filters => #{dumped_token_filters}"
430
+ end
422
431
  if _normalizer_name
423
432
  parameters << ":normalizer => #{_normalizer_name.dump}"
424
433
  end
@@ -575,6 +584,13 @@ module Groonga
575
584
  if default_tokenizer
576
585
  parameters << "--default_tokenizer #{default_tokenizer.name}"
577
586
  end
587
+ token_filters = table.token_filters
588
+ unless token_filters.empty?
589
+ token_filter_names = token_filters.collect do |token_filter|
590
+ token_filter.name
591
+ end
592
+ parameters << "--token_filters #{token_filter_names.join(',')}"
593
+ end
578
594
  end
579
595
  if _normalizer_name
580
596
  parameters << "--normalizer #{_normalizer_name}"
@@ -602,7 +618,13 @@ module Groonga
602
618
  flags << "COLUMN_VECTOR"
603
619
  end
604
620
  flags << "WITH_WEIGHT" if column.with_weight?
605
- # TODO: support COMPRESS_ZLIB and COMPRESS_LZO?
621
+ if column.is_a?(Groonga::VariableSizeColumn)
622
+ if column.compressed?(:zlib)
623
+ flags << "COMPRESS_ZLIB"
624
+ elsif column.compressed?(:lz4)
625
+ flags << "COMPRESS_LZ4"
626
+ end
627
+ end
606
628
  parameters << "#{flags.join('|')}"
607
629
  parameters << "#{column.range.name}"
608
630
  write("column_create #{parameters.join(' ')}\n")