rroonga 2.0.8 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +2 -2
- data/bin/groonga-index-dump +47 -0
- data/doc/text/news.textile +733 -0
- data/doc/text/tutorial.textile +535 -0
- data/example/bookmark.rb +1 -1
- data/ext/groonga/rb-grn-database.c +21 -24
- data/ext/groonga/rb-grn-double-array-trie.c +50 -58
- data/ext/groonga/rb-grn-exception.c +18 -1
- data/ext/groonga/rb-grn-hash.c +18 -3
- data/ext/groonga/rb-grn-index-column.c +50 -2
- data/ext/groonga/rb-grn-normalizer.c +83 -0
- data/ext/groonga/rb-grn-object.c +18 -14
- data/ext/groonga/rb-grn-patricia-trie.c +17 -2
- data/ext/groonga/rb-grn-query-logger.c +263 -0
- data/ext/groonga/rb-grn-snippet.c +6 -0
- data/ext/groonga/rb-grn-table-key-support.c +204 -13
- data/ext/groonga/rb-grn-table.c +124 -46
- data/ext/groonga/rb-grn.h +14 -3
- data/ext/groonga/rb-groonga.c +2 -0
- data/lib/groonga/database.rb +7 -0
- data/lib/groonga/dumper.rb +21 -2
- data/lib/groonga/index-column.rb +170 -0
- data/lib/groonga/query-logger.rb +129 -0
- data/lib/groonga/record.rb +32 -8
- data/lib/groonga/schema.rb +231 -288
- data/lib/groonga.rb +2 -1
- data/rroonga-build.rb +2 -2
- data/rroonga.gemspec +11 -7
- data/test/groonga-test-utils.rb +18 -6
- data/test/test-hash.rb +49 -20
- data/test/test-index-cursor.rb +4 -4
- data/{Gemfile → test/test-normalizer.rb} +9 -5
- data/test/test-pagination.rb +1 -1
- data/test/test-patricia-trie.rb +8 -0
- data/test/test-schema.rb +16 -13
- data/test/test-snippet.rb +5 -0
- data/test/test-table.rb +24 -12
- data/test/test-view.rb +0 -1
- metadata +154 -136
- data/AUTHORS +0 -5
- data/Rakefile +0 -203
- data/bin/groonga-query-log-extract +0 -117
data/ext/groonga/rb-grn-table.c
CHANGED
@@ -143,7 +143,7 @@ rb_grn_table_initialize (VALUE self)
|
|
143
143
|
return Qnil;
|
144
144
|
}
|
145
145
|
|
146
|
-
|
146
|
+
VALUE
|
147
147
|
rb_grn_table_inspect_content (VALUE self, VALUE inspected)
|
148
148
|
{
|
149
149
|
RbGrnTable *rb_grn_table;
|
@@ -165,25 +165,6 @@ rb_grn_table_inspect_content (VALUE self, VALUE inspected)
|
|
165
165
|
if (!context)
|
166
166
|
return inspected;
|
167
167
|
|
168
|
-
if (table->header.type != GRN_TABLE_NO_KEY) {
|
169
|
-
grn_obj value;
|
170
|
-
grn_encoding encoding;
|
171
|
-
|
172
|
-
rb_str_cat2(inspected, ", ");
|
173
|
-
rb_str_cat2(inspected, "encoding: <");
|
174
|
-
GRN_OBJ_INIT(&value, GRN_BULK, 0, GRN_ID_NIL);
|
175
|
-
grn_obj_get_info(context, table, GRN_INFO_ENCODING, &value);
|
176
|
-
encoding = *((grn_encoding *)GRN_BULK_HEAD(&value));
|
177
|
-
grn_obj_unlink(context, &value);
|
178
|
-
|
179
|
-
if (context->rc == GRN_SUCCESS)
|
180
|
-
rb_str_concat(inspected, rb_inspect(GRNENCODING2RVAL(encoding)));
|
181
|
-
else
|
182
|
-
rb_str_cat2(inspected, "invalid");
|
183
|
-
|
184
|
-
rb_str_cat2(inspected, ">");
|
185
|
-
}
|
186
|
-
|
187
168
|
rb_str_cat2(inspected, ", ");
|
188
169
|
rb_str_cat2(inspected, "size: <");
|
189
170
|
{
|
@@ -949,7 +930,6 @@ rb_grn_table_each (int argc, VALUE *argv, VALUE self)
|
|
949
930
|
RbGrnTable *rb_table;
|
950
931
|
RbGrnObject *rb_grn_object;
|
951
932
|
grn_ctx *context = NULL;
|
952
|
-
grn_obj *table;
|
953
933
|
grn_table_cursor *cursor;
|
954
934
|
VALUE rb_cursor;
|
955
935
|
grn_id id;
|
@@ -969,13 +949,8 @@ rb_grn_table_each (int argc, VALUE *argv, VALUE self)
|
|
969
949
|
return Qnil;
|
970
950
|
}
|
971
951
|
|
972
|
-
/*
|
973
|
-
* テーブルの _id_ に対応するレコードを削除する。
|
974
|
-
*
|
975
|
-
* @overload delete(id)
|
976
|
-
*/
|
977
952
|
VALUE
|
978
|
-
|
953
|
+
rb_grn_table_delete_by_id (VALUE self, VALUE rb_id)
|
979
954
|
{
|
980
955
|
grn_ctx *context = NULL;
|
981
956
|
grn_obj *table;
|
@@ -994,6 +969,97 @@ rb_grn_table_delete (VALUE self, VALUE rb_id)
|
|
994
969
|
return Qnil;
|
995
970
|
}
|
996
971
|
|
972
|
+
VALUE
|
973
|
+
rb_grn_table_delete_by_expression (VALUE self)
|
974
|
+
{
|
975
|
+
grn_ctx *context = NULL;
|
976
|
+
grn_obj *table;
|
977
|
+
VALUE rb_builder, rb_expression;
|
978
|
+
grn_obj *needless_records, *expression;
|
979
|
+
grn_operator operator = GRN_OP_OR;
|
980
|
+
grn_table_cursor *cursor;
|
981
|
+
|
982
|
+
rb_grn_table_deconstruct(SELF(self), &table, &context,
|
983
|
+
NULL, NULL,
|
984
|
+
NULL, NULL, NULL,
|
985
|
+
NULL);
|
986
|
+
|
987
|
+
rb_builder = rb_grn_record_expression_builder_new(self, Qnil);
|
988
|
+
rb_expression = rb_grn_record_expression_builder_build(rb_builder);
|
989
|
+
rb_grn_object_deconstruct(RB_GRN_OBJECT(DATA_PTR(rb_expression)),
|
990
|
+
&expression, NULL,
|
991
|
+
NULL, NULL, NULL, NULL);
|
992
|
+
|
993
|
+
needless_records =
|
994
|
+
grn_table_create(context, NULL, 0, NULL,
|
995
|
+
GRN_TABLE_HASH_KEY | GRN_OBJ_WITH_SUBREC,
|
996
|
+
table,
|
997
|
+
NULL);
|
998
|
+
if (!needless_records) {
|
999
|
+
rb_grn_context_check(context, self);
|
1000
|
+
rb_grn_rc_check(GRN_NO_MEMORY_AVAILABLE, self);
|
1001
|
+
}
|
1002
|
+
|
1003
|
+
grn_table_select(context, table, expression, needless_records, operator);
|
1004
|
+
cursor = grn_table_cursor_open(context, needless_records,
|
1005
|
+
NULL, 0,
|
1006
|
+
NULL, 0,
|
1007
|
+
0, -1, 0);
|
1008
|
+
if (cursor) {
|
1009
|
+
while (grn_table_cursor_next(context, cursor)) {
|
1010
|
+
grn_id *id;
|
1011
|
+
grn_table_cursor_get_key(context, cursor, (void **)&id);
|
1012
|
+
grn_table_delete_by_id(context, table, *id);
|
1013
|
+
}
|
1014
|
+
grn_table_cursor_close(context, cursor);
|
1015
|
+
}
|
1016
|
+
grn_obj_unlink(context, needless_records);
|
1017
|
+
|
1018
|
+
return Qnil;
|
1019
|
+
}
|
1020
|
+
|
1021
|
+
/*
|
1022
|
+
* @overload delete(id)
|
1023
|
+
* Delete a record that has ID @id@.
|
1024
|
+
*
|
1025
|
+
* @param id [Integer] The ID of the record to delete.
|
1026
|
+
*
|
1027
|
+
* @return void
|
1028
|
+
*
|
1029
|
+
* @overload delete
|
1030
|
+
* Delete records that are matched with the given condition
|
1031
|
+
* specified block.
|
1032
|
+
*
|
1033
|
+
* @example Delete users that are younger than 20.
|
1034
|
+
* users.delete do |record|
|
1035
|
+
* record.age < 20
|
1036
|
+
* end
|
1037
|
+
*
|
1038
|
+
* @yield [record]
|
1039
|
+
* TODO: See #select.
|
1040
|
+
* @yieldparam [Groonga::RecordExpressionBuilder] record
|
1041
|
+
* TODO: See #select.
|
1042
|
+
* @yieldreturn [Groonga::ExpressionBuilder]
|
1043
|
+
* TODO: See #select.
|
1044
|
+
*
|
1045
|
+
* @return void
|
1046
|
+
*/
|
1047
|
+
static VALUE
|
1048
|
+
rb_grn_table_delete (int argc, VALUE *argv, VALUE self)
|
1049
|
+
{
|
1050
|
+
VALUE rb_id;
|
1051
|
+
|
1052
|
+
rb_scan_args(argc, argv, "01", &rb_id);
|
1053
|
+
|
1054
|
+
if (rb_block_given_p()) {
|
1055
|
+
rb_grn_table_delete_by_expression(self);
|
1056
|
+
} else {
|
1057
|
+
rb_grn_table_delete_by_id(self, rb_id);
|
1058
|
+
}
|
1059
|
+
|
1060
|
+
return Qnil;
|
1061
|
+
}
|
1062
|
+
|
997
1063
|
/*
|
998
1064
|
* テーブルに登録されているレコードを _keys_ で指定されたルー
|
999
1065
|
* ルに従ってソートしたレコードの配列を返す。
|
@@ -1047,7 +1113,35 @@ rb_grn_table_delete (VALUE self, VALUE rb_id)
|
|
1047
1113
|
* ソートされたレコードのうち、 _:limit_ 件のみを取り出す。
|
1048
1114
|
* 省略された場合または-1が指定された場合は、全件が指定され
|
1049
1115
|
* たものとみなす。
|
1050
|
-
*
|
1116
|
+
*
|
1117
|
+
* @return [Groonga::Array] The sorted result. You can get the
|
1118
|
+
* original record by {#value} method of a record in the sorted
|
1119
|
+
* result. Normally, you don't need to get the original record
|
1120
|
+
* because you can access via column name method:
|
1121
|
+
*
|
1122
|
+
* <pre>
|
1123
|
+
* !!!ruby
|
1124
|
+
* names_recommended_access = sorted_users.collect do |sorted_user|
|
1125
|
+
* sorted_user.name
|
1126
|
+
* end
|
1127
|
+
* names_manually_access = sorted_users.collect do |sorted_user|
|
1128
|
+
* sorted_user.value.name
|
1129
|
+
* end
|
1130
|
+
* names_recommended_access == names_manually_access # => true
|
1131
|
+
* </pre>
|
1132
|
+
*
|
1133
|
+
* If you want to access the key of the original record, you need to
|
1134
|
+
* get the original record.
|
1135
|
+
*
|
1136
|
+
* @note The return value is changed to {Groonga::Array} from
|
1137
|
+
* {::Array} since 2.1.0. If you want to get the pre-2.1.0 style
|
1138
|
+
* result, use the following code:
|
1139
|
+
*
|
1140
|
+
* @example Describe incompatible API change
|
1141
|
+
* result_since_2_1_0 = table.sort(["sort_key"])
|
1142
|
+
* result_before_2_1_0 = result_since_2_1_0.collect do |record|
|
1143
|
+
* record.value
|
1144
|
+
* end
|
1051
1145
|
*/
|
1052
1146
|
static VALUE
|
1053
1147
|
rb_grn_table_sort (int argc, VALUE *argv, VALUE self)
|
@@ -1061,8 +1155,6 @@ rb_grn_table_sort (int argc, VALUE *argv, VALUE self)
|
|
1061
1155
|
VALUE rb_keys, options;
|
1062
1156
|
VALUE rb_offset, rb_limit;
|
1063
1157
|
VALUE *rb_sort_keys;
|
1064
|
-
grn_table_cursor *cursor;
|
1065
|
-
VALUE rb_result;
|
1066
1158
|
VALUE exception;
|
1067
1159
|
|
1068
1160
|
rb_grn_table_deconstruct(SELF(self), &table, &context,
|
@@ -1150,21 +1242,7 @@ rb_grn_table_sort (int argc, VALUE *argv, VALUE self)
|
|
1150
1242
|
rb_exc_raise(exception);
|
1151
1243
|
}
|
1152
1244
|
|
1153
|
-
|
1154
|
-
cursor = grn_table_cursor_open(context, result, NULL, 0, NULL, 0,
|
1155
|
-
0, -1, GRN_CURSOR_ASCENDING);
|
1156
|
-
while (grn_table_cursor_next(context, cursor) != GRN_ID_NIL) {
|
1157
|
-
void *value;
|
1158
|
-
grn_id *id;
|
1159
|
-
|
1160
|
-
grn_table_cursor_get_value(context, cursor, &value);
|
1161
|
-
id = value;
|
1162
|
-
rb_ary_push(rb_result, rb_grn_record_new(self, *id, Qnil));
|
1163
|
-
}
|
1164
|
-
grn_table_cursor_close(context, cursor);
|
1165
|
-
grn_obj_unlink(context, result);
|
1166
|
-
|
1167
|
-
return rb_result;
|
1245
|
+
return GRNOBJECT2RVAL(Qnil, context, result, GRN_TRUE);
|
1168
1246
|
}
|
1169
1247
|
|
1170
1248
|
/*
|
@@ -2149,7 +2227,7 @@ rb_grn_init_table (VALUE mGrn)
|
|
2149
2227
|
|
2150
2228
|
rb_define_method(rb_cGrnTable, "each", rb_grn_table_each, -1);
|
2151
2229
|
|
2152
|
-
rb_define_method(rb_cGrnTable, "delete", rb_grn_table_delete, 1);
|
2230
|
+
rb_define_method(rb_cGrnTable, "delete", rb_grn_table_delete, -1);
|
2153
2231
|
|
2154
2232
|
rb_define_method(rb_cGrnTable, "sort", rb_grn_table_sort, -1);
|
2155
2233
|
rb_define_method(rb_cGrnTable, "group", rb_grn_table_group, -1);
|
data/ext/groonga/rb-grn.h
CHANGED
@@ -75,8 +75,8 @@ RB_GRN_BEGIN_DECLS
|
|
75
75
|
#endif
|
76
76
|
|
77
77
|
#define RB_GRN_MAJOR_VERSION 2
|
78
|
-
#define RB_GRN_MINOR_VERSION
|
79
|
-
#define RB_GRN_MICRO_VERSION
|
78
|
+
#define RB_GRN_MINOR_VERSION 1
|
79
|
+
#define RB_GRN_MICRO_VERSION 0
|
80
80
|
|
81
81
|
#define RB_GRN_QUERY_DEFAULT_MAX_EXPRESSIONS 32
|
82
82
|
|
@@ -252,6 +252,7 @@ RB_GRN_VAR VALUE rb_cGrnExpression;
|
|
252
252
|
RB_GRN_VAR VALUE rb_cGrnRecordExpressionBuilder;
|
253
253
|
RB_GRN_VAR VALUE rb_cGrnColumnExpressionBuilder;
|
254
254
|
RB_GRN_VAR VALUE rb_cGrnPlugin;
|
255
|
+
RB_GRN_VAR VALUE rb_cGrnNormalizer;
|
255
256
|
|
256
257
|
void rb_grn_init_utils (VALUE mGrn);
|
257
258
|
void rb_grn_init_exception (VALUE mGrn);
|
@@ -292,8 +293,10 @@ void rb_grn_init_operator (VALUE mGrn);
|
|
292
293
|
void rb_grn_init_expression (VALUE mGrn);
|
293
294
|
void rb_grn_init_expression_builder (VALUE mGrn);
|
294
295
|
void rb_grn_init_logger (VALUE mGrn);
|
296
|
+
void rb_grn_init_query_logger (VALUE mGrn);
|
295
297
|
void rb_grn_init_snippet (VALUE mGrn);
|
296
298
|
void rb_grn_init_plugin (VALUE mGrn);
|
299
|
+
void rb_grn_init_normalizer (VALUE mGrn);
|
297
300
|
|
298
301
|
VALUE rb_grn_rc_to_exception (grn_rc rc);
|
299
302
|
const char *rb_grn_rc_to_message (grn_rc rc);
|
@@ -360,6 +363,10 @@ VALUE rb_grn_object_inspect_object (VALUE inspected,
|
|
360
363
|
VALUE rb_grn_object_inspect_object_content (VALUE inspected,
|
361
364
|
grn_ctx *context,
|
362
365
|
grn_obj *object);
|
366
|
+
VALUE rb_grn_object_inspect_object_content_name
|
367
|
+
(VALUE inspected,
|
368
|
+
grn_ctx *context,
|
369
|
+
grn_obj *object);
|
363
370
|
VALUE rb_grn_object_inspect_header (VALUE object,
|
364
371
|
VALUE inspected);
|
365
372
|
VALUE rb_grn_object_inspect_content (VALUE object,
|
@@ -398,8 +405,9 @@ void rb_grn_table_deconstruct (RbGrnTable *rb_grn_table,
|
|
398
405
|
grn_obj **range,
|
399
406
|
VALUE *columns);
|
400
407
|
|
401
|
-
VALUE
|
408
|
+
VALUE rb_grn_table_delete_by_id (VALUE self,
|
402
409
|
VALUE rb_id);
|
410
|
+
VALUE rb_grn_table_delete_by_expression (VALUE self);
|
403
411
|
VALUE rb_grn_table_array_reference (VALUE self,
|
404
412
|
VALUE rb_id);
|
405
413
|
VALUE rb_grn_table_array_set (VALUE self,
|
@@ -428,6 +436,8 @@ VALUE rb_grn_table_set_column_value (VALUE self,
|
|
428
436
|
VALUE rb_id,
|
429
437
|
VALUE rb_name,
|
430
438
|
VALUE rb_value);
|
439
|
+
VALUE rb_grn_table_inspect_content (VALUE object,
|
440
|
+
VALUE inspected);
|
431
441
|
|
432
442
|
grn_ctx *rb_grn_table_cursor_ensure_context (VALUE cursor,
|
433
443
|
VALUE *rb_context);
|
@@ -596,6 +606,7 @@ VALUE rb_grn_column_expression_builder_build
|
|
596
606
|
#define RVAL2GRNOPERATOR(object) (rb_grn_operator_from_ruby_object(object))
|
597
607
|
|
598
608
|
#define RVAL2GRNLOGGER(object) (rb_grn_logger_from_ruby_object(object))
|
609
|
+
#define RVAL2GRNQUERYLOGGER(object) (rb_grn_query_logger_from_ruby_object(object))
|
599
610
|
|
600
611
|
#define RVAL2GRNBULK(object, context, bulk) \
|
601
612
|
(rb_grn_bulk_from_ruby_object(object, context, bulk))
|
data/ext/groonga/rb-groonga.c
CHANGED
@@ -144,6 +144,8 @@ Init_groonga (void)
|
|
144
144
|
rb_grn_init_expression(mGrn);
|
145
145
|
rb_grn_init_expression_builder(mGrn);
|
146
146
|
rb_grn_init_logger(mGrn);
|
147
|
+
rb_grn_init_query_logger(mGrn);
|
147
148
|
rb_grn_init_snippet(mGrn);
|
148
149
|
rb_grn_init_plugin(mGrn);
|
150
|
+
rb_grn_init_normalizer(mGrn);
|
149
151
|
}
|
data/lib/groonga/database.rb
CHANGED
data/lib/groonga/dumper.rb
CHANGED
@@ -340,6 +340,17 @@ module Groonga
|
|
340
340
|
change_table_footer(table)
|
341
341
|
@table_defined = true
|
342
342
|
end
|
343
|
+
|
344
|
+
def normalizer_name(table)
|
345
|
+
return nil unless table.domain
|
346
|
+
normalizer = table.normalizer
|
347
|
+
return nil if normalizer.nil?
|
348
|
+
normalizer.name
|
349
|
+
end
|
350
|
+
|
351
|
+
def default_normalizer?(normalizer_name)
|
352
|
+
normalizer_name == "NormalizerAuto"
|
353
|
+
end
|
343
354
|
end
|
344
355
|
|
345
356
|
# @private
|
@@ -347,6 +358,7 @@ module Groonga
|
|
347
358
|
private
|
348
359
|
def create_table_header(table)
|
349
360
|
parameters = []
|
361
|
+
_normalizer_name = normalizer_name(table)
|
350
362
|
unless table.is_a?(Groonga::Array)
|
351
363
|
case table
|
352
364
|
when Groonga::Hash
|
@@ -356,7 +368,7 @@ module Groonga
|
|
356
368
|
end
|
357
369
|
if table.domain
|
358
370
|
parameters << ":key_type => #{table.domain.name.dump}"
|
359
|
-
if
|
371
|
+
if default_normalizer?(_normalizer_name)
|
360
372
|
parameters << ":key_normalize => true"
|
361
373
|
end
|
362
374
|
end
|
@@ -364,6 +376,9 @@ module Groonga
|
|
364
376
|
if default_tokenizer
|
365
377
|
parameters << ":default_tokenizer => #{default_tokenizer.name.dump}"
|
366
378
|
end
|
379
|
+
if _normalizer_name and not default_normalizer?(_normalizer_name)
|
380
|
+
parameters << ":normalizer => #{_normalizer_name.dump}"
|
381
|
+
end
|
367
382
|
end
|
368
383
|
parameters << ":force => true"
|
369
384
|
parameters.unshift("")
|
@@ -475,6 +490,7 @@ module Groonga
|
|
475
490
|
def create_table_header(table)
|
476
491
|
parameters = []
|
477
492
|
flags = []
|
493
|
+
_normalizer_name = normalizer_name(table)
|
478
494
|
case table
|
479
495
|
when Groonga::Array
|
480
496
|
flags << "TABLE_NO_KEY"
|
@@ -484,7 +500,7 @@ module Groonga
|
|
484
500
|
flags << "TABLE_PAT_KEY"
|
485
501
|
end
|
486
502
|
if table.domain
|
487
|
-
flags << "KEY_NORMALIZE" if
|
503
|
+
flags << "KEY_NORMALIZE" if default_normalizer?(_normalizer_name)
|
488
504
|
if table.is_a?(Groonga::PatriciaTrie) and table.register_key_with_sis?
|
489
505
|
flags << "KEY_WITH_SIS"
|
490
506
|
end
|
@@ -502,6 +518,9 @@ module Groonga
|
|
502
518
|
parameters << "--default_tokenizer #{default_tokenizer.name}"
|
503
519
|
end
|
504
520
|
end
|
521
|
+
if _normalizer_name and not default_normalizer?(_normalizer_name)
|
522
|
+
parameters << "--normalizer #{_normalizer_name}"
|
523
|
+
end
|
505
524
|
write("table_create #{table.name} #{parameters.join(' ')}\n")
|
506
525
|
end
|
507
526
|
|
@@ -0,0 +1,170 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# Copyright (C) 2012 Kouhei Sutou <kou@clear-code.com>
|
4
|
+
#
|
5
|
+
# This library is free software; you can redistribute it and/or
|
6
|
+
# modify it under the terms of the GNU Lesser General Public
|
7
|
+
# License version 2.1 as published by the Free Software Foundation.
|
8
|
+
#
|
9
|
+
# This library is distributed in the hope that it will be useful,
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
12
|
+
# Lesser General Public License for more details.
|
13
|
+
#
|
14
|
+
# You should have received a copy of the GNU Lesser General Public
|
15
|
+
# License along with this library; if not, write to the Free Software
|
16
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
17
|
+
|
18
|
+
require "fileutils"
|
19
|
+
require "cgi"
|
20
|
+
|
21
|
+
module Groonga
|
22
|
+
class IndexColumn
|
23
|
+
def dump(output_directory)
|
24
|
+
dumper = IndexColumnDumper.new(self, output_directory)
|
25
|
+
dumper.dump
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
class IndexColumnDumper
|
30
|
+
def initialize(column, output_directory)
|
31
|
+
@column = column
|
32
|
+
@output_directory = output_directory
|
33
|
+
@sources = @column.sources
|
34
|
+
end
|
35
|
+
|
36
|
+
def dump
|
37
|
+
dump_indexes
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
def dump_indexes
|
42
|
+
@column.table.open_cursor do |table_cursor|
|
43
|
+
@column.open_cursor(table_cursor) do |cursor|
|
44
|
+
postings = []
|
45
|
+
cursor.each do |posting|
|
46
|
+
if postings.empty?
|
47
|
+
postings << posting
|
48
|
+
next
|
49
|
+
end
|
50
|
+
|
51
|
+
current_term_posting = postings.first
|
52
|
+
unless same_term_posting?(current_term_posting, posting)
|
53
|
+
dump_postings(postings)
|
54
|
+
postings.clear
|
55
|
+
end
|
56
|
+
|
57
|
+
postings << posting
|
58
|
+
end
|
59
|
+
dump_postings(postings)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def same_term_posting?(posting1, posting2)
|
65
|
+
posting1.term_id == posting2.term_id
|
66
|
+
end
|
67
|
+
|
68
|
+
def dump_file_info(posting)
|
69
|
+
items = [
|
70
|
+
"index: #{@column.name}",
|
71
|
+
"term: <#{posting.term.key}>",
|
72
|
+
"domain: #{@column.domain.name}",
|
73
|
+
"range: #{@column.range.name}",
|
74
|
+
"have_section: #{@column.with_section?}",
|
75
|
+
"have_weight: #{@column.with_weight?}",
|
76
|
+
"have_position: #{@column.with_position?}",
|
77
|
+
]
|
78
|
+
info = items.join("\t")
|
79
|
+
@output.write("#{info}\n")
|
80
|
+
end
|
81
|
+
|
82
|
+
def dump_posting_header
|
83
|
+
header_items = [
|
84
|
+
"weight",
|
85
|
+
"position",
|
86
|
+
"term_frequency",
|
87
|
+
"record",
|
88
|
+
]
|
89
|
+
header = header_items.join("\t")
|
90
|
+
@output.write(" #{header}\n")
|
91
|
+
end
|
92
|
+
|
93
|
+
def encode_term(term)
|
94
|
+
CGI.escape(term.to_s)
|
95
|
+
end
|
96
|
+
|
97
|
+
def dump_postings(postings)
|
98
|
+
return if postings.empty?
|
99
|
+
|
100
|
+
distinctive_posting = postings.first
|
101
|
+
term = distinctive_posting.term.key
|
102
|
+
encoded_term = encode_term(term)
|
103
|
+
output_dir = File.join(@output_directory, @column.name)
|
104
|
+
output_path = File.join(output_dir, "#{encoded_term}.dump")
|
105
|
+
FileUtils.mkdir_p(output_dir)
|
106
|
+
File.open(output_path, "w") do |output|
|
107
|
+
@output = output
|
108
|
+
dump_file_info(distinctive_posting)
|
109
|
+
dump_posting_header
|
110
|
+
sorted_postings = postings.sort_by do |posting|
|
111
|
+
[source_column_name(posting), record_key(posting), posting.position]
|
112
|
+
end
|
113
|
+
sorted_postings.each do |posting|
|
114
|
+
dump_posting(posting)
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
def dump_posting(posting)
|
120
|
+
found_record = "#{posting.table.name}[#{posting.record.record_id}]"
|
121
|
+
posting_info_items = [
|
122
|
+
"#{posting.weight}",
|
123
|
+
"#{posting.position}",
|
124
|
+
"#{posting.term_frequency}",
|
125
|
+
"#{found_record}.#{source_column_name(posting)}",
|
126
|
+
]
|
127
|
+
posting_info = posting_info_items.join("\t")
|
128
|
+
@output.write(" #{posting_info}\n")
|
129
|
+
end
|
130
|
+
|
131
|
+
def term(posting)
|
132
|
+
posting.term.key
|
133
|
+
end
|
134
|
+
|
135
|
+
def record_key(posting)
|
136
|
+
posting.record.key || default_key(posting)
|
137
|
+
end
|
138
|
+
|
139
|
+
def default_key(posting)
|
140
|
+
type = posting.table.domain
|
141
|
+
return 0 if type.is_a?(Groonga::Table)
|
142
|
+
|
143
|
+
case type.name
|
144
|
+
when "ShortText", "Text", "LongText"
|
145
|
+
""
|
146
|
+
when "TokyoGeoPoint"
|
147
|
+
Groonga::TokyoGeoPoint.new(0, 0)
|
148
|
+
when "WGS84GeoPoint"
|
149
|
+
Groonga::WGS84GeoPoint.new(0, 0)
|
150
|
+
when "Bool"
|
151
|
+
true
|
152
|
+
when "Time"
|
153
|
+
Time.at(0)
|
154
|
+
else
|
155
|
+
0
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
def source_column_name(posting)
|
160
|
+
source = @sources[posting.section_id - 1]
|
161
|
+
if source.nil?
|
162
|
+
"<invalid section: #{posting.section_id}>"
|
163
|
+
elsif source.is_a?(Groonga::Table)
|
164
|
+
"_key"
|
165
|
+
else
|
166
|
+
source.local_name
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# Copyright (C) 2012 Kouhei Sutou <kou@clear-code.com>
|
4
|
+
#
|
5
|
+
# This library is free software; you can redistribute it and/or
|
6
|
+
# modify it under the terms of the GNU Lesser General Public
|
7
|
+
# License version 2.1 as published by the Free Software Foundation.
|
8
|
+
#
|
9
|
+
# This library is distributed in the hope that it will be useful,
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
12
|
+
# Lesser General Public License for more details.
|
13
|
+
#
|
14
|
+
# You should have received a copy of the GNU Lesser General Public
|
15
|
+
# License along with this library; if not, write to the Free Software
|
16
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
17
|
+
|
18
|
+
module Groonga
|
19
|
+
class QueryLogger
|
20
|
+
module Flags
|
21
|
+
LABELS = {
|
22
|
+
COMMAND => "command",
|
23
|
+
RESULT_CODE => "result_code",
|
24
|
+
DESTINATION => "destination",
|
25
|
+
CACHE => "cache",
|
26
|
+
SIZE => "size",
|
27
|
+
SCORE => "score",
|
28
|
+
}
|
29
|
+
|
30
|
+
class << self
|
31
|
+
def parse(input, base_flags)
|
32
|
+
# TODO
|
33
|
+
base_flags
|
34
|
+
end
|
35
|
+
|
36
|
+
def label(flags)
|
37
|
+
labels = []
|
38
|
+
LABELS.each do |flag, label|
|
39
|
+
flags << label if (flags & flag) == flag
|
40
|
+
end
|
41
|
+
labels << "none" if labels.empty?
|
42
|
+
labels.join("|")
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def log(flag, timestamp, info, message)
|
48
|
+
guard do
|
49
|
+
puts("#{timestamp}|#{info}#{message}")
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def reopen
|
54
|
+
end
|
55
|
+
|
56
|
+
def fin
|
57
|
+
end
|
58
|
+
|
59
|
+
private
|
60
|
+
def guard
|
61
|
+
begin
|
62
|
+
yield
|
63
|
+
rescue Exception
|
64
|
+
$stderr.puts("#{$!.class}: #{$!.message}")
|
65
|
+
$stderr.puts($@)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
class FileQueryLogger < QueryLogger
|
71
|
+
def initialize(file_name)
|
72
|
+
super()
|
73
|
+
@file = nil
|
74
|
+
@file_name = file_name
|
75
|
+
end
|
76
|
+
|
77
|
+
def reopen
|
78
|
+
guard do
|
79
|
+
return unless @file
|
80
|
+
@file.close
|
81
|
+
@file = nil
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
def fin
|
86
|
+
guard do
|
87
|
+
return unless @file
|
88
|
+
@file.close
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
private
|
93
|
+
def ensure_open
|
94
|
+
return if @file
|
95
|
+
@file = File.open(@file_name, "ab")
|
96
|
+
end
|
97
|
+
|
98
|
+
def puts(*arguments)
|
99
|
+
ensure_open
|
100
|
+
@file.puts(*arguments)
|
101
|
+
@file.flush
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
class CallbackQueryLogger < QueryLogger
|
106
|
+
def initialize(callback)
|
107
|
+
super()
|
108
|
+
@callback = callback
|
109
|
+
end
|
110
|
+
|
111
|
+
def log(flag, timestamp, info, message)
|
112
|
+
guard do
|
113
|
+
@callback.call(:log, flag, timestamp, info, message)
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
def reopen
|
118
|
+
guard do
|
119
|
+
@callback.call(:reopen)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
def fin
|
124
|
+
guard do
|
125
|
+
@callback.call(:fin)
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|