fluent-plugin-groonga 1.1.8 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/doc/text/configuration.md +23 -20
- data/doc/text/constitution.md +19 -23
- data/doc/text/news.md +9 -3
- data/fluent-plugin-groonga.gemspec +3 -4
- data/lib/fluent/plugin/in_groonga.rb +357 -370
- data/lib/fluent/plugin/out_groonga.rb +599 -584
- data/sample/command.conf +5 -4
- data/sample/gqtp.conf +5 -4
- data/sample/http.conf +5 -4
- data/sample/store-apache.conf +5 -3
- data/sample/store-syslog.conf +5 -3
- data/sample/store.conf +5 -3
- data/test/output/test_table_definition.rb +1 -1
- data/test/output/test_table_index_definition.rb +2 -2
- data/test/output/test_type_guesser.rb +1 -1
- data/test/run-test.rb +4 -1
- data/test/test_input.rb +27 -75
- data/test/test_output.rb +27 -65
- metadata +6 -12
@@ -1,6 +1,3 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
#
|
3
|
-
# Copyright (C) 2018 Yasuhiro Horimoto <horimoto@clear-code.com>
|
4
1
|
# Copyright (C) 2012-2017 Kouhei Sutou <kou@clear-code.com>
|
5
2
|
#
|
6
3
|
# This library is free software; you can redistribute it and/or
|
@@ -22,195 +19,141 @@ require "yajl"
|
|
22
19
|
|
23
20
|
require "groonga/client"
|
24
21
|
|
22
|
+
require "fluent/plugin/output"
|
23
|
+
|
25
24
|
module Fluent
|
26
|
-
|
27
|
-
|
25
|
+
module Plugin
|
26
|
+
class GroongaOutput < Output
|
27
|
+
Plugin.register_output("groonga", self)
|
28
|
+
helpers :compat_parameters
|
28
29
|
|
29
|
-
|
30
|
-
|
31
|
-
|
30
|
+
def initialize
|
31
|
+
super
|
32
|
+
end
|
33
|
+
|
34
|
+
config_param :protocol, :enum, :list => [:http, :gqtp, :command], :default => :http
|
32
35
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
:param_name => "tables",
|
39
|
-
:required => false,
|
40
|
-
:multi => true do
|
41
|
-
config_param :name, :string
|
42
|
-
config_param :flags, :string, :default => nil
|
43
|
-
config_param :key_type, :string, :default => nil
|
44
|
-
config_param :default_tokenizer, :string, :default => nil
|
45
|
-
config_param :token_filters, :string, :default => nil
|
46
|
-
config_param :normalizer, :string, :default => nil
|
47
|
-
config_section :index,
|
48
|
-
:param_name => "indexes",
|
36
|
+
# alias is just for backward compatibility
|
37
|
+
config_param :store_table, :string, :default => nil, :alias => :table
|
38
|
+
|
39
|
+
config_section :table,
|
40
|
+
:param_name => "tables",
|
49
41
|
:required => false,
|
50
42
|
:multi => true do
|
51
43
|
config_param :name, :string
|
52
|
-
config_param :
|
53
|
-
config_param :
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
44
|
+
config_param :flags, :string, :default => nil
|
45
|
+
config_param :key_type, :string, :default => nil
|
46
|
+
config_param :default_tokenizer, :string, :default => nil
|
47
|
+
config_param :token_filters, :string, :default => nil
|
48
|
+
config_param :normalizer, :string, :default => nil
|
49
|
+
config_section :index,
|
50
|
+
:param_name => "indexes",
|
51
|
+
:required => false,
|
52
|
+
:multi => true do
|
53
|
+
config_param :name, :string
|
54
|
+
config_param :source_table, :string
|
55
|
+
config_param :source_columns, :string
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
config_section :mapping,
|
60
|
+
:param_name => "mappings",
|
65
61
|
:required => false,
|
66
62
|
:multi => true do
|
67
|
-
config_param :table, :string
|
68
63
|
config_param :name, :string
|
69
|
-
config_param :
|
64
|
+
config_param :type, :string, :default => nil
|
65
|
+
config_section :index,
|
66
|
+
:param_name => "indexes",
|
67
|
+
:required => false,
|
68
|
+
:multi => true do
|
69
|
+
config_param :table, :string
|
70
|
+
config_param :name, :string
|
71
|
+
config_param :flags, :string, :default => nil
|
72
|
+
end
|
70
73
|
end
|
71
|
-
end
|
72
|
-
|
73
|
-
def configure(conf)
|
74
|
-
super
|
75
|
-
@client = create_client(@protocol)
|
76
|
-
@client.configure(conf)
|
77
74
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
@tables = @tables.collect do |table|
|
82
|
-
TableDefinition.new(table)
|
75
|
+
config_section :buffer do
|
76
|
+
config_set_default :@type, "memory"
|
77
|
+
config_set_default :chunk_keys, ['tag']
|
83
78
|
end
|
84
|
-
end
|
85
|
-
|
86
|
-
def start
|
87
|
-
super
|
88
|
-
@client.start
|
89
|
-
@emitter.start
|
90
|
-
tables_creator = TablesCreator.new(@client, @tables)
|
91
|
-
tables_creator.create
|
92
|
-
end
|
93
|
-
|
94
|
-
def shutdown
|
95
|
-
super
|
96
|
-
@emitter.shutdown
|
97
|
-
@client.shutdown
|
98
|
-
end
|
99
|
-
|
100
|
-
def format(tag, time, record)
|
101
|
-
[tag, time, record].to_msgpack
|
102
|
-
end
|
103
79
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
def create_client(protocol)
|
110
|
-
case protocol
|
111
|
-
when :http, :gqtp
|
112
|
-
NetworkClient.new(protocol)
|
113
|
-
when :command
|
114
|
-
CommandClient.new
|
115
|
-
end
|
116
|
-
end
|
80
|
+
def configure(conf)
|
81
|
+
compat_parameters_convert(conf, :buffer)
|
82
|
+
super
|
83
|
+
@client = create_client(@protocol)
|
84
|
+
@client.configure(conf)
|
117
85
|
|
118
|
-
|
119
|
-
|
120
|
-
def parse_flags(flags)
|
121
|
-
if flags.is_a?(Array)
|
122
|
-
flags
|
123
|
-
else
|
124
|
-
flags.strip.split(/\s*\|\s*/)
|
125
|
-
end
|
126
|
-
end
|
86
|
+
@schema = Schema.new(@client, @store_table, @mappings)
|
87
|
+
@emitter = Emitter.new(@client, @store_table, @schema)
|
127
88
|
|
128
|
-
|
129
|
-
|
130
|
-
items
|
131
|
-
else
|
132
|
-
items.strip.split(/\s*,\s*/)
|
89
|
+
@tables = @tables.collect do |table|
|
90
|
+
TableDefinition.new(table)
|
133
91
|
end
|
134
92
|
end
|
135
|
-
end
|
136
|
-
|
137
|
-
class TableDefinition
|
138
|
-
include DefinitionParseMethods
|
139
|
-
|
140
|
-
def initialize(raw)
|
141
|
-
@raw = raw
|
142
|
-
end
|
143
93
|
|
144
|
-
def
|
145
|
-
|
94
|
+
def start
|
95
|
+
super
|
96
|
+
@client.start
|
97
|
+
@emitter.start
|
98
|
+
tables_creator = TablesCreator.new(@client, @tables)
|
99
|
+
tables_creator.create
|
146
100
|
end
|
147
101
|
|
148
|
-
def
|
149
|
-
|
102
|
+
def shutdown
|
103
|
+
super
|
104
|
+
@emitter.shutdown
|
105
|
+
@client.shutdown
|
150
106
|
end
|
151
107
|
|
152
|
-
def
|
153
|
-
|
108
|
+
def multi_workers_ready?
|
109
|
+
true
|
154
110
|
end
|
155
111
|
|
156
|
-
def
|
157
|
-
|
112
|
+
def format(tag, time, record)
|
113
|
+
[tag, time, record].to_msgpack
|
158
114
|
end
|
159
115
|
|
160
|
-
def
|
161
|
-
|
116
|
+
def formatted_to_msgpack_binary
|
117
|
+
true
|
162
118
|
end
|
163
119
|
|
164
|
-
def
|
165
|
-
@
|
120
|
+
def write(chunk)
|
121
|
+
@emitter.emit(chunk)
|
166
122
|
end
|
167
123
|
|
168
|
-
|
169
|
-
|
170
|
-
|
124
|
+
private
|
125
|
+
def create_client(protocol)
|
126
|
+
case protocol
|
127
|
+
when :http, :gqtp
|
128
|
+
NetworkClient.new(protocol, self)
|
129
|
+
when :command
|
130
|
+
CommandClient.new(self)
|
171
131
|
end
|
172
132
|
end
|
173
133
|
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
# TODO
|
185
|
-
# return true if table.token_filters.sort != token_filters.sort
|
186
|
-
|
187
|
-
return true if table.normalizer != normalizer
|
188
|
-
|
189
|
-
false
|
190
|
-
end
|
134
|
+
module DefinitionParseMethods
|
135
|
+
private
|
136
|
+
def parse_flags(flags)
|
137
|
+
if flags.is_a?(Array)
|
138
|
+
flags
|
139
|
+
else
|
140
|
+
flags.strip.split(/\s*\|\s*/)
|
141
|
+
end
|
142
|
+
end
|
191
143
|
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
# "token_filters" => token_filters.join("|"),
|
200
|
-
"normalizer" => normalizer,
|
201
|
-
}
|
202
|
-
arguments.keys.each do |key|
|
203
|
-
value = arguments[key]
|
204
|
-
arguments.delete(key) if value.nil? or value.empty?
|
205
|
-
end
|
206
|
-
arguments
|
144
|
+
def parse_items(items)
|
145
|
+
if items.is_a?(Array)
|
146
|
+
items
|
147
|
+
else
|
148
|
+
items.strip.split(/\s*,\s*/)
|
149
|
+
end
|
150
|
+
end
|
207
151
|
end
|
208
152
|
|
209
|
-
class
|
153
|
+
class TableDefinition
|
210
154
|
include DefinitionParseMethods
|
211
155
|
|
212
|
-
def initialize(
|
213
|
-
@table = table
|
156
|
+
def initialize(raw)
|
214
157
|
@raw = raw
|
215
158
|
end
|
216
159
|
|
@@ -218,547 +161,619 @@ module Fluent
|
|
218
161
|
@raw[:name]
|
219
162
|
end
|
220
163
|
|
221
|
-
def
|
222
|
-
@raw[:
|
164
|
+
def flags
|
165
|
+
parse_flags(@raw[:flags] || "TABLE_NO_KEY")
|
223
166
|
end
|
224
167
|
|
225
|
-
def
|
226
|
-
|
168
|
+
def key_type
|
169
|
+
@raw[:key_type]
|
227
170
|
end
|
228
171
|
|
229
|
-
def
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
172
|
+
def default_tokenizer
|
173
|
+
@raw[:default_tokenizer]
|
174
|
+
end
|
175
|
+
|
176
|
+
def token_filters
|
177
|
+
parse_items(@raw[:token_filters] || "")
|
178
|
+
end
|
179
|
+
|
180
|
+
def normalizer
|
181
|
+
@raw[:normalizer]
|
182
|
+
end
|
183
|
+
|
184
|
+
def indexes
|
185
|
+
(@raw[:indexes] || []).collect do |raw|
|
186
|
+
IndexDefinition.new(self, raw)
|
187
|
+
end
|
188
|
+
end
|
189
|
+
|
190
|
+
def have_difference?(table)
|
191
|
+
return true if table.name != name
|
192
|
+
|
193
|
+
table_flags = (parse_flags(table.flags) - ["PERSISTENT"])
|
194
|
+
return true if table_flags.sort != flags.sort
|
195
|
+
|
196
|
+
return true if table.domain != key_type
|
197
|
+
|
198
|
+
return true if table.default_tokenizer != default_tokenizer
|
199
|
+
|
200
|
+
# TODO
|
201
|
+
# return true if table.token_filters.sort != token_filters.sort
|
202
|
+
|
203
|
+
return true if table.normalizer != normalizer
|
204
|
+
|
205
|
+
false
|
234
206
|
end
|
235
207
|
|
236
208
|
def to_create_arguments
|
237
|
-
{
|
238
|
-
"
|
239
|
-
"
|
240
|
-
"
|
241
|
-
"
|
242
|
-
|
209
|
+
arguments = {
|
210
|
+
"name" => name,
|
211
|
+
"flags" => flags.join("|"),
|
212
|
+
"key_type" => key_type,
|
213
|
+
"default_tokenizer" => default_tokenizer,
|
214
|
+
# TODO
|
215
|
+
# "token_filters" => token_filters.join("|"),
|
216
|
+
"normalizer" => normalizer,
|
243
217
|
}
|
218
|
+
arguments.keys.each do |key|
|
219
|
+
value = arguments[key]
|
220
|
+
arguments.delete(key) if value.nil? or value.empty?
|
221
|
+
end
|
222
|
+
arguments
|
244
223
|
end
|
245
|
-
end
|
246
|
-
end
|
247
224
|
|
248
|
-
|
249
|
-
|
250
|
-
@client = client
|
251
|
-
@definitions = definitions
|
252
|
-
end
|
225
|
+
class IndexDefinition
|
226
|
+
include DefinitionParseMethods
|
253
227
|
|
254
|
-
|
255
|
-
|
228
|
+
def initialize(table, raw)
|
229
|
+
@table = table
|
230
|
+
@raw = raw
|
231
|
+
end
|
256
232
|
|
257
|
-
|
258
|
-
|
259
|
-
existing_table = table_list.find do |table|
|
260
|
-
table.name == definition.name
|
233
|
+
def name
|
234
|
+
@raw[:name]
|
261
235
|
end
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
@client.execute("table_remove", "name" => definition.name)
|
236
|
+
|
237
|
+
def source_table
|
238
|
+
@raw[:source_table]
|
266
239
|
end
|
267
240
|
|
268
|
-
|
269
|
-
|
270
|
-
@client.execute("column_create", index.to_create_arguments)
|
241
|
+
def source_columns
|
242
|
+
parse_items(@raw[:source_columns])
|
271
243
|
end
|
272
|
-
end
|
273
|
-
end
|
274
|
-
end
|
275
244
|
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
@columns = nil
|
283
|
-
end
|
245
|
+
def flags
|
246
|
+
_flags = ["COLUMN_INDEX"]
|
247
|
+
_flags << "WITH_POSITION" if @table.default_tokenizer
|
248
|
+
_flags << "WITH_SECTION" if source_columns.size >= 2
|
249
|
+
_flags
|
250
|
+
end
|
284
251
|
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
column = @columns[key]
|
294
|
-
if column.nil?
|
295
|
-
nonexistent_columns[key] ||= []
|
296
|
-
nonexistent_columns[key] << value
|
297
|
-
end
|
252
|
+
def to_create_arguments
|
253
|
+
{
|
254
|
+
"table" => @table.name,
|
255
|
+
"name" => name,
|
256
|
+
"flags" => flags.join("|"),
|
257
|
+
"type" => source_table,
|
258
|
+
"source" => source_columns.join(","),
|
259
|
+
}
|
298
260
|
end
|
299
261
|
end
|
262
|
+
end
|
300
263
|
|
301
|
-
|
302
|
-
|
264
|
+
class TablesCreator
|
265
|
+
def initialize(client, definitions)
|
266
|
+
@client = client
|
267
|
+
@definitions = definitions
|
303
268
|
end
|
304
|
-
end
|
305
269
|
|
306
|
-
|
307
|
-
|
308
|
-
return if @target_table
|
270
|
+
def create
|
271
|
+
return if @definitions.empty?
|
309
272
|
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
273
|
+
table_list = @client.execute("table_list")
|
274
|
+
@definitions.each do |definition|
|
275
|
+
existing_table = table_list.find do |table|
|
276
|
+
table.name == definition.name
|
277
|
+
end
|
278
|
+
if existing_table
|
279
|
+
next unless definition.have_difference?(existing_table)
|
280
|
+
# TODO: Is it OK?
|
281
|
+
@client.execute("table_remove", "name" => definition.name)
|
282
|
+
end
|
318
283
|
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
@target_table = Table.new(@table_name)
|
325
|
-
@tables[@table_name] = @target_table
|
284
|
+
@client.execute("table_create", definition.to_create_arguments)
|
285
|
+
definition.indexes.each do |index|
|
286
|
+
@client.execute("column_create", index.to_create_arguments)
|
287
|
+
end
|
288
|
+
end
|
326
289
|
end
|
327
290
|
end
|
328
291
|
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
@columns[name] = Column.new(name, column.range, column.vector?)
|
337
|
-
ensure_column_indexes(name)
|
292
|
+
class Schema
|
293
|
+
def initialize(client, table_name, mappings)
|
294
|
+
@client = client
|
295
|
+
@table_name = table_name
|
296
|
+
@mappings = mappings
|
297
|
+
@taget_table = nil
|
298
|
+
@columns = nil
|
338
299
|
end
|
339
|
-
end
|
340
300
|
|
341
|
-
|
342
|
-
|
343
|
-
|
301
|
+
def update(records)
|
302
|
+
ensure_table
|
303
|
+
ensure_columns
|
344
304
|
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
flags = "COLUMN_VECTOR"
|
357
|
-
else
|
358
|
-
flags = "COLUMN_SCALAR"
|
359
|
-
end
|
360
|
-
@client.execute("column_create",
|
361
|
-
"table" => @table_name,
|
362
|
-
"name" => name,
|
363
|
-
"flags" => flags,
|
364
|
-
"type" => value_type)
|
365
|
-
ensure_column_indexes(name)
|
366
|
-
|
367
|
-
Column.new(name, value_type, vector_p)
|
368
|
-
end
|
305
|
+
nonexistent_columns = {}
|
306
|
+
records.each do |record|
|
307
|
+
record.each do |key, value|
|
308
|
+
next if pseudo_column_name?(key)
|
309
|
+
column = @columns[key]
|
310
|
+
if column.nil?
|
311
|
+
nonexistent_columns[key] ||= []
|
312
|
+
nonexistent_columns[key] << value
|
313
|
+
end
|
314
|
+
end
|
315
|
+
end
|
369
316
|
|
370
|
-
|
371
|
-
|
372
|
-
|
317
|
+
nonexistent_columns.each do |name, values|
|
318
|
+
@columns[name] = create_column(name, values)
|
319
|
+
end
|
373
320
|
end
|
374
|
-
return if mapping.nil?
|
375
321
|
|
376
|
-
|
377
|
-
|
378
|
-
if
|
379
|
-
|
380
|
-
|
381
|
-
|
322
|
+
private
|
323
|
+
def ensure_table
|
324
|
+
return if @target_table
|
325
|
+
|
326
|
+
@tables = {}
|
327
|
+
@client.execute("table_list").collect do |table|
|
328
|
+
name = table.name
|
329
|
+
options = {
|
330
|
+
:default_tokenizer => table.default_tokenizer,
|
331
|
+
}
|
332
|
+
@tables[name] = Table.new(table.name, options)
|
382
333
|
end
|
383
334
|
|
384
|
-
|
385
|
-
|
386
|
-
|
335
|
+
@target_table = @tables[@table_name]
|
336
|
+
unless @target_table
|
337
|
+
@client.execute("table_create",
|
338
|
+
"name" => @table_name,
|
339
|
+
"flags" => "TABLE_NO_KEY")
|
340
|
+
@target_table = Table.new(@table_name)
|
341
|
+
@tables[@table_name] = @target_table
|
342
|
+
end
|
343
|
+
end
|
387
344
|
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
345
|
+
def ensure_columns
|
346
|
+
return if @columns
|
347
|
+
|
348
|
+
column_list = @client.execute("column_list", "table" => @table_name)
|
349
|
+
@columns = {}
|
350
|
+
column_list.each do |column|
|
351
|
+
name = column.name
|
352
|
+
vector_p = column.flags.split("|").include?("COLUMN_VECTOR")
|
353
|
+
@columns[name] = Column.new(name, column.range, vector_p)
|
354
|
+
ensure_column_indexes(name)
|
355
|
+
end
|
394
356
|
end
|
395
|
-
end
|
396
357
|
|
397
|
-
|
398
|
-
|
399
|
-
@sample_values = sample_values
|
358
|
+
def pseudo_column_name?(name)
|
359
|
+
name.start_with?("_")
|
400
360
|
end
|
401
361
|
|
402
|
-
def
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
362
|
+
def create_column(name, sample_values)
|
363
|
+
mapping = @mappings.find do |mapping|
|
364
|
+
mapping.name == name
|
365
|
+
end
|
366
|
+
if mapping
|
367
|
+
value_type = mapping[:type]
|
368
|
+
end
|
369
|
+
guesser = TypeGuesser.new(sample_values)
|
370
|
+
value_type ||= guesser.guess
|
371
|
+
vector_p = guesser.vector?
|
372
|
+
if vector_p
|
373
|
+
flags = "COLUMN_VECTOR"
|
374
|
+
else
|
375
|
+
flags = "COLUMN_SCALAR"
|
376
|
+
end
|
377
|
+
@client.execute("column_create",
|
378
|
+
"table" => @table_name,
|
379
|
+
"name" => name,
|
380
|
+
"flags" => flags,
|
381
|
+
"type" => value_type)
|
382
|
+
ensure_column_indexes(name)
|
411
383
|
|
412
|
-
|
384
|
+
Column.new(name, value_type, vector_p)
|
413
385
|
end
|
414
386
|
|
415
|
-
def
|
416
|
-
@
|
417
|
-
|
387
|
+
def ensure_column_indexes(name)
|
388
|
+
mapping = @mappings.find do |_mapping|
|
389
|
+
_mapping.name == name
|
390
|
+
end
|
391
|
+
return if mapping.nil?
|
392
|
+
|
393
|
+
mapping.indexes.each do |index|
|
394
|
+
table = @tables[index[:table]]
|
395
|
+
if table
|
396
|
+
column_list = @client.execute("column_list", "table" => table.name)
|
397
|
+
exist = column_list.any? {|column| column.name == index[:name]}
|
398
|
+
next if exist
|
399
|
+
end
|
400
|
+
|
401
|
+
index_flags = ["COLUMN_INDEX"]
|
402
|
+
index_flags << "WITH_POSITION" if table and table.default_tokenizer
|
403
|
+
index_flags << index[:flags] if index[:flags]
|
404
|
+
|
405
|
+
@client.execute("column_create",
|
406
|
+
"table" => index[:table],
|
407
|
+
"name" => index[:name],
|
408
|
+
"flags" => index_flags.join("|"),
|
409
|
+
"type" => @table_name,
|
410
|
+
"source" => name)
|
418
411
|
end
|
419
412
|
end
|
420
413
|
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
414
|
+
class TypeGuesser
|
415
|
+
def initialize(sample_values)
|
416
|
+
@sample_values = sample_values
|
417
|
+
end
|
418
|
+
|
419
|
+
def guess
|
420
|
+
return "Bool" if bool_values?
|
421
|
+
return "Time" if time_values?
|
422
|
+
return "Int32" if int32_values?
|
423
|
+
return "Int64" if int64_values?
|
424
|
+
return "Float" if float_values?
|
425
|
+
return "WGS84GeoPoint" if geo_point_values?
|
426
|
+
return "LongText" if long_text_values?
|
427
|
+
return "Text" if text_values?
|
428
|
+
|
429
|
+
"ShortText"
|
430
|
+
end
|
431
|
+
|
432
|
+
def vector?
|
433
|
+
@sample_values.any? do |sample_value|
|
434
|
+
sample_value.is_a?(Array)
|
435
|
+
end
|
436
|
+
end
|
437
|
+
|
438
|
+
private
|
439
|
+
def integer_value?(value)
|
440
|
+
case value
|
441
|
+
when String
|
442
|
+
begin
|
443
|
+
Integer(value)
|
444
|
+
true
|
445
|
+
rescue ArgumentError
|
446
|
+
false
|
447
|
+
end
|
448
|
+
when Integer
|
427
449
|
true
|
428
|
-
|
450
|
+
else
|
429
451
|
false
|
430
452
|
end
|
431
|
-
when Integer
|
432
|
-
true
|
433
|
-
else
|
434
|
-
false
|
435
453
|
end
|
436
|
-
end
|
437
454
|
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
455
|
+
BOOL_VALUES = [
|
456
|
+
true,
|
457
|
+
false,
|
458
|
+
"true",
|
459
|
+
"false",
|
460
|
+
]
|
461
|
+
def bool_values?
|
462
|
+
@sample_values.all? do |sample_value|
|
463
|
+
BOOL_VALUES.include?(sample_value)
|
464
|
+
end
|
447
465
|
end
|
448
|
-
end
|
449
466
|
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
467
|
+
def time_values?
|
468
|
+
now = Time.now.to_i
|
469
|
+
year_in_seconds = 365 * 24 * 60 * 60
|
470
|
+
window = 10 * year_in_seconds
|
471
|
+
new = now + window
|
472
|
+
old = now - window
|
473
|
+
recent_range = old..new
|
474
|
+
@sample_values.all? do |sample_value|
|
475
|
+
integer_value?(sample_value) and
|
476
|
+
recent_range.cover?(Integer(sample_value))
|
477
|
+
end
|
460
478
|
end
|
461
|
-
end
|
462
479
|
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
480
|
+
def int32_values?
|
481
|
+
int32_min = -(2 ** 31)
|
482
|
+
int32_max = 2 ** 31 - 1
|
483
|
+
range = int32_min..int32_max
|
484
|
+
@sample_values.all? do |sample_value|
|
485
|
+
integer_value?(sample_value) and
|
486
|
+
range.cover?(Integer(sample_value))
|
487
|
+
end
|
470
488
|
end
|
471
|
-
end
|
472
489
|
|
473
|
-
|
474
|
-
|
475
|
-
|
490
|
+
def int64_values?
|
491
|
+
@sample_values.all? do |sample_value|
|
492
|
+
integer_value?(sample_value)
|
493
|
+
end
|
476
494
|
end
|
477
|
-
end
|
478
495
|
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
496
|
+
def float_value?(value)
|
497
|
+
case value
|
498
|
+
when String
|
499
|
+
begin
|
500
|
+
Float(value)
|
501
|
+
true
|
502
|
+
rescue ArgumentError
|
503
|
+
false
|
504
|
+
end
|
505
|
+
when Float
|
484
506
|
true
|
485
|
-
|
507
|
+
else
|
486
508
|
false
|
487
509
|
end
|
488
|
-
when Float
|
489
|
-
true
|
490
|
-
else
|
491
|
-
false
|
492
510
|
end
|
493
|
-
end
|
494
511
|
|
495
|
-
|
496
|
-
|
497
|
-
|
512
|
+
def float_values?
|
513
|
+
@sample_values.all? do |sample_value|
|
514
|
+
float_value?(sample_value)
|
515
|
+
end
|
498
516
|
end
|
499
|
-
end
|
500
517
|
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
518
|
+
def geo_point_values?
|
519
|
+
@sample_values.all? do |sample_value|
|
520
|
+
sample_value.is_a?(String) and
|
521
|
+
/\A-?\d+(?:\.\d+)[,x]-?\d+(?:\.\d+)\z/ =~ sample_value
|
522
|
+
end
|
505
523
|
end
|
506
|
-
end
|
507
524
|
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
525
|
+
MAX_SHORT_TEXT_SIZE = 2 ** 12
|
526
|
+
MAX_TEXT_SIZE = 2 ** 16
|
527
|
+
def text_values?
|
528
|
+
@sample_values.any? do |sample_value|
|
529
|
+
sample_value.is_a?(String) and
|
530
|
+
sample_value.bytesize > MAX_SHORT_TEXT_SIZE
|
531
|
+
end
|
514
532
|
end
|
515
|
-
end
|
516
533
|
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
534
|
+
def long_text_values?
|
535
|
+
@sample_values.any? do |sample_value|
|
536
|
+
sample_value.is_a?(String) and
|
537
|
+
sample_value.bytesize > MAX_TEXT_SIZE
|
538
|
+
end
|
521
539
|
end
|
522
540
|
end
|
523
|
-
end
|
524
541
|
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
542
|
+
class Table
|
543
|
+
attr_reader :name
|
544
|
+
attr_reader :flags
|
545
|
+
attr_reader :domain
|
546
|
+
attr_reader :range
|
547
|
+
attr_reader :default_tokenizer
|
548
|
+
attr_reader :normalizer
|
549
|
+
attr_reader :token_filters
|
550
|
+
def initialize(name, options={})
|
551
|
+
@name = name
|
552
|
+
@flags = options[:flags]
|
553
|
+
@domain = options[:domain]
|
554
|
+
@range = options[:range]
|
555
|
+
@default_tokenizer = options[:default_tokenizer]
|
556
|
+
@normalizer = options[:normalizer]
|
557
|
+
@token_filters = options[:token_filters]
|
558
|
+
end
|
541
559
|
end
|
542
|
-
end
|
543
560
|
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
561
|
+
class Column
|
562
|
+
def initialize(name, value_type, vector_p)
|
563
|
+
@name = name
|
564
|
+
@value_type = value_type
|
565
|
+
@vector_p = vector_p
|
566
|
+
end
|
549
567
|
end
|
550
568
|
end
|
551
|
-
end
|
552
569
|
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
570
|
+
class Emitter
|
571
|
+
def initialize(client, table, schema)
|
572
|
+
@client = client
|
573
|
+
@table = table
|
574
|
+
@schema = schema
|
575
|
+
end
|
559
576
|
|
560
|
-
|
561
|
-
|
577
|
+
def start
|
578
|
+
end
|
562
579
|
|
563
|
-
|
564
|
-
|
580
|
+
def shutdown
|
581
|
+
end
|
565
582
|
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
583
|
+
def emit(chunk)
|
584
|
+
records = []
|
585
|
+
chunk.msgpack_each do |message|
|
586
|
+
tag, _, record = message
|
587
|
+
if /\Agroonga\.command\./ =~ tag
|
588
|
+
name = $POSTMATCH
|
589
|
+
unless records.empty?
|
590
|
+
store_records(records)
|
591
|
+
records.clear
|
592
|
+
end
|
593
|
+
@client.execute(name, record)
|
594
|
+
else
|
595
|
+
records << record
|
576
596
|
end
|
577
|
-
@client.execute(name, record)
|
578
|
-
when "groonga.command"
|
579
|
-
name = record["name"]
|
580
|
-
arguments = record["arguments"]
|
581
|
-
@client.execute(name, arguments)
|
582
|
-
else
|
583
|
-
records << record
|
584
597
|
end
|
598
|
+
store_records(records) unless records.empty?
|
585
599
|
end
|
586
|
-
store_records(records) unless records.empty?
|
587
|
-
end
|
588
600
|
|
589
|
-
|
590
|
-
|
591
|
-
|
601
|
+
private
|
602
|
+
def store_records(records)
|
603
|
+
return if @table.nil?
|
592
604
|
|
593
|
-
|
605
|
+
@schema.update(records)
|
594
606
|
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
607
|
+
arguments = {
|
608
|
+
"table" => @table,
|
609
|
+
"values" => Yajl::Encoder.encode(records),
|
610
|
+
}
|
611
|
+
@client.execute("load", arguments)
|
612
|
+
end
|
600
613
|
end
|
601
|
-
end
|
602
614
|
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
615
|
+
class BaseClient
|
616
|
+
private
|
617
|
+
def build_command(name, arguments={})
|
618
|
+
command_class = Groonga::Command.find(name)
|
619
|
+
command_class.new(name, arguments)
|
620
|
+
end
|
608
621
|
end
|
609
|
-
end
|
610
622
|
|
611
|
-
|
612
|
-
|
623
|
+
class NetworkClient < BaseClient
|
624
|
+
include Configurable
|
613
625
|
|
614
|
-
|
615
|
-
|
626
|
+
config_param :host, :string, :default => nil
|
627
|
+
config_param :port, :integer, :default => nil
|
616
628
|
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
629
|
+
def initialize(protocol, output_plugin)
|
630
|
+
super()
|
631
|
+
@protocol = protocol
|
632
|
+
@output_plugin = output_plugin
|
633
|
+
end
|
621
634
|
|
622
|
-
|
623
|
-
|
624
|
-
|
635
|
+
def start
|
636
|
+
@client = nil
|
637
|
+
end
|
625
638
|
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
639
|
+
def shutdown
|
640
|
+
return if @client.nil?
|
641
|
+
@client.close
|
642
|
+
end
|
630
643
|
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
644
|
+
def execute(name, arguments={})
|
645
|
+
command = build_command(name, arguments)
|
646
|
+
@client ||= Groonga::Client.new(:protocol => @protocol,
|
647
|
+
:host => @host,
|
648
|
+
:port => @port,
|
649
|
+
:backend => :synchronous)
|
650
|
+
response = nil
|
651
|
+
begin
|
652
|
+
response = @client.execute(command)
|
653
|
+
rescue Groonga::Client::Error
|
654
|
+
@output_plugin.log.error("[output][groonga][error]",
|
655
|
+
:protocol => @protocol,
|
656
|
+
:host => @host,
|
657
|
+
:port => @port,
|
658
|
+
:command_name => name)
|
659
|
+
raise
|
660
|
+
end
|
661
|
+
unless response.success?
|
662
|
+
@output_plugin.log.error("[output][groonga][error]",
|
663
|
+
:status_code => response.status_code,
|
664
|
+
:message => response.message)
|
665
|
+
end
|
666
|
+
response
|
667
|
+
end
|
654
668
|
end
|
655
|
-
end
|
656
|
-
|
657
|
-
class CommandClient < BaseClient
|
658
|
-
include Configurable
|
659
669
|
|
660
|
-
|
661
|
-
|
662
|
-
config_param :arguments, :default => [] do |value|
|
663
|
-
Shellwords.split(value)
|
664
|
-
end
|
670
|
+
class CommandClient < BaseClient
|
671
|
+
include Configurable
|
665
672
|
|
666
|
-
|
667
|
-
|
668
|
-
|
673
|
+
config_param :groonga, :string, :default => "groonga"
|
674
|
+
config_param :database, :string
|
675
|
+
config_param :arguments, :default => [] do |value|
|
676
|
+
Shellwords.split(value)
|
677
|
+
end
|
669
678
|
|
670
|
-
|
671
|
-
|
672
|
-
|
679
|
+
def initialize(output_plugin)
|
680
|
+
super()
|
681
|
+
@output_plugin = output_plugin
|
682
|
+
end
|
673
683
|
|
674
|
-
|
675
|
-
|
676
|
-
|
684
|
+
def configure(conf)
|
685
|
+
super
|
686
|
+
end
|
677
687
|
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
@output.close
|
682
|
-
@error.close
|
683
|
-
Process.waitpid(@pid)
|
684
|
-
end
|
688
|
+
def start
|
689
|
+
run_groonga
|
690
|
+
end
|
685
691
|
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
692
|
+
def shutdown
|
693
|
+
@input.close
|
694
|
+
read_output("shutdown")
|
695
|
+
@output.close
|
696
|
+
@error.close
|
697
|
+
Process.waitpid(@pid)
|
691
698
|
end
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
body
|
696
|
-
|
699
|
+
|
700
|
+
def execute(name, arguments={})
|
701
|
+
command = build_command(name, arguments)
|
702
|
+
body = nil
|
703
|
+
if command.name == "load"
|
704
|
+
body = command.arguments.delete(:values)
|
697
705
|
end
|
706
|
+
uri = command.to_uri_format
|
707
|
+
@input.write("#{uri}\n")
|
708
|
+
if body
|
709
|
+
body.each_line do |line|
|
710
|
+
@input.write("#{line}\n")
|
711
|
+
end
|
712
|
+
end
|
713
|
+
@input.flush
|
714
|
+
read_output(uri)
|
698
715
|
end
|
699
|
-
@input.flush
|
700
|
-
read_output(uri)
|
701
|
-
end
|
702
716
|
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
717
|
+
private
|
718
|
+
def run_groonga
|
719
|
+
env = {}
|
720
|
+
input = IO.pipe("ASCII-8BIT")
|
721
|
+
output = IO.pipe("ASCII-8BIT")
|
722
|
+
error = IO.pipe("ASCII-8BIT")
|
723
|
+
input_fd = input[0].to_i
|
724
|
+
output_fd = output[1].to_i
|
725
|
+
options = {
|
726
|
+
input_fd => input_fd,
|
727
|
+
output_fd => output_fd,
|
728
|
+
:err => error[1],
|
729
|
+
}
|
730
|
+
arguments = @arguments
|
731
|
+
arguments += [
|
732
|
+
"--input-fd", input_fd.to_s,
|
733
|
+
"--output-fd", output_fd.to_s,
|
734
|
+
]
|
735
|
+
unless File.exist?(@database)
|
736
|
+
FileUtils.mkdir_p(File.dirname(@database))
|
737
|
+
arguments << "-n"
|
738
|
+
end
|
739
|
+
arguments << @database
|
740
|
+
@pid = spawn(env, @groonga, *arguments, options)
|
741
|
+
input[0].close
|
742
|
+
@input = input[1]
|
743
|
+
output[1].close
|
744
|
+
@output = output[0]
|
745
|
+
error[1].close
|
746
|
+
@error = error[0]
|
747
|
+
end
|
748
|
+
|
749
|
+
def read_output(context)
|
750
|
+
output_message = ""
|
751
|
+
error_message = ""
|
752
|
+
|
753
|
+
loop do
|
754
|
+
readables = IO.select([@output, @error], nil, nil, 0)
|
755
|
+
break if readables.nil?
|
756
|
+
|
757
|
+
readables.each do |readable|
|
758
|
+
case readable
|
759
|
+
when @output
|
760
|
+
output_message << @output.gets
|
761
|
+
when @error
|
762
|
+
error_message << @error.gets
|
763
|
+
end
|
749
764
|
end
|
750
765
|
end
|
751
|
-
end
|
752
766
|
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
767
|
+
unless output_message.empty?
|
768
|
+
@output_plugin.log.debug("[output][groonga][output]",
|
769
|
+
:context => context,
|
770
|
+
:message => output_message)
|
771
|
+
end
|
772
|
+
unless error_message.empty?
|
773
|
+
@output_plugin.log.error("[output][groonga][error]",
|
774
|
+
:context => context,
|
775
|
+
:message => error_message)
|
776
|
+
end
|
762
777
|
end
|
763
778
|
end
|
764
779
|
end
|