fluent-plugin-groonga 1.1.8 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/doc/text/configuration.md +23 -20
- data/doc/text/constitution.md +19 -23
- data/doc/text/news.md +9 -3
- data/fluent-plugin-groonga.gemspec +3 -4
- data/lib/fluent/plugin/in_groonga.rb +357 -370
- data/lib/fluent/plugin/out_groonga.rb +599 -584
- data/sample/command.conf +5 -4
- data/sample/gqtp.conf +5 -4
- data/sample/http.conf +5 -4
- data/sample/store-apache.conf +5 -3
- data/sample/store-syslog.conf +5 -3
- data/sample/store.conf +5 -3
- data/test/output/test_table_definition.rb +1 -1
- data/test/output/test_table_index_definition.rb +2 -2
- data/test/output/test_type_guesser.rb +1 -1
- data/test/run-test.rb +4 -1
- data/test/test_input.rb +27 -75
- data/test/test_output.rb +27 -65
- metadata +6 -12
@@ -1,6 +1,3 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
#
|
3
|
-
# Copyright (C) 2018 Yasuhiro Horimoto <horimoto@clear-code.com>
|
4
1
|
# Copyright (C) 2012-2017 Kouhei Sutou <kou@clear-code.com>
|
5
2
|
#
|
6
3
|
# This library is free software; you can redistribute it and/or
|
@@ -22,195 +19,141 @@ require "yajl"
|
|
22
19
|
|
23
20
|
require "groonga/client"
|
24
21
|
|
22
|
+
require "fluent/plugin/output"
|
23
|
+
|
25
24
|
module Fluent
|
26
|
-
|
27
|
-
|
25
|
+
module Plugin
|
26
|
+
class GroongaOutput < Output
|
27
|
+
Plugin.register_output("groonga", self)
|
28
|
+
helpers :compat_parameters
|
28
29
|
|
29
|
-
|
30
|
-
|
31
|
-
|
30
|
+
def initialize
|
31
|
+
super
|
32
|
+
end
|
33
|
+
|
34
|
+
config_param :protocol, :enum, :list => [:http, :gqtp, :command], :default => :http
|
32
35
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
:param_name => "tables",
|
39
|
-
:required => false,
|
40
|
-
:multi => true do
|
41
|
-
config_param :name, :string
|
42
|
-
config_param :flags, :string, :default => nil
|
43
|
-
config_param :key_type, :string, :default => nil
|
44
|
-
config_param :default_tokenizer, :string, :default => nil
|
45
|
-
config_param :token_filters, :string, :default => nil
|
46
|
-
config_param :normalizer, :string, :default => nil
|
47
|
-
config_section :index,
|
48
|
-
:param_name => "indexes",
|
36
|
+
# alias is just for backward compatibility
|
37
|
+
config_param :store_table, :string, :default => nil, :alias => :table
|
38
|
+
|
39
|
+
config_section :table,
|
40
|
+
:param_name => "tables",
|
49
41
|
:required => false,
|
50
42
|
:multi => true do
|
51
43
|
config_param :name, :string
|
52
|
-
config_param :
|
53
|
-
config_param :
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
44
|
+
config_param :flags, :string, :default => nil
|
45
|
+
config_param :key_type, :string, :default => nil
|
46
|
+
config_param :default_tokenizer, :string, :default => nil
|
47
|
+
config_param :token_filters, :string, :default => nil
|
48
|
+
config_param :normalizer, :string, :default => nil
|
49
|
+
config_section :index,
|
50
|
+
:param_name => "indexes",
|
51
|
+
:required => false,
|
52
|
+
:multi => true do
|
53
|
+
config_param :name, :string
|
54
|
+
config_param :source_table, :string
|
55
|
+
config_param :source_columns, :string
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
config_section :mapping,
|
60
|
+
:param_name => "mappings",
|
65
61
|
:required => false,
|
66
62
|
:multi => true do
|
67
|
-
config_param :table, :string
|
68
63
|
config_param :name, :string
|
69
|
-
config_param :
|
64
|
+
config_param :type, :string, :default => nil
|
65
|
+
config_section :index,
|
66
|
+
:param_name => "indexes",
|
67
|
+
:required => false,
|
68
|
+
:multi => true do
|
69
|
+
config_param :table, :string
|
70
|
+
config_param :name, :string
|
71
|
+
config_param :flags, :string, :default => nil
|
72
|
+
end
|
70
73
|
end
|
71
|
-
end
|
72
|
-
|
73
|
-
def configure(conf)
|
74
|
-
super
|
75
|
-
@client = create_client(@protocol)
|
76
|
-
@client.configure(conf)
|
77
74
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
@tables = @tables.collect do |table|
|
82
|
-
TableDefinition.new(table)
|
75
|
+
config_section :buffer do
|
76
|
+
config_set_default :@type, "memory"
|
77
|
+
config_set_default :chunk_keys, ['tag']
|
83
78
|
end
|
84
|
-
end
|
85
|
-
|
86
|
-
def start
|
87
|
-
super
|
88
|
-
@client.start
|
89
|
-
@emitter.start
|
90
|
-
tables_creator = TablesCreator.new(@client, @tables)
|
91
|
-
tables_creator.create
|
92
|
-
end
|
93
|
-
|
94
|
-
def shutdown
|
95
|
-
super
|
96
|
-
@emitter.shutdown
|
97
|
-
@client.shutdown
|
98
|
-
end
|
99
|
-
|
100
|
-
def format(tag, time, record)
|
101
|
-
[tag, time, record].to_msgpack
|
102
|
-
end
|
103
79
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
def create_client(protocol)
|
110
|
-
case protocol
|
111
|
-
when :http, :gqtp
|
112
|
-
NetworkClient.new(protocol)
|
113
|
-
when :command
|
114
|
-
CommandClient.new
|
115
|
-
end
|
116
|
-
end
|
80
|
+
def configure(conf)
|
81
|
+
compat_parameters_convert(conf, :buffer)
|
82
|
+
super
|
83
|
+
@client = create_client(@protocol)
|
84
|
+
@client.configure(conf)
|
117
85
|
|
118
|
-
|
119
|
-
|
120
|
-
def parse_flags(flags)
|
121
|
-
if flags.is_a?(Array)
|
122
|
-
flags
|
123
|
-
else
|
124
|
-
flags.strip.split(/\s*\|\s*/)
|
125
|
-
end
|
126
|
-
end
|
86
|
+
@schema = Schema.new(@client, @store_table, @mappings)
|
87
|
+
@emitter = Emitter.new(@client, @store_table, @schema)
|
127
88
|
|
128
|
-
|
129
|
-
|
130
|
-
items
|
131
|
-
else
|
132
|
-
items.strip.split(/\s*,\s*/)
|
89
|
+
@tables = @tables.collect do |table|
|
90
|
+
TableDefinition.new(table)
|
133
91
|
end
|
134
92
|
end
|
135
|
-
end
|
136
|
-
|
137
|
-
class TableDefinition
|
138
|
-
include DefinitionParseMethods
|
139
|
-
|
140
|
-
def initialize(raw)
|
141
|
-
@raw = raw
|
142
|
-
end
|
143
93
|
|
144
|
-
def
|
145
|
-
|
94
|
+
def start
|
95
|
+
super
|
96
|
+
@client.start
|
97
|
+
@emitter.start
|
98
|
+
tables_creator = TablesCreator.new(@client, @tables)
|
99
|
+
tables_creator.create
|
146
100
|
end
|
147
101
|
|
148
|
-
def
|
149
|
-
|
102
|
+
def shutdown
|
103
|
+
super
|
104
|
+
@emitter.shutdown
|
105
|
+
@client.shutdown
|
150
106
|
end
|
151
107
|
|
152
|
-
def
|
153
|
-
|
108
|
+
def multi_workers_ready?
|
109
|
+
true
|
154
110
|
end
|
155
111
|
|
156
|
-
def
|
157
|
-
|
112
|
+
def format(tag, time, record)
|
113
|
+
[tag, time, record].to_msgpack
|
158
114
|
end
|
159
115
|
|
160
|
-
def
|
161
|
-
|
116
|
+
def formatted_to_msgpack_binary
|
117
|
+
true
|
162
118
|
end
|
163
119
|
|
164
|
-
def
|
165
|
-
@
|
120
|
+
def write(chunk)
|
121
|
+
@emitter.emit(chunk)
|
166
122
|
end
|
167
123
|
|
168
|
-
|
169
|
-
|
170
|
-
|
124
|
+
private
|
125
|
+
def create_client(protocol)
|
126
|
+
case protocol
|
127
|
+
when :http, :gqtp
|
128
|
+
NetworkClient.new(protocol, self)
|
129
|
+
when :command
|
130
|
+
CommandClient.new(self)
|
171
131
|
end
|
172
132
|
end
|
173
133
|
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
# TODO
|
185
|
-
# return true if table.token_filters.sort != token_filters.sort
|
186
|
-
|
187
|
-
return true if table.normalizer != normalizer
|
188
|
-
|
189
|
-
false
|
190
|
-
end
|
134
|
+
module DefinitionParseMethods
|
135
|
+
private
|
136
|
+
def parse_flags(flags)
|
137
|
+
if flags.is_a?(Array)
|
138
|
+
flags
|
139
|
+
else
|
140
|
+
flags.strip.split(/\s*\|\s*/)
|
141
|
+
end
|
142
|
+
end
|
191
143
|
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
# "token_filters" => token_filters.join("|"),
|
200
|
-
"normalizer" => normalizer,
|
201
|
-
}
|
202
|
-
arguments.keys.each do |key|
|
203
|
-
value = arguments[key]
|
204
|
-
arguments.delete(key) if value.nil? or value.empty?
|
205
|
-
end
|
206
|
-
arguments
|
144
|
+
def parse_items(items)
|
145
|
+
if items.is_a?(Array)
|
146
|
+
items
|
147
|
+
else
|
148
|
+
items.strip.split(/\s*,\s*/)
|
149
|
+
end
|
150
|
+
end
|
207
151
|
end
|
208
152
|
|
209
|
-
class
|
153
|
+
class TableDefinition
|
210
154
|
include DefinitionParseMethods
|
211
155
|
|
212
|
-
def initialize(
|
213
|
-
@table = table
|
156
|
+
def initialize(raw)
|
214
157
|
@raw = raw
|
215
158
|
end
|
216
159
|
|
@@ -218,547 +161,619 @@ module Fluent
|
|
218
161
|
@raw[:name]
|
219
162
|
end
|
220
163
|
|
221
|
-
def
|
222
|
-
@raw[:
|
164
|
+
def flags
|
165
|
+
parse_flags(@raw[:flags] || "TABLE_NO_KEY")
|
223
166
|
end
|
224
167
|
|
225
|
-
def
|
226
|
-
|
168
|
+
def key_type
|
169
|
+
@raw[:key_type]
|
227
170
|
end
|
228
171
|
|
229
|
-
def
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
172
|
+
def default_tokenizer
|
173
|
+
@raw[:default_tokenizer]
|
174
|
+
end
|
175
|
+
|
176
|
+
def token_filters
|
177
|
+
parse_items(@raw[:token_filters] || "")
|
178
|
+
end
|
179
|
+
|
180
|
+
def normalizer
|
181
|
+
@raw[:normalizer]
|
182
|
+
end
|
183
|
+
|
184
|
+
def indexes
|
185
|
+
(@raw[:indexes] || []).collect do |raw|
|
186
|
+
IndexDefinition.new(self, raw)
|
187
|
+
end
|
188
|
+
end
|
189
|
+
|
190
|
+
def have_difference?(table)
|
191
|
+
return true if table.name != name
|
192
|
+
|
193
|
+
table_flags = (parse_flags(table.flags) - ["PERSISTENT"])
|
194
|
+
return true if table_flags.sort != flags.sort
|
195
|
+
|
196
|
+
return true if table.domain != key_type
|
197
|
+
|
198
|
+
return true if table.default_tokenizer != default_tokenizer
|
199
|
+
|
200
|
+
# TODO
|
201
|
+
# return true if table.token_filters.sort != token_filters.sort
|
202
|
+
|
203
|
+
return true if table.normalizer != normalizer
|
204
|
+
|
205
|
+
false
|
234
206
|
end
|
235
207
|
|
236
208
|
def to_create_arguments
|
237
|
-
{
|
238
|
-
"
|
239
|
-
"
|
240
|
-
"
|
241
|
-
"
|
242
|
-
|
209
|
+
arguments = {
|
210
|
+
"name" => name,
|
211
|
+
"flags" => flags.join("|"),
|
212
|
+
"key_type" => key_type,
|
213
|
+
"default_tokenizer" => default_tokenizer,
|
214
|
+
# TODO
|
215
|
+
# "token_filters" => token_filters.join("|"),
|
216
|
+
"normalizer" => normalizer,
|
243
217
|
}
|
218
|
+
arguments.keys.each do |key|
|
219
|
+
value = arguments[key]
|
220
|
+
arguments.delete(key) if value.nil? or value.empty?
|
221
|
+
end
|
222
|
+
arguments
|
244
223
|
end
|
245
|
-
end
|
246
|
-
end
|
247
224
|
|
248
|
-
|
249
|
-
|
250
|
-
@client = client
|
251
|
-
@definitions = definitions
|
252
|
-
end
|
225
|
+
class IndexDefinition
|
226
|
+
include DefinitionParseMethods
|
253
227
|
|
254
|
-
|
255
|
-
|
228
|
+
def initialize(table, raw)
|
229
|
+
@table = table
|
230
|
+
@raw = raw
|
231
|
+
end
|
256
232
|
|
257
|
-
|
258
|
-
|
259
|
-
existing_table = table_list.find do |table|
|
260
|
-
table.name == definition.name
|
233
|
+
def name
|
234
|
+
@raw[:name]
|
261
235
|
end
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
@client.execute("table_remove", "name" => definition.name)
|
236
|
+
|
237
|
+
def source_table
|
238
|
+
@raw[:source_table]
|
266
239
|
end
|
267
240
|
|
268
|
-
|
269
|
-
|
270
|
-
@client.execute("column_create", index.to_create_arguments)
|
241
|
+
def source_columns
|
242
|
+
parse_items(@raw[:source_columns])
|
271
243
|
end
|
272
|
-
end
|
273
|
-
end
|
274
|
-
end
|
275
244
|
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
@columns = nil
|
283
|
-
end
|
245
|
+
def flags
|
246
|
+
_flags = ["COLUMN_INDEX"]
|
247
|
+
_flags << "WITH_POSITION" if @table.default_tokenizer
|
248
|
+
_flags << "WITH_SECTION" if source_columns.size >= 2
|
249
|
+
_flags
|
250
|
+
end
|
284
251
|
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
column = @columns[key]
|
294
|
-
if column.nil?
|
295
|
-
nonexistent_columns[key] ||= []
|
296
|
-
nonexistent_columns[key] << value
|
297
|
-
end
|
252
|
+
def to_create_arguments
|
253
|
+
{
|
254
|
+
"table" => @table.name,
|
255
|
+
"name" => name,
|
256
|
+
"flags" => flags.join("|"),
|
257
|
+
"type" => source_table,
|
258
|
+
"source" => source_columns.join(","),
|
259
|
+
}
|
298
260
|
end
|
299
261
|
end
|
262
|
+
end
|
300
263
|
|
301
|
-
|
302
|
-
|
264
|
+
class TablesCreator
|
265
|
+
def initialize(client, definitions)
|
266
|
+
@client = client
|
267
|
+
@definitions = definitions
|
303
268
|
end
|
304
|
-
end
|
305
269
|
|
306
|
-
|
307
|
-
|
308
|
-
return if @target_table
|
270
|
+
def create
|
271
|
+
return if @definitions.empty?
|
309
272
|
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
273
|
+
table_list = @client.execute("table_list")
|
274
|
+
@definitions.each do |definition|
|
275
|
+
existing_table = table_list.find do |table|
|
276
|
+
table.name == definition.name
|
277
|
+
end
|
278
|
+
if existing_table
|
279
|
+
next unless definition.have_difference?(existing_table)
|
280
|
+
# TODO: Is it OK?
|
281
|
+
@client.execute("table_remove", "name" => definition.name)
|
282
|
+
end
|
318
283
|
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
@target_table = Table.new(@table_name)
|
325
|
-
@tables[@table_name] = @target_table
|
284
|
+
@client.execute("table_create", definition.to_create_arguments)
|
285
|
+
definition.indexes.each do |index|
|
286
|
+
@client.execute("column_create", index.to_create_arguments)
|
287
|
+
end
|
288
|
+
end
|
326
289
|
end
|
327
290
|
end
|
328
291
|
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
@columns[name] = Column.new(name, column.range, column.vector?)
|
337
|
-
ensure_column_indexes(name)
|
292
|
+
class Schema
|
293
|
+
def initialize(client, table_name, mappings)
|
294
|
+
@client = client
|
295
|
+
@table_name = table_name
|
296
|
+
@mappings = mappings
|
297
|
+
@taget_table = nil
|
298
|
+
@columns = nil
|
338
299
|
end
|
339
|
-
end
|
340
300
|
|
341
|
-
|
342
|
-
|
343
|
-
|
301
|
+
def update(records)
|
302
|
+
ensure_table
|
303
|
+
ensure_columns
|
344
304
|
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
flags = "COLUMN_VECTOR"
|
357
|
-
else
|
358
|
-
flags = "COLUMN_SCALAR"
|
359
|
-
end
|
360
|
-
@client.execute("column_create",
|
361
|
-
"table" => @table_name,
|
362
|
-
"name" => name,
|
363
|
-
"flags" => flags,
|
364
|
-
"type" => value_type)
|
365
|
-
ensure_column_indexes(name)
|
366
|
-
|
367
|
-
Column.new(name, value_type, vector_p)
|
368
|
-
end
|
305
|
+
nonexistent_columns = {}
|
306
|
+
records.each do |record|
|
307
|
+
record.each do |key, value|
|
308
|
+
next if pseudo_column_name?(key)
|
309
|
+
column = @columns[key]
|
310
|
+
if column.nil?
|
311
|
+
nonexistent_columns[key] ||= []
|
312
|
+
nonexistent_columns[key] << value
|
313
|
+
end
|
314
|
+
end
|
315
|
+
end
|
369
316
|
|
370
|
-
|
371
|
-
|
372
|
-
|
317
|
+
nonexistent_columns.each do |name, values|
|
318
|
+
@columns[name] = create_column(name, values)
|
319
|
+
end
|
373
320
|
end
|
374
|
-
return if mapping.nil?
|
375
321
|
|
376
|
-
|
377
|
-
|
378
|
-
if
|
379
|
-
|
380
|
-
|
381
|
-
|
322
|
+
private
|
323
|
+
def ensure_table
|
324
|
+
return if @target_table
|
325
|
+
|
326
|
+
@tables = {}
|
327
|
+
@client.execute("table_list").collect do |table|
|
328
|
+
name = table.name
|
329
|
+
options = {
|
330
|
+
:default_tokenizer => table.default_tokenizer,
|
331
|
+
}
|
332
|
+
@tables[name] = Table.new(table.name, options)
|
382
333
|
end
|
383
334
|
|
384
|
-
|
385
|
-
|
386
|
-
|
335
|
+
@target_table = @tables[@table_name]
|
336
|
+
unless @target_table
|
337
|
+
@client.execute("table_create",
|
338
|
+
"name" => @table_name,
|
339
|
+
"flags" => "TABLE_NO_KEY")
|
340
|
+
@target_table = Table.new(@table_name)
|
341
|
+
@tables[@table_name] = @target_table
|
342
|
+
end
|
343
|
+
end
|
387
344
|
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
345
|
+
def ensure_columns
|
346
|
+
return if @columns
|
347
|
+
|
348
|
+
column_list = @client.execute("column_list", "table" => @table_name)
|
349
|
+
@columns = {}
|
350
|
+
column_list.each do |column|
|
351
|
+
name = column.name
|
352
|
+
vector_p = column.flags.split("|").include?("COLUMN_VECTOR")
|
353
|
+
@columns[name] = Column.new(name, column.range, vector_p)
|
354
|
+
ensure_column_indexes(name)
|
355
|
+
end
|
394
356
|
end
|
395
|
-
end
|
396
357
|
|
397
|
-
|
398
|
-
|
399
|
-
@sample_values = sample_values
|
358
|
+
def pseudo_column_name?(name)
|
359
|
+
name.start_with?("_")
|
400
360
|
end
|
401
361
|
|
402
|
-
def
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
362
|
+
def create_column(name, sample_values)
|
363
|
+
mapping = @mappings.find do |mapping|
|
364
|
+
mapping.name == name
|
365
|
+
end
|
366
|
+
if mapping
|
367
|
+
value_type = mapping[:type]
|
368
|
+
end
|
369
|
+
guesser = TypeGuesser.new(sample_values)
|
370
|
+
value_type ||= guesser.guess
|
371
|
+
vector_p = guesser.vector?
|
372
|
+
if vector_p
|
373
|
+
flags = "COLUMN_VECTOR"
|
374
|
+
else
|
375
|
+
flags = "COLUMN_SCALAR"
|
376
|
+
end
|
377
|
+
@client.execute("column_create",
|
378
|
+
"table" => @table_name,
|
379
|
+
"name" => name,
|
380
|
+
"flags" => flags,
|
381
|
+
"type" => value_type)
|
382
|
+
ensure_column_indexes(name)
|
411
383
|
|
412
|
-
|
384
|
+
Column.new(name, value_type, vector_p)
|
413
385
|
end
|
414
386
|
|
415
|
-
def
|
416
|
-
@
|
417
|
-
|
387
|
+
def ensure_column_indexes(name)
|
388
|
+
mapping = @mappings.find do |_mapping|
|
389
|
+
_mapping.name == name
|
390
|
+
end
|
391
|
+
return if mapping.nil?
|
392
|
+
|
393
|
+
mapping.indexes.each do |index|
|
394
|
+
table = @tables[index[:table]]
|
395
|
+
if table
|
396
|
+
column_list = @client.execute("column_list", "table" => table.name)
|
397
|
+
exist = column_list.any? {|column| column.name == index[:name]}
|
398
|
+
next if exist
|
399
|
+
end
|
400
|
+
|
401
|
+
index_flags = ["COLUMN_INDEX"]
|
402
|
+
index_flags << "WITH_POSITION" if table and table.default_tokenizer
|
403
|
+
index_flags << index[:flags] if index[:flags]
|
404
|
+
|
405
|
+
@client.execute("column_create",
|
406
|
+
"table" => index[:table],
|
407
|
+
"name" => index[:name],
|
408
|
+
"flags" => index_flags.join("|"),
|
409
|
+
"type" => @table_name,
|
410
|
+
"source" => name)
|
418
411
|
end
|
419
412
|
end
|
420
413
|
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
414
|
+
class TypeGuesser
|
415
|
+
def initialize(sample_values)
|
416
|
+
@sample_values = sample_values
|
417
|
+
end
|
418
|
+
|
419
|
+
def guess
|
420
|
+
return "Bool" if bool_values?
|
421
|
+
return "Time" if time_values?
|
422
|
+
return "Int32" if int32_values?
|
423
|
+
return "Int64" if int64_values?
|
424
|
+
return "Float" if float_values?
|
425
|
+
return "WGS84GeoPoint" if geo_point_values?
|
426
|
+
return "LongText" if long_text_values?
|
427
|
+
return "Text" if text_values?
|
428
|
+
|
429
|
+
"ShortText"
|
430
|
+
end
|
431
|
+
|
432
|
+
def vector?
|
433
|
+
@sample_values.any? do |sample_value|
|
434
|
+
sample_value.is_a?(Array)
|
435
|
+
end
|
436
|
+
end
|
437
|
+
|
438
|
+
private
|
439
|
+
def integer_value?(value)
|
440
|
+
case value
|
441
|
+
when String
|
442
|
+
begin
|
443
|
+
Integer(value)
|
444
|
+
true
|
445
|
+
rescue ArgumentError
|
446
|
+
false
|
447
|
+
end
|
448
|
+
when Integer
|
427
449
|
true
|
428
|
-
|
450
|
+
else
|
429
451
|
false
|
430
452
|
end
|
431
|
-
when Integer
|
432
|
-
true
|
433
|
-
else
|
434
|
-
false
|
435
453
|
end
|
436
|
-
end
|
437
454
|
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
455
|
+
BOOL_VALUES = [
|
456
|
+
true,
|
457
|
+
false,
|
458
|
+
"true",
|
459
|
+
"false",
|
460
|
+
]
|
461
|
+
def bool_values?
|
462
|
+
@sample_values.all? do |sample_value|
|
463
|
+
BOOL_VALUES.include?(sample_value)
|
464
|
+
end
|
447
465
|
end
|
448
|
-
end
|
449
466
|
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
467
|
+
def time_values?
|
468
|
+
now = Time.now.to_i
|
469
|
+
year_in_seconds = 365 * 24 * 60 * 60
|
470
|
+
window = 10 * year_in_seconds
|
471
|
+
new = now + window
|
472
|
+
old = now - window
|
473
|
+
recent_range = old..new
|
474
|
+
@sample_values.all? do |sample_value|
|
475
|
+
integer_value?(sample_value) and
|
476
|
+
recent_range.cover?(Integer(sample_value))
|
477
|
+
end
|
460
478
|
end
|
461
|
-
end
|
462
479
|
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
480
|
+
def int32_values?
|
481
|
+
int32_min = -(2 ** 31)
|
482
|
+
int32_max = 2 ** 31 - 1
|
483
|
+
range = int32_min..int32_max
|
484
|
+
@sample_values.all? do |sample_value|
|
485
|
+
integer_value?(sample_value) and
|
486
|
+
range.cover?(Integer(sample_value))
|
487
|
+
end
|
470
488
|
end
|
471
|
-
end
|
472
489
|
|
473
|
-
|
474
|
-
|
475
|
-
|
490
|
+
def int64_values?
|
491
|
+
@sample_values.all? do |sample_value|
|
492
|
+
integer_value?(sample_value)
|
493
|
+
end
|
476
494
|
end
|
477
|
-
end
|
478
495
|
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
496
|
+
def float_value?(value)
|
497
|
+
case value
|
498
|
+
when String
|
499
|
+
begin
|
500
|
+
Float(value)
|
501
|
+
true
|
502
|
+
rescue ArgumentError
|
503
|
+
false
|
504
|
+
end
|
505
|
+
when Float
|
484
506
|
true
|
485
|
-
|
507
|
+
else
|
486
508
|
false
|
487
509
|
end
|
488
|
-
when Float
|
489
|
-
true
|
490
|
-
else
|
491
|
-
false
|
492
510
|
end
|
493
|
-
end
|
494
511
|
|
495
|
-
|
496
|
-
|
497
|
-
|
512
|
+
def float_values?
|
513
|
+
@sample_values.all? do |sample_value|
|
514
|
+
float_value?(sample_value)
|
515
|
+
end
|
498
516
|
end
|
499
|
-
end
|
500
517
|
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
518
|
+
def geo_point_values?
|
519
|
+
@sample_values.all? do |sample_value|
|
520
|
+
sample_value.is_a?(String) and
|
521
|
+
/\A-?\d+(?:\.\d+)[,x]-?\d+(?:\.\d+)\z/ =~ sample_value
|
522
|
+
end
|
505
523
|
end
|
506
|
-
end
|
507
524
|
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
525
|
+
MAX_SHORT_TEXT_SIZE = 2 ** 12
|
526
|
+
MAX_TEXT_SIZE = 2 ** 16
|
527
|
+
def text_values?
|
528
|
+
@sample_values.any? do |sample_value|
|
529
|
+
sample_value.is_a?(String) and
|
530
|
+
sample_value.bytesize > MAX_SHORT_TEXT_SIZE
|
531
|
+
end
|
514
532
|
end
|
515
|
-
end
|
516
533
|
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
534
|
+
def long_text_values?
|
535
|
+
@sample_values.any? do |sample_value|
|
536
|
+
sample_value.is_a?(String) and
|
537
|
+
sample_value.bytesize > MAX_TEXT_SIZE
|
538
|
+
end
|
521
539
|
end
|
522
540
|
end
|
523
|
-
end
|
524
541
|
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
542
|
+
class Table
|
543
|
+
attr_reader :name
|
544
|
+
attr_reader :flags
|
545
|
+
attr_reader :domain
|
546
|
+
attr_reader :range
|
547
|
+
attr_reader :default_tokenizer
|
548
|
+
attr_reader :normalizer
|
549
|
+
attr_reader :token_filters
|
550
|
+
def initialize(name, options={})
|
551
|
+
@name = name
|
552
|
+
@flags = options[:flags]
|
553
|
+
@domain = options[:domain]
|
554
|
+
@range = options[:range]
|
555
|
+
@default_tokenizer = options[:default_tokenizer]
|
556
|
+
@normalizer = options[:normalizer]
|
557
|
+
@token_filters = options[:token_filters]
|
558
|
+
end
|
541
559
|
end
|
542
|
-
end
|
543
560
|
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
561
|
+
class Column
|
562
|
+
def initialize(name, value_type, vector_p)
|
563
|
+
@name = name
|
564
|
+
@value_type = value_type
|
565
|
+
@vector_p = vector_p
|
566
|
+
end
|
549
567
|
end
|
550
568
|
end
|
551
|
-
end
|
552
569
|
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
570
|
+
class Emitter
|
571
|
+
def initialize(client, table, schema)
|
572
|
+
@client = client
|
573
|
+
@table = table
|
574
|
+
@schema = schema
|
575
|
+
end
|
559
576
|
|
560
|
-
|
561
|
-
|
577
|
+
def start
|
578
|
+
end
|
562
579
|
|
563
|
-
|
564
|
-
|
580
|
+
def shutdown
|
581
|
+
end
|
565
582
|
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
583
|
+
def emit(chunk)
|
584
|
+
records = []
|
585
|
+
chunk.msgpack_each do |message|
|
586
|
+
tag, _, record = message
|
587
|
+
if /\Agroonga\.command\./ =~ tag
|
588
|
+
name = $POSTMATCH
|
589
|
+
unless records.empty?
|
590
|
+
store_records(records)
|
591
|
+
records.clear
|
592
|
+
end
|
593
|
+
@client.execute(name, record)
|
594
|
+
else
|
595
|
+
records << record
|
576
596
|
end
|
577
|
-
@client.execute(name, record)
|
578
|
-
when "groonga.command"
|
579
|
-
name = record["name"]
|
580
|
-
arguments = record["arguments"]
|
581
|
-
@client.execute(name, arguments)
|
582
|
-
else
|
583
|
-
records << record
|
584
597
|
end
|
598
|
+
store_records(records) unless records.empty?
|
585
599
|
end
|
586
|
-
store_records(records) unless records.empty?
|
587
|
-
end
|
588
600
|
|
589
|
-
|
590
|
-
|
591
|
-
|
601
|
+
private
|
602
|
+
def store_records(records)
|
603
|
+
return if @table.nil?
|
592
604
|
|
593
|
-
|
605
|
+
@schema.update(records)
|
594
606
|
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
607
|
+
arguments = {
|
608
|
+
"table" => @table,
|
609
|
+
"values" => Yajl::Encoder.encode(records),
|
610
|
+
}
|
611
|
+
@client.execute("load", arguments)
|
612
|
+
end
|
600
613
|
end
|
601
|
-
end
|
602
614
|
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
615
|
+
class BaseClient
|
616
|
+
private
|
617
|
+
def build_command(name, arguments={})
|
618
|
+
command_class = Groonga::Command.find(name)
|
619
|
+
command_class.new(name, arguments)
|
620
|
+
end
|
608
621
|
end
|
609
|
-
end
|
610
622
|
|
611
|
-
|
612
|
-
|
623
|
+
class NetworkClient < BaseClient
|
624
|
+
include Configurable
|
613
625
|
|
614
|
-
|
615
|
-
|
626
|
+
config_param :host, :string, :default => nil
|
627
|
+
config_param :port, :integer, :default => nil
|
616
628
|
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
629
|
+
def initialize(protocol, output_plugin)
|
630
|
+
super()
|
631
|
+
@protocol = protocol
|
632
|
+
@output_plugin = output_plugin
|
633
|
+
end
|
621
634
|
|
622
|
-
|
623
|
-
|
624
|
-
|
635
|
+
def start
|
636
|
+
@client = nil
|
637
|
+
end
|
625
638
|
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
639
|
+
def shutdown
|
640
|
+
return if @client.nil?
|
641
|
+
@client.close
|
642
|
+
end
|
630
643
|
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
644
|
+
def execute(name, arguments={})
|
645
|
+
command = build_command(name, arguments)
|
646
|
+
@client ||= Groonga::Client.new(:protocol => @protocol,
|
647
|
+
:host => @host,
|
648
|
+
:port => @port,
|
649
|
+
:backend => :synchronous)
|
650
|
+
response = nil
|
651
|
+
begin
|
652
|
+
response = @client.execute(command)
|
653
|
+
rescue Groonga::Client::Error
|
654
|
+
@output_plugin.log.error("[output][groonga][error]",
|
655
|
+
:protocol => @protocol,
|
656
|
+
:host => @host,
|
657
|
+
:port => @port,
|
658
|
+
:command_name => name)
|
659
|
+
raise
|
660
|
+
end
|
661
|
+
unless response.success?
|
662
|
+
@output_plugin.log.error("[output][groonga][error]",
|
663
|
+
:status_code => response.status_code,
|
664
|
+
:message => response.message)
|
665
|
+
end
|
666
|
+
response
|
667
|
+
end
|
654
668
|
end
|
655
|
-
end
|
656
|
-
|
657
|
-
class CommandClient < BaseClient
|
658
|
-
include Configurable
|
659
669
|
|
660
|
-
|
661
|
-
|
662
|
-
config_param :arguments, :default => [] do |value|
|
663
|
-
Shellwords.split(value)
|
664
|
-
end
|
670
|
+
class CommandClient < BaseClient
|
671
|
+
include Configurable
|
665
672
|
|
666
|
-
|
667
|
-
|
668
|
-
|
673
|
+
config_param :groonga, :string, :default => "groonga"
|
674
|
+
config_param :database, :string
|
675
|
+
config_param :arguments, :default => [] do |value|
|
676
|
+
Shellwords.split(value)
|
677
|
+
end
|
669
678
|
|
670
|
-
|
671
|
-
|
672
|
-
|
679
|
+
def initialize(output_plugin)
|
680
|
+
super()
|
681
|
+
@output_plugin = output_plugin
|
682
|
+
end
|
673
683
|
|
674
|
-
|
675
|
-
|
676
|
-
|
684
|
+
def configure(conf)
|
685
|
+
super
|
686
|
+
end
|
677
687
|
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
@output.close
|
682
|
-
@error.close
|
683
|
-
Process.waitpid(@pid)
|
684
|
-
end
|
688
|
+
def start
|
689
|
+
run_groonga
|
690
|
+
end
|
685
691
|
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
692
|
+
def shutdown
|
693
|
+
@input.close
|
694
|
+
read_output("shutdown")
|
695
|
+
@output.close
|
696
|
+
@error.close
|
697
|
+
Process.waitpid(@pid)
|
691
698
|
end
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
body
|
696
|
-
|
699
|
+
|
700
|
+
def execute(name, arguments={})
|
701
|
+
command = build_command(name, arguments)
|
702
|
+
body = nil
|
703
|
+
if command.name == "load"
|
704
|
+
body = command.arguments.delete(:values)
|
697
705
|
end
|
706
|
+
uri = command.to_uri_format
|
707
|
+
@input.write("#{uri}\n")
|
708
|
+
if body
|
709
|
+
body.each_line do |line|
|
710
|
+
@input.write("#{line}\n")
|
711
|
+
end
|
712
|
+
end
|
713
|
+
@input.flush
|
714
|
+
read_output(uri)
|
698
715
|
end
|
699
|
-
@input.flush
|
700
|
-
read_output(uri)
|
701
|
-
end
|
702
716
|
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
717
|
+
private
|
718
|
+
def run_groonga
|
719
|
+
env = {}
|
720
|
+
input = IO.pipe("ASCII-8BIT")
|
721
|
+
output = IO.pipe("ASCII-8BIT")
|
722
|
+
error = IO.pipe("ASCII-8BIT")
|
723
|
+
input_fd = input[0].to_i
|
724
|
+
output_fd = output[1].to_i
|
725
|
+
options = {
|
726
|
+
input_fd => input_fd,
|
727
|
+
output_fd => output_fd,
|
728
|
+
:err => error[1],
|
729
|
+
}
|
730
|
+
arguments = @arguments
|
731
|
+
arguments += [
|
732
|
+
"--input-fd", input_fd.to_s,
|
733
|
+
"--output-fd", output_fd.to_s,
|
734
|
+
]
|
735
|
+
unless File.exist?(@database)
|
736
|
+
FileUtils.mkdir_p(File.dirname(@database))
|
737
|
+
arguments << "-n"
|
738
|
+
end
|
739
|
+
arguments << @database
|
740
|
+
@pid = spawn(env, @groonga, *arguments, options)
|
741
|
+
input[0].close
|
742
|
+
@input = input[1]
|
743
|
+
output[1].close
|
744
|
+
@output = output[0]
|
745
|
+
error[1].close
|
746
|
+
@error = error[0]
|
747
|
+
end
|
748
|
+
|
749
|
+
def read_output(context)
|
750
|
+
output_message = ""
|
751
|
+
error_message = ""
|
752
|
+
|
753
|
+
loop do
|
754
|
+
readables = IO.select([@output, @error], nil, nil, 0)
|
755
|
+
break if readables.nil?
|
756
|
+
|
757
|
+
readables.each do |readable|
|
758
|
+
case readable
|
759
|
+
when @output
|
760
|
+
output_message << @output.gets
|
761
|
+
when @error
|
762
|
+
error_message << @error.gets
|
763
|
+
end
|
749
764
|
end
|
750
765
|
end
|
751
|
-
end
|
752
766
|
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
767
|
+
unless output_message.empty?
|
768
|
+
@output_plugin.log.debug("[output][groonga][output]",
|
769
|
+
:context => context,
|
770
|
+
:message => output_message)
|
771
|
+
end
|
772
|
+
unless error_message.empty?
|
773
|
+
@output_plugin.log.error("[output][groonga][error]",
|
774
|
+
:context => context,
|
775
|
+
:message => error_message)
|
776
|
+
end
|
762
777
|
end
|
763
778
|
end
|
764
779
|
end
|