rroonga 1.0.8 → 1.0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/NEWS.ja.rdoc +47 -0
- data/NEWS.rdoc +48 -0
- data/README.ja.rdoc +1 -0
- data/README.rdoc +1 -0
- data/Rakefile +32 -13
- data/benchmark/create-wikipedia-database.rb +212 -0
- data/benchmark/repeat-load.rb +213 -0
- data/benchmark/select.rb +1052 -0
- data/ext/groonga/mkmf.log +99 -0
- data/ext/groonga/rb-grn-column.c +57 -6
- data/ext/groonga/rb-grn-context.c +15 -9
- data/ext/groonga/rb-grn-expression.c +7 -7
- data/ext/groonga/{rb-grn-operation.c → rb-grn-operator.c} +89 -87
- data/ext/groonga/rb-grn-patricia-trie.c +5 -5
- data/ext/groonga/rb-grn-query.c +4 -4
- data/ext/groonga/rb-grn-table.c +16 -19
- data/ext/groonga/rb-grn.h +3 -3
- data/ext/groonga/rb-groonga.c +1 -1
- data/html/index.html +4 -4
- data/lib/groonga/context.rb +34 -0
- data/lib/groonga/expression-builder.rb +34 -2
- data/lib/groonga/record.rb +8 -6
- data/lib/groonga/schema.rb +40 -4
- data/rroonga-build.rb +2 -2
- data/test-unit/Rakefile +5 -18
- data/test-unit/html/classic.html +15 -0
- data/test-unit/html/index.html +13 -235
- data/test-unit/html/index.html.ja +15 -258
- data/test-unit/lib/test/unit.rb +1 -6
- data/test-unit/lib/test/unit/assertions.rb +11 -115
- data/test-unit/lib/test/unit/autorunner.rb +2 -5
- data/test-unit/lib/test/unit/collector/load.rb +1 -1
- data/test-unit/lib/test/unit/color-scheme.rb +2 -6
- data/test-unit/lib/test/unit/diff.rb +1 -17
- data/test-unit/lib/test/unit/testcase.rb +0 -7
- data/test-unit/lib/test/unit/testresult.rb +2 -34
- data/test-unit/lib/test/unit/ui/console/testrunner.rb +45 -9
- data/test-unit/lib/test/unit/ui/tap/testrunner.rb +12 -2
- data/test-unit/lib/test/unit/ui/testrunner.rb +0 -25
- data/test-unit/lib/test/unit/util/backtracefilter.rb +0 -1
- data/test-unit/lib/test/unit/version.rb +1 -1
- data/test-unit/test/test-color-scheme.rb +2 -4
- data/test-unit/test/test_assertions.rb +5 -51
- data/test/test-column.rb +31 -1
- data/test/test-context-select.rb +45 -14
- data/test/test-context.rb +36 -0
- data/test/test-database.rb +13 -0
- data/test/test-expression-builder.rb +32 -5
- data/test/test-record.rb +34 -1
- data/test/test-schema.rb +52 -2
- data/test/test-table-select-weight.rb +20 -1
- data/test/test-table.rb +58 -0
- metadata +13 -41
- data/test-unit-notify/Rakefile +0 -47
- data/test-unit-notify/lib/test/unit/notify.rb +0 -104
- data/test-unit/COPYING +0 -56
- data/test-unit/GPL +0 -340
- data/test-unit/PSFL +0 -271
- data/test-unit/html/bar.svg +0 -153
- data/test-unit/html/developer.svg +0 -469
- data/test-unit/html/favicon.ico +0 -0
- data/test-unit/html/favicon.svg +0 -82
- data/test-unit/html/heading-mark.svg +0 -393
- data/test-unit/html/install.svg +0 -636
- data/test-unit/html/logo.svg +0 -483
- data/test-unit/html/test-unit.css +0 -339
- data/test-unit/html/tutorial.svg +0 -559
- data/test-unit/lib/test/unit/util/output.rb +0 -31
- data/test-unit/test/ui/test_tap.rb +0 -33
- data/test-unit/test/util/test-output.rb +0 -11
data/benchmark/select.rb
ADDED
@@ -0,0 +1,1052 @@
|
|
1
|
+
#encoding: UTF-8
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'shellwords'
|
5
|
+
require 'optparse'
|
6
|
+
|
7
|
+
require 'groonga'
|
8
|
+
|
9
|
+
# Point groonga's query log at /tmp/query.log.
Groonga::Logger.query_log_path = "/tmp/query.log"
|
10
|
+
|
11
|
+
# Helper for splitting groonga-style column lists into column names.
module ColumnTokenizer
  # Splits +column_list+ on whitespace and commas and returns the tokens
  # as an Array of Strings.
  #
  # Empty fragments and fragments without any word character are dropped
  # (a bare "*" is kept). One trailing non-word character is stripped from
  # every token other than "*".
  def tokenize_column_list(column_list)
    fragments = column_list.split(/[\s,]/).reject(&:empty?)
    candidates = fragments.select do |fragment|
      fragment == "*" || fragment =~ /[A-Za-z0-9_]/
    end
    candidates.collect do |candidate|
      candidate == "*" ? candidate : candidate.sub(/[^A-Za-z0-9_]\z/, '')
    end
  end
end
|
25
|
+
|
26
|
+
# A groonga "select" command broken down into its parameters.
#
# +options+ maps parameter names (Symbols such as :table or
# :drilldown_limit) to the raw String values taken from the command line.
class Query
  attr_reader :options
  # The command line this query was parsed from (set by GroongaLogParser).
  attr_accessor :original_log_entry

  def initialize(options)
    @options = options
  end

  def table_name
    @options[:table]
  end

  def match_columns
    #raise "unsupported: #{@options[:match_columns].inspect}" if @options[:match_columns] =~ /\b/ # XXX

    @options[:match_columns]
  end

  # Returns the :query parameter when both match_columns and :query are
  # given, otherwise the :filter parameter.
  def filter
    if match_columns && @options[:query]
      #raise "unsupported" if @options[:filter]
      @options[:query]
    else
      @options[:filter]
    end
  end

  # Numeric parameters are returned as Integers, or nil when absent.
  def limit
    integer_option(:limit)
  end

  def offset
    integer_option(:offset)
  end

  def sort_by
    @options[:sortby] || nil
  end

  def output_columns
    @options[:output_columns] || nil
  end

  def drilldown_columns
    @options[:drilldown] || nil
  end

  def drilldown_limit
    integer_option(:drilldown_limit)
  end

  def drilldown_offset
    integer_option(:drilldown_offset)
  end

  def drilldown_sort_by
    @options[:drilldown_sortby] || nil
  end

  def drilldown_output_columns
    @options[:drilldown_output_columns] || nil
  end

  # Returns a copy of the options without the :table entry.
  def parameters
    @options.dup.tap do |options|
      options.delete(:table)
    end
  end

  # Turns one "select ..." command line into a Query.
  class GroongaLogParser
    # Names assigned, in order, to parameters given without a "--name".
    IMPLICIT_PARAMETER_ORDER = [
      :table,
      :match_columns,
      :query,
      :filter,
      :scorer,
      :sortby,
      :output_columns,
      :offset,
      :limit,
      :drilldown,
      :drilldown_sortby,
      # Was misspelled ":drilldwon_output_columns", which hid the value
      # from Query#drilldown_output_columns.
      :drilldown_output_columns,
      :drilldown_offset,
      :drilldown_limit,
      :cache,
      :match_escalation_threshold,
    ].freeze

    NAMED_PARAMETER_PREFIX = /\A--/

    # Convenience wrapper: parses +log+ and returns the Query.
    def self.parse(log)
      new(log).parse
    end

    def initialize(log)
      @log = log
      @tokens = []
      @parameter_list = []
      @parameters = {}
    end

    # Returns a Query built from the command line given to #initialize.
    # Raises RuntimeError when the command is not "select" or when two
    # "--name" tokens follow each other.
    def parse
      tokenize
      build_parameter_list
      build_parameters
      create_query
    end

    private
    # Splits the command line with shell quoting rules.
    def tokenize
      @tokens = Shellwords.split(@log).reject(&:empty?)
    end

    # Groups tokens into [name, value] pairs (named parameters) and bare
    # values (positional parameters), preserving their order.
    def build_parameter_list
      command = @tokens.shift
      raise "command is not \"select\": #{command.inspect}" unless command == "select"

      parameter_name = nil
      @tokens.each do |token|
        if token =~ NAMED_PARAMETER_PREFIX
          raise "bad" unless parameter_name.nil?
          parameter_name = token
        elsif parameter_name
          @parameter_list << [parameter_name, token]
          parameter_name = nil
        else
          @parameter_list << token
        end
      end
    end

    # Fills @parameters; positional values take their names from
    # IMPLICIT_PARAMETER_ORDER in the order they appear.
    def build_parameters
      index = 0
      @parameter_list.each do |parameter|
        case parameter
        when Array
          name, value = parameter
          @parameters[to_parameter_symbol(name)] = value
        else
          @parameters[IMPLICIT_PARAMETER_ORDER[index]] = parameter
          index += 1
        end
      end
    end

    def to_parameter_symbol(name)
      name.sub(NAMED_PARAMETER_PREFIX, '').to_sym
    end

    def create_query
      query = Query.new(@parameters)
      query.original_log_entry = @log
      query
    end
  end

  # Parses one "select ..." command line and returns the Query.
  def self.parse_groonga_query_log(log)
    GroongaLogParser.parse(log)
  end

  private
  # Returns the named option converted with to_i, or nil when it is unset.
  def integer_option(name)
    value = @options[name]
    value ? value.to_i : nil
  end
end
|
236
|
+
|
237
|
+
# Settings object used while setting up the benchmark.
class Configuration
  # Path of the database to open.
  attr_reader :database_path
  attr_writer :database_path
end
|
240
|
+
|
241
|
+
# Base class for the select strategies being benchmarked.
# The database at +database_path+ is opened through +context+ on creation.
class Selector
  attr_reader :context
  attr_reader :database_path

  def initialize(context, database_path)
    @context, @database_path = context, database_path
    @database = context.open_database(database_path)
  end

  # Subclasses must override this; the base implementation always raises.
  def select(query)
    raise RuntimeError, "implement"
  end
end
|
253
|
+
|
254
|
+
# Runs the query through the context's select command.
class SelectorByCommand < Selector
  # Sends +query+ with caching turned off and with default sort keys filled
  # in when the query does not name any, and wraps the answer in a
  # CommandResult.
  def select(query)
    command_parameters = query.parameters.merge(:cache => :no)
    {:sortby => :_id, :drilldown_sortby => :_key}.each do |name, fallback|
      command_parameters[name] = fallback unless command_parameters[name]
    end
    CommandResult.new(@context.select(query.table_name, command_parameters))
  end
end
|
263
|
+
|
264
|
+
# Runs the query through the Ruby API (select, sort, group on tables) and
# formats the records itself, producing a MethodResult.
#
# NOTE: do_select, sort, format, drilldown, do_group, drilldown_sort and
# drilldown_format are referenced by name from
# BenchmarkRunner.method_selector_profile so they can be timed
# individually; keep them as separate instance methods.
class SelectorByMethod < Selector
  include ColumnTokenizer

  DEFAULT_LIMIT = 10
  DEFAULT_DRILLDOWN_LIMIT = DEFAULT_LIMIT

  DEFAULT_OUTPUT_COLUMNS = "_id, _key, *"
  DEFAULT_DRILLDOWN_OUTPUT_COLUMNS = "_key, _nsubrecs"

  DESCENDING_ORDER_PREFIX = /\A-/

  # Selects, sorts, formats and drills down, in that order.
  def select(query)
    table = @context[query.table_name]
    filter = query.filter
    default_column = nil
    if query.match_columns
      default_column = table.column(query.match_columns)
    end

    result = do_select(filter, table, default_column)
    sorted_result = sort(query, result)
    formatted_result = format(query, sorted_result || result)
    drilldown_results = drilldown(query, result)

    MethodResult.new(result, sorted_result, formatted_result, drilldown_results)
  end

  # Selects from +table+. With a filter the :script syntax is used, or the
  # :query syntax when a default column is given; without a filter all
  # records are selected.
  def do_select(filter, table, default_column)
    if filter
      options = {
        :syntax => :script
      }
      if default_column
        options[:default_column] = default_column
        options[:syntax] = :query
      end

      table.select(filter, options)
    else
      table.select
    end
  end

  # Returns the keys of the sorted window of +result+, or nil when the
  # query does not need sorting.
  def sort(query, result)
    return nil unless needs_sort?(query)

    keys = sort_key(query.sort_by)
    window_options = create_window_options(query.limit || DEFAULT_LIMIT,
                                           query.offset)
    result.sort(keys, window_options).collect do |record|
      record.key
    end
  end

  # Sorts one drilldown result, by "_key" unless the query says otherwise.
  def drilldown_sort(query, result)
    keys = sort_key(query.drilldown_sort_by || "_key")
    window_options = create_window_options(query.drilldown_limit || DEFAULT_DRILLDOWN_LIMIT,
                                           query.drilldown_offset)

    result.sort(keys, window_options).collect do |record|
      record
    end
  end

  def format(query, result)
    columns = query.output_columns || DEFAULT_OUTPUT_COLUMNS
    format_result(result, columns)
  end

  def drilldown_format(query, result)
    columns = query.drilldown_output_columns || DEFAULT_DRILLDOWN_OUTPUT_COLUMNS
    format_result(result, columns)
  end

  # Returns one Hash per drilldown column, or nil when the query has no
  # drilldown.
  def drilldown(query, result)
    if needs_drilldown?(query)
      drilldown_result(result, query.drilldown_columns, query)
    end
  end

  # Groups +result+ by each distinct drilldown column and returns, per
  # column, a Hash with :column, :result (grouped), :sort and :format.
  def drilldown_result(result, drilldown_columns, query)
    columns = tokenize_column_list(drilldown_columns).uniq
    columns.collect do |column|
      drilldown_result = do_group(result, column)
      sorted_drilldown_result = drilldown_sort(query, drilldown_result)
      formatted_drilldown_result = drilldown_format(query, sorted_drilldown_result || drilldown_result)

      {
        :column => column,
        :result => drilldown_result,
        :sort => sorted_drilldown_result,
        :format => formatted_drilldown_result,
      }
    end
  end

  def do_group(result, column)
    result.group(column)
  end

  # True-ish when limit is unset or non-negative, or when an offset or a
  # sort key is given.
  def needs_sort?(query)
    query.limit.nil? || query.limit >= 0 || query.offset || query.sort_by
  end

  def needs_drilldown?(query)
    query.drilldown_columns
  end

  # Returns the sort key list for +sort_by+, or the default one when nil.
  def sort_key(sort_by)
    if sort_by
      build_sort_key(sort_by)
    else
      default_sort_key
    end
  end

  # Converts a column list such as "-year, _key" into sort key Hashes; a
  # leading "-" selects descending order.
  def build_sort_key(sort_by)
    tokens = tokenize_column_list(sort_by)

    tokens.collect do |token|
      key = token.sub(DESCENDING_ORDER_PREFIX, '')
      if token =~ DESCENDING_ORDER_PREFIX
        descending_order_sort_key(key)
      else
        ascending_order_sort_key(key)
      end
    end
  end

  def descending_order_sort_key(key)
    {
      :key => key,
      :order => "descending",
    }
  end

  def ascending_order_sort_key(key)
    {
      :key => key,
      :order => "ascending",
    }
  end

  # Ascending by "_id".
  def default_sort_key
    [ascending_order_sort_key("_id")]
  end

  # Builds the options Hash for sort, leaving out unset values.
  def create_window_options(limit, offset)
    window_options = {}
    window_options[:limit] = limit if limit
    window_options[:offset] = offset if offset
    window_options
  end

  # Follows a dotted column path starting at +table+ and returns the range
  # of the last column in the path.
  def access_column(table, column)
    column.split(".").each do |name|
      table = table.column(name).range
    end
    table
  end

  # Returns one Array of column values per record, or [] for an empty
  # result.
  def format_result(result, output_columns)
    return [] if result.empty?

    column_tokens = tokenize_column_list(output_columns)
    column_list = build_column_list(result, column_tokens)

    result.collect do |record|
      format_record(column_list, record)
    end
  end

  def format_record(column_list, record)
    column_list.collect do |column, access_column|
      to_json(record[column], access_column)
    end
  end

  # Converts +value+ for output: Time becomes a Float, nil becomes 0 when
  # the name of +column+ matches /Int/ and "" otherwise, a Groonga::Record
  # becomes its "_key", and Array elements are converted one by one.
  # +column+ is whatever access_column returned for the column.
  def to_json(value, column)
    case value
    when ::Time
      value.to_f
    when nil
      if column.name =~ /Int/
        0
      else
        ""
      end
    when Groonga::Record
      value["_key"]
    when Array
      value.collect do |element|
        # Pass the column on, not the Array itself: a nil element needs
        # column.name to pick its placeholder.
        to_json(element, column)
      end
    else
      value
    end
  end

  def column_included_in_record?(column, record)
    if record.respond_to?(:have_column?)
      record.have_column?(column)
    else
      record.include?(column)
    end
  end

  # Expands "*" into the table's column names (without the "Table."
  # prefix), drops columns the first record does not have, and pairs each
  # remaining name with the object returned by access_column.
  def build_column_list(result, columns)
    access_table = result.first.table

    table = result.first.key
    unless table.is_a?(Groonga::Table)
      table = result.first.table
    end
    columns = columns.collect do |column|
      if column == "*"
        table.columns.collect(&:name).collect do |name|
          name.sub(/\A[A-Za-z0-9_]+\./, '')
        end
      else
        column if column_included_in_record?(column, result.first)
      end
    end.flatten.compact

    columns.collect do |column|
      [column, access_column(access_table, column)]
    end
  end
end
|
508
|
+
|
509
|
+
# Common comparison for the results of the different selectors.
# Subclasses provide hit_count, result_count, formatted_result and
# drilldown_results.
class Result
  # Two results are equal when all four derived values are equal.
  def ==(other) # XXX needs more strict/rigid check
    same_hit_count = hit_count == other.hit_count
    same_result_count = result_count == other.result_count
    same_formatted_result = formatted_result == other.formatted_result
    same_drilldown_results = drilldown_results == other.drilldown_results

    [same_hit_count,
     same_result_count,
     same_formatted_result,
     same_drilldown_results].all?
  end
end
|
521
|
+
|
522
|
+
# Result adapter for the object returned by the context's select command.
class CommandResult < Result
  def initialize(result)
    @result = result
  end

  # Number of hits reported by the command.
  def hit_count
    @result.n_hits
  end

  # Number of records actually returned.
  def result_count
    records = @result.records
    records.size
  end

  def formatted_result
    @result.values
  end

  # Values of every drilldown, in the order the command returned them.
  def drilldown_results
    drilldowns = @result.drill_down.values
    drilldowns.collect { |drilldown| drilldown.values }
  end
end
|
543
|
+
|
544
|
+
# Result adapter for the values computed by SelectorByMethod.
class MethodResult < Result
  attr_reader :formatted_result

  def initialize(result, sorted_result, formatted_result, drilldown_results)
    @result = result
    @sorted_result = sorted_result
    @formatted_result = formatted_result
    @drilldown_results = drilldown_results
  end

  # Size of the unsorted selection.
  def hit_count
    @result.size
  end

  # Size of the sorted window, or of the selection when nothing was sorted.
  def result_count
    (@sorted_result || @result).size
  end

  # The :format entry of every drilldown, or [] when there was none.
  def drilldown_results
    return [] if @drilldown_results.nil?

    @drilldown_results.collect { |drilldown| drilldown[:format] }
  end
end
|
579
|
+
|
580
|
+
# Outcome of benchmarking one profile: the value produced by the measured
# block (+result+) and its timing (+benchmark_result+).
class BenchmarkResult
  attr_accessor :result
  attr_accessor :profile
  attr_reader :benchmark_result

  # Report lines as [label, timing] pairs.
  def lines
    [["#{name} (#{result.hit_count} hits)", @benchmark_result]]
  end

  def name
    profile.name
  end

  # Adds the timing of +other+ to this result in place and returns self.
  def +(other)
    @benchmark_result += other.benchmark_result
    self
  end

  # Measures the wall/CPU time of a block and, in addition, the time spent
  # in each of the profile's intercepted methods while the block runs.
  class Time < BenchmarkResult
    # Temporarily wraps every intercepted method with a timing wrapper,
    # runs the block, then restores the original methods.
    def initialize(profile, target_object, query, &block)
      @intercepted_method_times = {}
      @profile = profile
      @target_object = target_object
      @query = query

      intercepted = []
      begin
        each_intercepted_methods(@profile.intercepted_methods) do |klass, method_name, depth|
          intercept_method(klass, method_name, depth)
          intercepted << [klass, method_name, depth]
        end

        measure_time(&block)
      ensure
        # Restore even when the measured block raises; otherwise the timing
        # wrappers would stay installed on the target classes.
        intercepted.each do |klass, method_name, depth|
          reset_intercepted_method(klass, method_name, depth)
        end
      end
    end

    def lines
      super + intercepted_method_lines
    end

    def padding(depth)
      " " * (depth + 1)
    end

    # One line per method called once; for methods called several times a
    # total line followed by one line per invocation.
    def intercepted_method_lines
      lines = []
      @intercepted_method_times.each do |method_name, status|
        depth = status[:depth]
        results = status[:benchmark_results]

        case results.size
        when 0
          # never called: nothing to report
        when 1
          lines << single_line(method_name, results.first, depth)
        else
          total = results.inject do |sum, result|
            sum + result
          end

          lines << ["#{padding(depth)}#{method_name}", total]
          lines += multile_lines(method_name, results, depth + 1)
        end
      end

      lines
    end

    def single_line(method_name, result, depth)
      ["#{padding(depth)}#{method_name}", result]
    end

    # Lines for the individual invocations, labelled by the profile when it
    # responds to guess_invocation_label, otherwise by their 1-based index.
    def multile_lines(method_name, results, depth)
      index = 0

      results.collect do |result|
        index += 1
        label = nil
        if @profile.respond_to?(:guess_invocation_label)
          label = @profile.guess_invocation_label(@query, method_name, index)
        end
        label ||= index

        ["#{padding(depth)}#{label}", result]
      end
    end

    # Adds the per-method timings of +other+ position by position, then the
    # overall timing.
    def +(other)
      other_times = other.instance_variable_get(:@intercepted_method_times)
      other_times.each do |method_name, time|
        time[:benchmark_results].each_with_index do |result, index|
          @intercepted_method_times[method_name][:benchmark_results][index] += result
        end
      end
      super(other)
    end

    private
    def measure_time
      @benchmark_result = Benchmark.measure do
        @result = yield
      end
    end

    # Yields (class, method name, depth) for every entry:
    # a Symbol names a method of the target object's class; a Method is
    # resolved to its owner when its receiver is a Class, otherwise to the
    # receiver's class; an Array is a nested group one level deeper.
    def each_intercepted_methods(intercepted_methods, depth=0, &block)
      intercepted_methods.each do |method|
        case method
        when Symbol
          yield(@target_object.class, method, depth)
        when Method
          if method.receiver.is_a?(Class)
            yield(method.owner, method.name, depth)
          else
            yield(method.receiver.class, method.name, depth)
          end
        when Array
          each_intercepted_methods(method, depth + 1, &block)
        else
          raise "bad"
        end
      end
    end

    # Replaces klass#method_name with a wrapper that records the time of
    # every call; the original stays reachable under an aliased name.
    def intercept_method(klass, method_name, depth)
      times = @intercepted_method_times
      backup_name = original_method_name(method_name)

      times[method_name] = {
        :benchmark_results => [],
        :depth => depth,
      }

      klass.class_exec do
        alias_method backup_name, method_name
        define_method method_name do |*arguments, &block|
          returned_object = nil
          benchmark_result = Benchmark.measure do
            returned_object = __send__(backup_name, *arguments, &block)
          end
          times[method_name][:benchmark_results] << benchmark_result
          returned_object
        end
      end
    end

    # Points method_name back at the original implementation.
    def reset_intercepted_method(klass, method_name, depth)
      backup_name = original_method_name(method_name)

      klass.class_exec do
        alias_method method_name, backup_name
      end
    end

    def original_method_name(method_name)
      :"__intercepted__#{method_name}"
    end
  end
end
|
739
|
+
|
740
|
+
# A named selector together with the methods whose time should be
# reported separately while it runs.
class Profile
  include ColumnTokenizer

  attr_accessor :mode
  attr_reader :name
  attr_reader :intercepted_methods

  def initialize(name, selector, intercepted_methods=[])
    @name, @selector = name, selector
    @intercepted_methods = intercepted_methods
  end

  # Benchmarks +query+ according to +mode+; only :measure_time is
  # supported, anything else raises.
  def take_benchmark(query)
    raise "bad" unless mode == :measure_time

    measure_time(query)
  end

  # Returns the drilldown column that the index-th (1-based) invocation of
  # a drilldown-related method worked on. Raises for any other method.
  def guess_invocation_label(query, method_name, index)
    unless method_name.to_s =~ /drilldown|do_group/
      raise "bad: #{method_name}"
    end

    drilldown_columns = tokenize_column_list(query.drilldown_columns).uniq
    drilldown_columns[index - 1]
  end

  private
  def measure_time(query)
    BenchmarkResult::Time.new(self, @selector, query) do
      @selector.select(query)
    end
  end
end
|
776
|
+
|
777
|
+
# Runs every registered query against every registered profile, checks
# that the profiles agree on the result, and prints a timing report.
class BenchmarkRunner
  attr_accessor :context

  DEFAULT_MODE = :measure_time # :mesure_memory, :mesure_io, :mesure_???
  LOCK_TIMEOUT_SECONDS = 10
  DEFAULT_REPEAT_COUNT = 5
  DEFAULT_WIKIPEDIA_DATABASE_LOCATION = "/tmp/wikipedia-db/db"

  # +options+ may contain :mode and :repeat_count.
  def initialize(options={})
    @options = options
    @profiles = []
    @queries = []
  end

  def benchmark_mode
    @options[:mode] || DEFAULT_MODE
  end

  # Registers +profile+ and switches it to this runner's benchmark mode.
  def add_profile(profile)
    profile.mode = benchmark_mode
    @profiles << profile
  end

  def add_query(query, label=nil)
    @queries << [query, label]
  end

  # Runs the block while holding the database lock of the context.
  def lock
    @context.database.lock(:timeout => LOCK_TIMEOUT_SECONDS * 1000) do
      yield
    end
  end

  # Returns one benchmark per profile, in registration order.
  def collect_benchmarks(query)
    lock do
      @profiles.collect do |profile|
        profile.take_benchmark(query)
      end
    end
  end

  # Dumps the query and its benchmarks when the DEBUG environment variable
  # is set.
  def debug_benchmarks(query, benchmarks)
    if ENV["DEBUG"]
      require 'pp' # pp is not loaded by default on older rubies
      pp query
      pp benchmarks
    end
  end

  # Benchmarks +query+ a single time and prints the report.
  def run_once(query)
    benchmarks = do_run_once(query)
    # One run only, so the report must not divide by repeat_count.
    report_benchmarks(benchmarks, query, nil, 1)
  end

  def do_run_once(query)
    benchmarks = collect_benchmarks(query)

    debug_benchmarks(query, benchmarks)
    verify_results(benchmarks)

    benchmarks
  end

  # Prints a report; +count+ is the number of runs accumulated in
  # +benchmarks+.
  def report_benchmarks(benchmarks, query, label=nil, count=repeat_count)
    report = create_report(benchmarks, query, label, count)
    report.print
  end

  def repeat_count
    @options[:repeat_count] || DEFAULT_REPEAT_COUNT
  end

  # Runs +query+, or every registered query when none is given.
  # Raises when there is nothing to run.
  def run(query=nil)
    if query
      do_run(query)
    else
      raise "no query" if @queries.empty?

      index = 0
      @queries.each do |registered_query, label|
        index += 1
        do_run(registered_query, "#{index}. #{label}")
        puts
        puts
      end
    end
  end

  # Runs +query+ repeat_count times, sums the benchmarks per profile and
  # prints the report.
  def do_run(query, label=nil)
    benchmarks_set = repeat_count.times.collect do
      do_run_once(query)
    end
    total_benchmarks = benchmarks_set.shift
    benchmarks_set.each do |benchmarks|
      benchmarks.each_with_index do |benchmark, index|
        total_benchmarks[index] += benchmark
      end
    end
    report_benchmarks(total_benchmarks, query, label)
  end

  # Raises unless every profile produced a result equal to the first
  # profile's. Skipped when the NO_VERIFY environment variable is set.
  def verify_results(benchmarks)
    return if ENV["NO_VERIFY"]
    benchmarks = benchmarks.dup

    expected_result = benchmarks.shift.result
    benchmarks.each do |benchmark|
      raise "bad" unless assert_equivalent_to(expected_result, benchmark.result)
    end
  end

  def assert_equivalent_to(first_result, second_result)
    first_result == second_result
  end

  def create_report(benchmarks, query, label=nil, count=repeat_count)
    Report.new(query, label, benchmarks, count)
  end

  class << self
    # Opens the database (options[:database_path] or the default location)
    # and registers the "by command" and "by method" profiles on +runner+.
    def select_benchmark_default_setup(runner, options=nil)
      options ||= {}

      configuration = Configuration.new
      configuration.database_path = options[:database_path] || DEFAULT_WIKIPEDIA_DATABASE_LOCATION
      ensure_database(configuration)

      context = Groonga::Context.new
      select_command = SelectorByCommand.new(context, configuration.database_path)
      select_method = SelectorByMethod.new(context, configuration.database_path)
      select_command_profile = command_selector_profile(select_command)
      select_method_profile = method_selector_profile(select_method)

      runner.context = context
      runner.add_profile(select_command_profile)
      runner.add_profile(select_method_profile)
    end

    # Prints setup instructions and exits with status 1 when the database
    # file does not exist.
    def ensure_database(configuration)
      unless File.exist?(configuration.database_path)
        # The path is taken from the --database option; no environment
        # variable is consulted.
        puts 'you must create wikipedia database to use, or specify it via "--database" option'
        puts
        puts 'how to create wikipedia database'
        puts '1. download wikipedia dump.'
        puts ' $ wget -c http://download.wikimedia.org/jawiki/latest/jawiki-latest-pages-articles.xml.bz2'
        puts '2. create groonga database from the dump'
        puts ' $ cat jawiki-latest-pages-articles.xml.bz2 | bunzip2 | ruby1.9.1 ./benchmark/create-wikipedia-database.rb'
        exit 1
      end
    end

    def command_selector_profile(select_command)
      Profile.new("select by command",
                  select_command,
                  [select_command.context.method(:send),
                   Groonga::Context::SelectResult.method(:parse)])
    end

    def method_selector_profile(select_method)
      Profile.new("select by method",
                  select_method,
                  [:do_select,
                   :sort,
                   :format,
                   :drilldown, [:do_group,
                                :drilldown_sort,
                                :drilldown_format]])
    end

    def output_columns_without_content
      "--output_columns '_id _key year wday timestamp month hour date last_contributor'"
    end

    def output_columns_with_content
      "--output_columns '_id _key year wday timestamp month hour date last_contributor content'"
    end

    # Built-in [command line, label] pairs.
    def predefined_queries
      [
        ["select Documents",
         "minimum command"],
        ["select Documents --filter true",
         "select all"],
        ["select Documents --filter false",
         "select none"],
        ["select Documents content アルミ #{output_columns_without_content}",
         "full text search"],
        ["select Documents content アルミ #{output_columns_without_content} --limit 0",
         "full text search with no limit"],
        ["select Documents content アルミ #{output_columns_with_content}",
         "full text search output long text column"],
        ["select Documents content アルミ #{output_columns_without_content} --limit 1000",
         "full text search with large limit"],
        ["select Documents --filter true --limit 0 --drilldown last_contributor --drilldown_sortby _nsubrecs",
         "drilldown"],
        ["select Documents --filter true --limit 0 --drilldown last_contributor --drilldown_sortby _nsubrecs --drilldown_limit 10000",
         "drilldown with large drilldown_limit"],
        ["select Documents --sortby _key",
         "sort"],
        ["select Documents --sortby _key --drilldown 'year month date wday hour, last_contributor links' --drilldown_sortby _nsubrecs",
         "sort with drilldown"],
      ]
    end

    # Parses every predefined command line and registers it on +runner+.
    def load_predefined_queries(runner, options)
      predefined_queries.each do |command, label|
        query = Query.parse_groonga_query_log(command)
        runner.add_query(query, label)
      end
    end
  end
end
|
987
|
+
|
988
|
+
# Prints the timings collected for one query.
class Report
  def initialize(query, query_label, benchmarks, repeat_count)
    @query, @query_label = query, query_label
    @benchmarks, @repeat_count = benchmarks, repeat_count
  end

  def compare
  end

  # Writes the query, the repeat count and one line per benchmark line to
  # standard output; each timing is divided by the repeat count.
  def print
    puts "select command: #{@query_label}",
         " #{@query.original_log_entry}",
         "",
         "repeated #{@repeat_count} time(s). Average times are:"

    rows = @benchmarks.inject([]) do |collected, benchmark|
      collected + benchmark.lines
    end
    label_width = rows.collect { |label, _| label.size }.max

    caption = Benchmark::Tms::CAPTION.rstrip
    puts(" " * (label_width - 1) + caption)
    rows.each do |label, result|
      average = result / @repeat_count
      puts "#{label.ljust(label_width)} #{average.to_s.strip}"
    end
  end
end
|
1017
|
+
|
1018
|
+
# Command line entry point: parse the options, set up the runner with the
# default profiles, then run either the query given with --command or the
# predefined queries.
options = {
  :method => [:measure_time],
}

option_parser = OptionParser.new

option_parser.on("--repeat=COUNT",
                 "repeat each query COUNT times",
                 "(default: #{BenchmarkRunner::DEFAULT_REPEAT_COUNT})") do |count|
  options[:repeat_count] = count.to_i
end

option_parser.on("--command=COMMAND",
                 "use COMMAND instead of default predefined ones") do |command|
  options[:query] = Query.parse_groonga_query_log(command)
end

option_parser.on("--database=PATH",
                 "use database located at PATH",
                 "(default: #{BenchmarkRunner::DEFAULT_WIKIPEDIA_DATABASE_LOCATION})") do |path|
  options[:database_path] = path
end

option_parser.parse!(ARGV)

runner = BenchmarkRunner.new(options)
BenchmarkRunner.select_benchmark_default_setup(runner, options)

if options[:query]
  runner.run(options[:query])
else
  BenchmarkRunner.load_predefined_queries(runner, options)
  runner.run
end
|