rroonga 1.0.8 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/NEWS.ja.rdoc +47 -0
- data/NEWS.rdoc +48 -0
- data/README.ja.rdoc +1 -0
- data/README.rdoc +1 -0
- data/Rakefile +32 -13
- data/benchmark/create-wikipedia-database.rb +212 -0
- data/benchmark/repeat-load.rb +213 -0
- data/benchmark/select.rb +1052 -0
- data/ext/groonga/mkmf.log +99 -0
- data/ext/groonga/rb-grn-column.c +57 -6
- data/ext/groonga/rb-grn-context.c +15 -9
- data/ext/groonga/rb-grn-expression.c +7 -7
- data/ext/groonga/{rb-grn-operation.c → rb-grn-operator.c} +89 -87
- data/ext/groonga/rb-grn-patricia-trie.c +5 -5
- data/ext/groonga/rb-grn-query.c +4 -4
- data/ext/groonga/rb-grn-table.c +16 -19
- data/ext/groonga/rb-grn.h +3 -3
- data/ext/groonga/rb-groonga.c +1 -1
- data/html/index.html +4 -4
- data/lib/groonga/context.rb +34 -0
- data/lib/groonga/expression-builder.rb +34 -2
- data/lib/groonga/record.rb +8 -6
- data/lib/groonga/schema.rb +40 -4
- data/rroonga-build.rb +2 -2
- data/test-unit/Rakefile +5 -18
- data/test-unit/html/classic.html +15 -0
- data/test-unit/html/index.html +13 -235
- data/test-unit/html/index.html.ja +15 -258
- data/test-unit/lib/test/unit.rb +1 -6
- data/test-unit/lib/test/unit/assertions.rb +11 -115
- data/test-unit/lib/test/unit/autorunner.rb +2 -5
- data/test-unit/lib/test/unit/collector/load.rb +1 -1
- data/test-unit/lib/test/unit/color-scheme.rb +2 -6
- data/test-unit/lib/test/unit/diff.rb +1 -17
- data/test-unit/lib/test/unit/testcase.rb +0 -7
- data/test-unit/lib/test/unit/testresult.rb +2 -34
- data/test-unit/lib/test/unit/ui/console/testrunner.rb +45 -9
- data/test-unit/lib/test/unit/ui/tap/testrunner.rb +12 -2
- data/test-unit/lib/test/unit/ui/testrunner.rb +0 -25
- data/test-unit/lib/test/unit/util/backtracefilter.rb +0 -1
- data/test-unit/lib/test/unit/version.rb +1 -1
- data/test-unit/test/test-color-scheme.rb +2 -4
- data/test-unit/test/test_assertions.rb +5 -51
- data/test/test-column.rb +31 -1
- data/test/test-context-select.rb +45 -14
- data/test/test-context.rb +36 -0
- data/test/test-database.rb +13 -0
- data/test/test-expression-builder.rb +32 -5
- data/test/test-record.rb +34 -1
- data/test/test-schema.rb +52 -2
- data/test/test-table-select-weight.rb +20 -1
- data/test/test-table.rb +58 -0
- metadata +13 -41
- data/test-unit-notify/Rakefile +0 -47
- data/test-unit-notify/lib/test/unit/notify.rb +0 -104
- data/test-unit/COPYING +0 -56
- data/test-unit/GPL +0 -340
- data/test-unit/PSFL +0 -271
- data/test-unit/html/bar.svg +0 -153
- data/test-unit/html/developer.svg +0 -469
- data/test-unit/html/favicon.ico +0 -0
- data/test-unit/html/favicon.svg +0 -82
- data/test-unit/html/heading-mark.svg +0 -393
- data/test-unit/html/install.svg +0 -636
- data/test-unit/html/logo.svg +0 -483
- data/test-unit/html/test-unit.css +0 -339
- data/test-unit/html/tutorial.svg +0 -559
- data/test-unit/lib/test/unit/util/output.rb +0 -31
- data/test-unit/test/ui/test_tap.rb +0 -33
- data/test-unit/test/util/test-output.rb +0 -11
data/benchmark/select.rb
ADDED
@@ -0,0 +1,1052 @@
|
|
1
|
+
#encoding: UTF-8
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'shellwords'
|
5
|
+
require 'optparse'
|
6
|
+
|
7
|
+
require 'groonga'
|
8
|
+
|
9
|
+
# Point Groonga::Logger's query log at a fixed file under /tmp.
Groonga::Logger.query_log_path = "/tmp/query.log"
|
10
|
+
|
11
|
+
# Mixin that splits a column list string (as used for output columns,
# sort keys and drilldown targets in this file) into column tokens.
module ColumnTokenizer
  # Splits +column_list+ on whitespace and commas and returns an Array of
  # token Strings.
  #
  # * Empty pieces are dropped.
  # * A bare "*" is kept unchanged.
  # * Any other piece is kept only when it contains an ASCII letter, digit
  #   or underscore; a single trailing character outside that set is
  #   stripped from it.
  def tokenize_column_list(column_list)
    column_list.split(/[\s,]/).each_with_object([]) do |piece, columns|
      if piece == "*"
        columns << piece
      elsif piece =~ /[A-Za-z0-9_]/
        columns << piece.sub(/[^A-Za-z0-9_]\z/, '')
      end
    end
  end
end
|
25
|
+
|
26
|
+
# A "select" command broken down into its parameters.
#
# +options+ is a Hash keyed by parameter name Symbols (:table,
# :match_columns, :query, :filter, :limit, ...), such as the one produced
# by Query::GroongaLogParser. Values coming from the parser are Strings.
class Query
  attr_reader :options
  attr_accessor :original_log_entry

  def initialize(options)
    @options = options
  end

  # Name of the table the command selects from.
  def table_name
    @options[:table]
  end

  def match_columns
    #raise "unsupported: #{@options[:match_columns].inspect}" if @options[:match_columns] =~ /\b/ # XXX

    @options[:match_columns]
  end

  # The condition to evaluate: the :query parameter when both
  # match_columns and :query are given, the :filter parameter otherwise.
  def filter
    if match_columns and @options[:query]
      #raise "unsupported" if @options[:filter]
      @options[:query]
    else
      @options[:filter]
    end
  end

  # :limit as an Integer, or nil when it was not given.
  def limit
    integer_option(:limit)
  end

  # :offset as an Integer, or nil when it was not given.
  def offset
    integer_option(:offset)
  end

  def sort_by
    option_or_nil(:sortby)
  end

  def output_columns
    option_or_nil(:output_columns)
  end

  def drilldown_columns
    option_or_nil(:drilldown)
  end

  # :drilldown_limit as an Integer, or nil when it was not given.
  def drilldown_limit
    integer_option(:drilldown_limit)
  end

  # :drilldown_offset as an Integer, or nil when it was not given.
  def drilldown_offset
    integer_option(:drilldown_offset)
  end

  def drilldown_sort_by
    option_or_nil(:drilldown_sortby)
  end

  def drilldown_output_columns
    option_or_nil(:drilldown_output_columns)
  end

  # A copy of the options without the :table entry.
  def parameters
    @options.dup.tap do |options|
      options.delete(:table)
    end
  end

  # Turns one "select ..." command line into a Query.
  #
  # The line is split with Shellwords. Tokens starting with "--" name the
  # parameter that the following token belongs to; all other tokens are
  # positional and are assigned in IMPLICIT_PARAMETER_ORDER.
  class GroongaLogParser
    # Parameter names for positional arguments, in order.
    IMPLICIT_PARAMETER_ORDER = [
      :table,
      :match_columns,
      :query,
      :filter,
      :scorer,
      :sortby,
      :output_columns,
      :offset,
      :limit,
      :drilldown,
      :drilldown_sortby,
      # Was misspelled ":drilldwon_output_columns", which made a positional
      # value invisible to Query#drilldown_output_columns.
      :drilldown_output_columns,
      :drilldown_offset,
      :drilldown_limit,
      :cache,
      :match_escalation_threshold,
    ].freeze

    NAMED_PARAMETER_PREFIX = /\A--/

    class << self
      # Shortcut for new(log).parse.
      def parse(log)
        new(log).parse
      end
    end

    def initialize(log)
      @log = log
      @tokens = []
      @parameter_list = []
      @parameters = {}
    end

    # Parses the log line given to #initialize and returns a Query whose
    # original_log_entry is that line.
    #
    # Raises RuntimeError when the command is not "select" or when a
    # "--name" token is directly followed by another "--name" token.
    def parse
      tokenize
      build_parameter_list
      build_parameters
      create_query
    end

    private
    def tokenize
      @tokens = Shellwords.split(@log).reject(&:empty?)
    end

    # Groups the tokens after the command into [name, value] pairs (named
    # parameters) and bare values (positional parameters).
    def build_parameter_list
      command = @tokens.shift
      raise "command is not \"select\": #{command.inspect}" unless command == "select"

      parameter_name = nil
      @tokens.each do |token|
        if token =~ NAMED_PARAMETER_PREFIX
          unless parameter_name.nil?
            raise "parameter #{parameter_name} has no value (followed by #{token})"
          end
          parameter_name = token
        elsif parameter_name
          @parameter_list << [parameter_name, token]
          parameter_name = nil
        else
          @parameter_list << token
        end
      end
    end

    # Fills @parameters: named pairs by their own name, positional values
    # by their position among the positional values.
    def build_parameters
      index = 0
      @parameter_list.each do |parameter|
        case parameter
        when Array
          name, value = parameter
          @parameters[to_parameter_symbol(name)] = value
        else
          @parameters[IMPLICIT_PARAMETER_ORDER[index]] = parameter
          index += 1
        end
      end
    end

    def to_parameter_symbol(name)
      name.sub(NAMED_PARAMETER_PREFIX, '').to_sym
    end

    def create_query
      query = Query.new(@parameters)
      query.original_log_entry = @log
      query
    end
  end

  class << self
    # Builds a Query from one "select ..." command line.
    def parse_groonga_query_log(log)
      GroongaLogParser.parse(log)
    end
  end

  private
  # The option value, or nil when it is missing or false.
  def option_or_nil(name)
    @options[name] || nil
  end

  # The option value converted with #to_i, or nil when it is missing.
  def integer_option(name)
    value = @options[name]
    value ? value.to_i : nil
  end
end
|
236
|
+
|
237
|
+
# Settings for a benchmark run. The only setting is the path of the
# database to open.
class Configuration
  attr_reader :database_path
  attr_writer :database_path
end
|
240
|
+
|
241
|
+
# Base class of the selectors compared by the benchmark. It opens the
# database at +database_path+ through +context+ on construction;
# subclasses provide #select.
class Selector
  attr_reader :context
  attr_reader :database_path

  def initialize(context, database_path)
    @context, @database_path = context, database_path
    @database = context.open_database(database_path)
  end

  # Placeholder; always raises until a subclass overrides it.
  def select(query)
    raise "implement"
  end
end
|
253
|
+
|
254
|
+
# Selector that hands the query's parameters to the context's #select
# and wraps what comes back in a CommandResult.
class SelectorByCommand < Selector
  # Calls @context.select with the query's table name and parameters.
  # :cache is always set to :no; :sortby and :drilldown_sortby fall back
  # to :_id and :_key when the query does not set them.
  def select(query)
    fallbacks = {:sortby => :_id, :drilldown_sortby => :_key}
    parameters = query.parameters.merge(:cache => :no)
    fallbacks.each do |name, value|
      parameters[name] ||= value
    end
    CommandResult.new(@context.select(query.table_name, parameters))
  end
end
|
263
|
+
|
264
|
+
# Selector that answers a Query by calling methods on the table object
# looked up from the context (select, sort, group) and formatting the
# records itself. The outcome is packed into a MethodResult.
#
# do_select, sort, format, drilldown, do_group, drilldown_sort and
# drilldown_format are kept as separate public methods because the
# benchmark intercepts them by name to time each step.
class SelectorByMethod < Selector
  include ColumnTokenizer

  DEFAULT_LIMIT = 10
  DEFAULT_DRILLDOWN_LIMIT = DEFAULT_LIMIT

  DEFAULT_OUTPUT_COLUMNS = "_id, _key, *"
  DEFAULT_DRILLDOWN_OUTPUT_COLUMNS = "_key, _nsubrecs"

  DESCENDING_ORDER_PREFIX = /\A-/

  # Runs select, sort, format and drilldown for +query+ and returns a
  # MethodResult holding the output of each step.
  def select(query)
    table = @context[query.table_name]
    filter = query.filter
    default_column = nil
    if query.match_columns
      default_column = table.column(query.match_columns)
    end

    result = do_select(filter, table, default_column)
    sorted_result = sort(query, result)
    formatted_result = format(query, sorted_result || result)
    drilldown_results = drilldown(query, result)

    MethodResult.new(result, sorted_result, formatted_result, drilldown_results)
  end

  # Selects from +table+. Without a filter every record is selected. With
  # a filter the :script syntax is used, unless a default column is given,
  # in which case the filter is passed with the :query syntax and that
  # default column.
  def do_select(filter, table, default_column)
    return table.select unless filter

    options = {:syntax => :script}
    if default_column
      options[:default_column] = default_column
      options[:syntax] = :query
    end
    table.select(filter, options)
  end

  # Returns the keys of the sorted window of +result+, or nil when
  # needs_sort? says no sorting is required.
  def sort(query, result)
    return nil unless needs_sort?(query)

    key = sort_key(query.sort_by)
    limit = query.limit || DEFAULT_LIMIT
    offset = query.offset
    window_options = create_window_options(limit, offset)

    result.sort(key, window_options).collect do |record|
      record.key
    end
  end

  # Returns the sorted window of a drilldown result as an Array of
  # records. Sorts by "_key" when the query names no drilldown sort key.
  def drilldown_sort(query, result)
    key = sort_key(query.drilldown_sort_by || "_key")
    limit = query.drilldown_limit || DEFAULT_DRILLDOWN_LIMIT
    offset = query.drilldown_offset
    window_options = create_window_options(limit, offset)

    result.sort(key, window_options).collect do |record|
      record
    end
  end

  # Formats +result+ with the query's output columns (or the default).
  def format(query, result)
    columns = query.output_columns || DEFAULT_OUTPUT_COLUMNS
    format_result(result, columns)
  end

  # Formats a drilldown result with the query's drilldown output columns
  # (or the default).
  def drilldown_format(query, result)
    columns = query.drilldown_output_columns || DEFAULT_DRILLDOWN_OUTPUT_COLUMNS
    format_result(result, columns)
  end

  # Returns one Hash per drilldown column (see #drilldown_result), or nil
  # when the query requests no drilldown.
  def drilldown(query, result)
    return nil unless needs_drilldown?(query)

    drilldown_result(result, query.drilldown_columns, query)
  end

  # Groups +result+ by every distinct column in +drilldown_columns+ and
  # returns, per column, a Hash with the raw (:result), sorted (:sort) and
  # formatted (:format) outcome.
  def drilldown_result(result, drilldown_columns, query)
    columns = tokenize_column_list(drilldown_columns).uniq
    columns.collect do |column|
      grouped = do_group(result, column)
      sorted = drilldown_sort(query, grouped)
      formatted = drilldown_format(query, sorted || grouped)

      {
        :column => column,
        :result => grouped,
        :sort => sorted,
        :format => formatted,
      }
    end
  end

  def do_group(result, column)
    result.group(column)
  end

  # True when there is no limit, a non-negative limit, an offset or a
  # sort key.
  def needs_sort?(query)
    (query.limit.nil? or query.limit >= 0) or query.offset or query.sort_by
  end

  def needs_drilldown?(query)
    query.drilldown_columns
  end

  # Array of sort key Hashes for +sort_by+, or the default key (ascending
  # "_id") when +sort_by+ is nil.
  def sort_key(sort_by)
    if sort_by
      build_sort_key(sort_by)
    else
      default_sort_key
    end
  end

  # One sort key Hash per column in +sort_by+; a leading "-" on a column
  # selects descending order.
  def build_sort_key(sort_by)
    tokenize_column_list(sort_by).collect do |token|
      key = token.sub(DESCENDING_ORDER_PREFIX, '')
      if token =~ DESCENDING_ORDER_PREFIX
        descending_order_sort_key(key)
      else
        ascending_order_sort_key(key)
      end
    end
  end

  def descending_order_sort_key(key)
    {
      :key => key,
      :order => "descending",
    }
  end

  def ascending_order_sort_key(key)
    {
      :key => key,
      :order => "ascending",
    }
  end

  def default_sort_key
    [ascending_order_sort_key("_id")]
  end

  # Hash with :limit and/or :offset, leaving out whichever is nil.
  def create_window_options(limit, offset)
    window_options = {}
    window_options[:limit] = limit if limit
    window_options[:offset] = offset if offset
    window_options
  end

  # Follows a dotted column path ("a.b.c") starting at +table+, moving to
  # the range of each named column in turn, and returns the last range.
  def access_column(table, column)
    column.split(".").inject(table) do |current, name|
      current.column(name).range
    end
  end

  # Array with one formatted row (see #format_record) per record, or []
  # for an empty result.
  def format_result(result, output_columns)
    return [] if result.empty?

    column_tokens = tokenize_column_list(output_columns)
    column_list = build_column_list(result, column_tokens)

    result.collect do |record|
      format_record(column_list, record)
    end
  end

  def format_record(column_list, record)
    column_list.collect do |column, access_column|
      to_json(record[column], access_column)
    end
  end

  # Converts a column value into the form used for comparing results:
  # Time becomes a Float, a record becomes its "_key", nil becomes 0 when
  # the name of +column+ contains "Int" and "" otherwise, and Arrays are
  # converted element by element.
  def to_json(value, column)
    case value
    when ::Time
      value.to_f
    when nil
      if column.name =~ /Int/
        0
      else
        ""
      end
    when Groonga::Record
      value["_key"]
    when Array
      value.collect do |element|
        # Pass the column on, not the array: a nil element needs
        # column.name, which an Array does not have.
        to_json(element, column)
      end
    else
      value
    end
  end

  def column_included_in_record?(column, record)
    if record.respond_to?(:have_column?)
      record.have_column?(column)
    else
      record.include?(column)
    end
  end

  # Expands +columns+ into [column name, range object] pairs. "*" becomes
  # all column names of the table with their "Table." prefix removed;
  # other names are kept only when the first record has them.
  def build_column_list(result, columns)
    first_record = result.first
    access_table = first_record.table

    # NOTE(review): presumably the key is a table only for grouped
    # results; confirm against rroonga before relying on it.
    table = first_record.key
    table = first_record.table unless table.is_a?(Groonga::Table)

    names = columns.collect do |column|
      if column == "*"
        table.columns.collect(&:name).collect do |name|
          name.sub(/\A[A-Za-z0-9_]+\./, '')
        end
      else
        column if column_included_in_record?(column, first_record)
      end
    end.flatten.compact

    names.collect do |name|
      [name, access_column(access_table, name)]
    end
  end
end
|
508
|
+
|
509
|
+
# Base class for selector results. Subclasses supply hit_count,
# result_count, formatted_result and drilldown_results.
class Result
  # Two results are equal when all four values above are equal. Every
  # pair is compared even after an earlier pair differs.
  def ==(other) # XXX needs more strict/rigid check
    same_hit_count = (hit_count == other.hit_count)
    same_result_count = (result_count == other.result_count)
    same_formatted_result = (formatted_result == other.formatted_result)
    same_drilldown_results = (drilldown_results == other.drilldown_results)

    [
      same_hit_count,
      same_result_count,
      same_formatted_result,
      same_drilldown_results,
    ].all?
  end
end
|
521
|
+
|
522
|
+
# Result backed by the object returned from the context's #select. Every
# value is read from that object on demand.
class CommandResult < Result
  def initialize(result)
    @result = result
  end

  # Total number of hits reported by the wrapped result.
  def hit_count
    @result.n_hits
  end

  # Number of records carried by the wrapped result.
  def result_count
    records = @result.records
    records.size
  end

  def formatted_result
    @result.values
  end

  # The values of each drilldown held by the wrapped result.
  def drilldown_results
    @result.drill_down.values.collect do |drilldown|
      drilldown.values
    end
  end
end
|
543
|
+
|
544
|
+
# Result assembled from the separate steps of SelectorByMethod: the raw
# selection, the sorted window (may be nil), the formatted rows and the
# per-column drilldown Hashes (may be nil).
class MethodResult < Result
  attr_reader :formatted_result

  def initialize(result, sorted_result, formatted_result, drilldown_results)
    @result = result
    @sorted_result = sorted_result
    @formatted_result = formatted_result
    @drilldown_results = drilldown_results
  end

  # Size of the raw selection.
  def hit_count
    @result.size
  end

  # Size of the sorted window, or of the raw selection when nothing was
  # sorted.
  def result_count
    (@sorted_result || @result).size
  end

  # The :format entry of every drilldown Hash; [] without drilldowns.
  def drilldown_results
    return [] if @drilldown_results.nil?

    @drilldown_results.collect { |drilldown| drilldown[:format] }
  end
end
|
579
|
+
|
580
|
+
# Outcome of benchmarking one profile against one query: the value the
# measured block returned (+result+), the profile and the measured times
# (+benchmark_result+).
class BenchmarkResult
  attr_accessor :result
  attr_accessor :profile
  attr_reader :benchmark_result

  # Time measurement. Besides timing the whole block with
  # Benchmark.measure, it temporarily wraps the methods listed in
  # profile.intercepted_methods so that every call to them is timed too.
  class Time < BenchmarkResult
    # +profile+ must respond to #intercepted_methods and #name.
    # +target_object+ is the object whose class owns methods given as
    # Symbols. The block is the work to measure; its return value becomes
    # #result.
    #
    # The wrapped methods are restored even when the block raises.
    # Without that, the next measurement would alias the wrapper onto
    # itself and recurse without end.
    def initialize(profile, target_object, query, &block)
      @intercepted_method_times = {}
      @profile = profile
      @target_object = target_object
      @query = query

      intercepted = []
      begin
        each_intercepted_methods(@profile.intercepted_methods) do |klass, method_name, depth|
          intercept_method(klass, method_name, depth)
          intercepted << [klass, method_name, depth]
        end

        measure_time(&block)
      ensure
        intercepted.each do |klass, method_name, depth|
          reset_intercepted_method(klass, method_name, depth)
        end
      end
    end

    # [label, Benchmark::Tms] pairs: the overall line followed by the
    # lines of the intercepted methods.
    def lines
      super + intercepted_method_lines
    end

    # Indentation string for a line at nesting level +depth+.
    def padding(depth)
      " " * (depth + 1)
    end

    # Lines for the intercepted methods. A method called once gets one
    # line; a method called several times gets a total line followed by
    # one line per call; a method never called gets none.
    def intercepted_method_lines
      lines = []
      @intercepted_method_times.each do |method_name, status|
        depth = status[:depth]
        results = status[:benchmark_results]

        case results.size
        when 0
          # do nothing
        when 1
          lines << single_line(method_name, results.first, depth)
        else
          total = results.inject do |sum, result|
            sum + result
          end
          lines << ["#{padding(depth)}#{method_name}", total]
          lines.concat(multile_lines(method_name, results, depth + 1))
        end
      end

      lines
    end

    def single_line(method_name, result, depth)
      ["#{padding(depth)}#{method_name}", result]
    end

    # One line per call. The label comes from
    # profile.guess_invocation_label when the profile offers it and
    # returns one; otherwise it is the 1-based call number.
    def multile_lines(method_name, results, depth)
      results.each_with_index.collect do |result, position|
        index = position + 1
        label = nil
        if @profile.respond_to?(:guess_invocation_label)
          label = @profile.guess_invocation_label(@query, method_name, index)
        end
        label ||= index

        ["#{padding(depth)}#{label}", result]
      end
    end

    # Adds the times of +other+ to this object, call by call for the
    # intercepted methods, and returns self. Calls or methods that only
    # +other+ recorded are taken over as they are.
    def +(other)
      other_times = other.instance_variable_get(:@intercepted_method_times) || {}
      other_times.each do |method_name, time|
        own = (@intercepted_method_times[method_name] ||= {
                 :benchmark_results => [],
                 :depth => time[:depth],
               })
        own_results = own[:benchmark_results]
        time[:benchmark_results].each_with_index do |result, index|
          current = own_results[index]
          own_results[index] = current ? current + result : result
        end
      end
      super(other)
    end

    private
    def measure_time
      @benchmark_result = Benchmark.measure do
        @result = yield
      end
    end

    # Yields (class, method name, depth) for every entry. A Symbol names
    # a method of the target object's class. A Method object names its
    # owner when the receiver is a Class, and the receiver's class
    # otherwise. A nested Array holds entries one level deeper.
    def each_intercepted_methods(intercepted_methods, depth=0, &block)
      intercepted_methods.each do |method|
        case method
        when Symbol
          yield(@target_object.class, method, depth)
        when Method
          if method.receiver.is_a?(Class)
            yield(method.owner, method.name, depth)
          else
            yield(method.receiver.class, method.name, depth)
          end
        when Array
          each_intercepted_methods(method, depth + 1, &block)
        else
          raise "unsupported intercepted method: #{method.inspect}"
        end
      end
    end

    # Replaces klass#method_name with a wrapper that times every call
    # and appends the time to @intercepted_method_times. The original
    # stays reachable under original_method_name(method_name).
    def intercept_method(klass, method_name, depth)
      intercepted_method_times = @intercepted_method_times
      backup_name = original_method_name(method_name)

      intercepted_method_times[method_name] = {
        :benchmark_results => [],
        :depth => depth,
      }

      klass.class_exec do
        alias_method backup_name, method_name
        define_method method_name do |*arguments, &block|
          returned_object = nil
          benchmark_result = Benchmark.measure do
            returned_object = __send__(backup_name, *arguments, &block)
          end
          intercepted_method_times[method_name][:benchmark_results] << benchmark_result
          returned_object
        end
      end
    end

    # Points klass#method_name back at the original implementation.
    def reset_intercepted_method(klass, method_name, depth)
      backup_name = original_method_name(method_name)

      klass.class_exec do
        alias_method method_name, backup_name
      end
    end

    def original_method_name(method_name)
      :"__intercepted__#{method_name}"
    end
  end

  # A single [label, Benchmark::Tms] pair for the whole measurement.
  def lines
    [["#{name} (#{result.hit_count} hits)", @benchmark_result]]
  end

  def name
    profile.name
  end

  # Adds the overall time of +other+ to this object and returns self.
  def +(other)
    @benchmark_result += other.benchmark_result
    self
  end
end
|
739
|
+
|
740
|
+
# A named benchmark subject: a selector plus the list of methods whose
# calls should be timed individually while the selector runs.
class Profile
  include ColumnTokenizer

  attr_accessor :mode
  attr_reader :name, :intercepted_methods

  def initialize(name, selector, intercepted_methods=[])
    @name, @selector = name, selector
    @intercepted_methods = intercepted_methods
  end

  # Benchmarks +query+ in the current mode. Only :measure_time is
  # supported; any other mode raises.
  def take_benchmark(query)
    raise "bad" unless mode == :measure_time

    measure_time(query)
  end

  # Label for the +index+-th (1-based) call of +method_name+: the
  # matching drilldown column of +query+. Raises for methods whose name
  # contains neither "drilldown" nor "do_group".
  def guess_invocation_label(query, method_name, index)
    unless method_name.to_s =~ /drilldown|do_group/
      raise "bad: #{method_name}"
    end

    drilldown_columns = tokenize_column_list(query.drilldown_columns).uniq
    drilldown_columns[index - 1]
  end

  private
  # Times @selector.select(query) and returns the BenchmarkResult::Time.
  def measure_time(query)
    BenchmarkResult::Time.new(self, @selector, query) do
      @selector.select(query)
    end
  end
end
|
776
|
+
|
777
|
+
# Runs every registered profile against one or more queries, checks that
# all profiles return equivalent results and prints a report of the
# measured times.
class BenchmarkRunner
  attr_accessor :context

  # Only :measure_time is handled by Profile#take_benchmark. Other modes
  # (for example :measure_memory or :measure_io) are not implemented.
  DEFAULT_MODE = :measure_time

  LOCK_TIMEOUT_SECONDS = 10
  DEFAULT_REPEAT_COUNT = 5
  DEFAULT_WIKIPEDIA_DATABASE_LOCATION = "/tmp/wikipedia-db/db"

  # +options+ may contain :mode and :repeat_count.
  def initialize(options={})
    @options = options
    @profiles = []
    @queries = []
  end

  def benchmark_mode
    @options[:mode] || DEFAULT_MODE
  end

  # Registers +profile+ and switches it to this runner's mode.
  def add_profile(profile)
    profile.mode = benchmark_mode
    @profiles << profile
  end

  # Registers +query+ for #run; +label+ is shown in the report.
  def add_query(query, label=nil)
    @queries << [query, label]
  end

  # Runs the block while holding the lock of the context's database.
  # NOTE(review): the timeout is passed as LOCK_TIMEOUT_SECONDS * 1000,
  # presumably milliseconds. Confirm against the lock API.
  def lock
    @context.database.lock(:timeout => LOCK_TIMEOUT_SECONDS * 1000) do
      yield
    end
  end

  # One benchmark per registered profile, taken under the database lock.
  def collect_benchmarks(query)
    lock do
      @profiles.collect do |profile|
        profile.take_benchmark(query)
      end
    end
  end

  # Dumps the query and benchmarks when the DEBUG environment variable
  # is set.
  def debug_benchmarks(query, benchmarks)
    if ENV["DEBUG"]
      require 'pp' # pp is not loaded by default on older Ruby versions
      pp query
      pp benchmarks
    end
  end

  # Benchmarks +query+ once and prints the report (without a label).
  def run_once(query)
    benchmarks = do_run_once(query)
    report_benchmarks(benchmarks, query)
  end

  # Benchmarks +query+ once, verifies the results and returns the
  # benchmarks without reporting them.
  def do_run_once(query)
    benchmarks = collect_benchmarks(query)

    debug_benchmarks(query, benchmarks)
    verify_results(benchmarks)

    benchmarks
  end

  # Prints the report. +label+ is optional so that #run_once, which has
  # no label, can call this too.
  def report_benchmarks(benchmarks, query, label=nil)
    report = create_report(benchmarks, query, label)
    report.print
  end

  def repeat_count
    @options[:repeat_count] || DEFAULT_REPEAT_COUNT
  end

  # Benchmarks +query+, or every query registered with #add_query when
  # none is given. Raises "no query" when there is nothing to run.
  def run(query=nil)
    if query
      do_run(query)
    else
      raise "no query" if @queries.empty?

      @queries.each_with_index do |(registered_query, label), index|
        do_run(registered_query, "#{index + 1}. #{label}")
        puts
        puts
      end
    end
  end

  # Benchmarks +query+ repeat_count times, adds the runs up per profile
  # and reports the totals.
  def do_run(query, label=nil)
    count = repeat_count
    raise "repeat count must be 1 or more: #{count.inspect}" if count < 1

    benchmarks_set = count.times.collect do
      do_run_once(query)
    end
    total_benchmarks = benchmarks_set.shift
    benchmarks_set.each do |benchmarks|
      benchmarks.each_with_index do |benchmark, index|
        total_benchmarks[index] += benchmark
      end
    end
    report_benchmarks(total_benchmarks, query, label)
  end

  # Raises unless every benchmark's result is equivalent to the first
  # one's. Skipped when the NO_VERIFY environment variable is set.
  def verify_results(benchmarks)
    return if ENV["NO_VERIFY"]
    benchmarks = benchmarks.dup

    expected_result = benchmarks.shift.result
    benchmarks.each do |benchmark|
      unless assert_equivalent_to(expected_result, benchmark.result)
        raise "results of the profiles differ"
      end
    end
  end

  def assert_equivalent_to(first_result, second_result)
    first_result == second_result
  end

  def create_report(benchmarks, query, label=nil)
    Report.new(query, label, benchmarks, repeat_count)
  end

  class << self
    # Prepares +runner+ for comparing "select by command" with "select
    # by method" on the database at options[:database_path] (default:
    # DEFAULT_WIKIPEDIA_DATABASE_LOCATION). Exits when the database file
    # does not exist.
    def select_benchmark_default_setup(runner, options=nil)
      options ||= {}

      configuration = Configuration.new
      configuration.database_path = options[:database_path] || DEFAULT_WIKIPEDIA_DATABASE_LOCATION
      ensure_database(configuration)

      context = Groonga::Context.new
      select_command = SelectorByCommand.new(context, configuration.database_path)
      select_method = SelectorByMethod.new(context, configuration.database_path)
      select_command_profile = command_selector_profile(select_command)
      select_method_profile = method_selector_profile(select_method)

      runner.context = context
      runner.add_profile(select_command_profile)
      runner.add_profile(select_method_profile)
    end

    # Prints setup instructions and exits with status 1 when the
    # configured database path does not exist.
    def ensure_database(configuration)
      return if File.exist?(configuration.database_path)

      puts 'you must create wikipedia database to use, or specify an existing one via the "--database" option'
      puts
      puts 'how to create wikipedia database'
      puts '1. download wikipedia dump.'
      puts ' $ wget -c http://download.wikimedia.org/jawiki/latest/jawiki-latest-pages-articles.xml.bz2'
      puts '2. create groonga database from the dump'
      puts ' $ cat jawiki-latest-pages-articles.xml.bz2 | bunzip2 | ruby1.9.1 ./benchmark/create-wikipedia-database.rb'
      exit 1
    end

    # Profile that also times the context's #send and
    # Groonga::Context::SelectResult.parse.
    def command_selector_profile(select_command)
      Profile.new("select by command",
                  select_command,
                  [select_command.context.method(:send),
                   Groonga::Context::SelectResult.method(:parse)])
    end

    # Profile that also times the individual steps of SelectorByMethod.
    # The nested Array lists the steps shown one level deeper.
    def method_selector_profile(select_method)
      Profile.new("select by method",
                  select_method,
                  [:do_select,
                   :sort,
                   :format,
                   :drilldown, [:do_group,
                                :drilldown_sort,
                                :drilldown_format]])
    end

    def output_columns_without_content
      "--output_columns '_id _key year wday timestamp month hour date last_contributor'"
    end

    def output_columns_with_content
      "--output_columns '_id _key year wday timestamp month hour date last_contributor content'"
    end

    # [command line, label] pairs of the built-in benchmark queries.
    def predefined_queries
      [
        ["select Documents",
         "minimum command"],
        ["select Documents --filter true",
         "select all"],
        ["select Documents --filter false",
         "select none"],
        ["select Documents content アルミ #{output_columns_without_content}",
         "full text search"],
        ["select Documents content アルミ #{output_columns_without_content} --limit 0",
         "full text search with no limit"],
        ["select Documents content アルミ #{output_columns_with_content}",
         "full text search output long text column"],
        ["select Documents content アルミ #{output_columns_without_content} --limit 1000",
         "full text search with large limit"],
        ["select Documents --filter true --limit 0 --drilldown last_contributor --drilldown_sortby _nsubrecs",
         "drilldown"],
        ["select Documents --filter true --limit 0 --drilldown last_contributor --drilldown_sortby _nsubrecs --drilldown_limit 10000",
         "drilldown with large drilldown_limit"],
        ["select Documents --sortby _key",
         "sort"],
        ["select Documents --sortby _key --drilldown 'year month date wday hour, last_contributor links' --drilldown_sortby _nsubrecs",
         "sort with drilldown"],
      ]
    end

    # Parses every predefined query and registers it with +runner+.
    # +options+ is accepted but not used.
    def load_predefined_queries(runner, options)
      predefined_queries.each do |command, label|
        query = Query.parse_groonga_query_log(command)
        runner.add_query(query, label)
      end
    end
  end
end
|
987
|
+
|
988
|
+
# Prints the benchmark times of one query as an aligned table.
class Report
  def initialize(query, query_label, benchmarks, repeat_count)
    @query, @query_label = query, query_label
    @benchmarks, @repeat_count = benchmarks, repeat_count
  end

  # Not implemented yet.
  def compare
  end

  # Writes the query, the repeat count and one line per benchmark line
  # to standard output. Each time is divided by the repeat count, so the
  # numbers are averages when the benchmarks hold totals.
  def print
    puts "select command: #{@query_label}"
    puts " #{@query.original_log_entry}"
    puts
    puts "repeated #{@repeat_count} time(s). Average times are:"

    lines = @benchmarks.inject([]) do |collected, benchmark|
      collected + benchmark.lines
    end
    width = lines.collect { |line| line.first.size }.max

    puts(" " * (width - 1) + Benchmark::Tms::CAPTION.rstrip)
    lines.each do |label, result|
      average = result / @repeat_count
      puts "#{label.ljust(width)} #{average.to_s.strip}"
    end
  end
end
|
1017
|
+
|
1018
|
+
# Command line entry point: parse the options, set up the runner with the
# default profiles and run either the query given with --command or the
# predefined queries.
options = {
  # BenchmarkRunner reads :mode. The previous :method key was never used.
  :mode => BenchmarkRunner::DEFAULT_MODE,
}

OptionParser.new do |parser|
  parser.on("--repeat=COUNT", Integer,
            "repeat each query COUNT times",
            "(default: #{BenchmarkRunner::DEFAULT_REPEAT_COUNT})") do |count|
    # Reject 0 and negative counts here: there would be no run to report.
    if count < 1
      raise OptionParser::InvalidArgument, "COUNT must be 1 or more: #{count}"
    end
    options[:repeat_count] = count
  end

  parser.on("--command=COMMAND",
            "use COMMAND instead of default predefined ones") do |command|
    options[:query] = Query.parse_groonga_query_log(command)
  end

  parser.on("--database=PATH",
            "use database located at PATH",
            "(default: #{BenchmarkRunner::DEFAULT_WIKIPEDIA_DATABASE_LOCATION})") do |path|
    options[:database_path] = path
  end
end.parse!(ARGV)

runner = BenchmarkRunner.new(options)
BenchmarkRunner.select_benchmark_default_setup(runner, options)

if options[:query]
  runner.run(options[:query])
else
  BenchmarkRunner.load_predefined_queries(runner, options)
  runner.run
end
|