rroonga 1.0.8 → 1.0.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. data/NEWS.ja.rdoc +47 -0
  2. data/NEWS.rdoc +48 -0
  3. data/README.ja.rdoc +1 -0
  4. data/README.rdoc +1 -0
  5. data/Rakefile +32 -13
  6. data/benchmark/create-wikipedia-database.rb +212 -0
  7. data/benchmark/repeat-load.rb +213 -0
  8. data/benchmark/select.rb +1052 -0
  9. data/ext/groonga/mkmf.log +99 -0
  10. data/ext/groonga/rb-grn-column.c +57 -6
  11. data/ext/groonga/rb-grn-context.c +15 -9
  12. data/ext/groonga/rb-grn-expression.c +7 -7
  13. data/ext/groonga/{rb-grn-operation.c → rb-grn-operator.c} +89 -87
  14. data/ext/groonga/rb-grn-patricia-trie.c +5 -5
  15. data/ext/groonga/rb-grn-query.c +4 -4
  16. data/ext/groonga/rb-grn-table.c +16 -19
  17. data/ext/groonga/rb-grn.h +3 -3
  18. data/ext/groonga/rb-groonga.c +1 -1
  19. data/html/index.html +4 -4
  20. data/lib/groonga/context.rb +34 -0
  21. data/lib/groonga/expression-builder.rb +34 -2
  22. data/lib/groonga/record.rb +8 -6
  23. data/lib/groonga/schema.rb +40 -4
  24. data/rroonga-build.rb +2 -2
  25. data/test-unit/Rakefile +5 -18
  26. data/test-unit/html/classic.html +15 -0
  27. data/test-unit/html/index.html +13 -235
  28. data/test-unit/html/index.html.ja +15 -258
  29. data/test-unit/lib/test/unit.rb +1 -6
  30. data/test-unit/lib/test/unit/assertions.rb +11 -115
  31. data/test-unit/lib/test/unit/autorunner.rb +2 -5
  32. data/test-unit/lib/test/unit/collector/load.rb +1 -1
  33. data/test-unit/lib/test/unit/color-scheme.rb +2 -6
  34. data/test-unit/lib/test/unit/diff.rb +1 -17
  35. data/test-unit/lib/test/unit/testcase.rb +0 -7
  36. data/test-unit/lib/test/unit/testresult.rb +2 -34
  37. data/test-unit/lib/test/unit/ui/console/testrunner.rb +45 -9
  38. data/test-unit/lib/test/unit/ui/tap/testrunner.rb +12 -2
  39. data/test-unit/lib/test/unit/ui/testrunner.rb +0 -25
  40. data/test-unit/lib/test/unit/util/backtracefilter.rb +0 -1
  41. data/test-unit/lib/test/unit/version.rb +1 -1
  42. data/test-unit/test/test-color-scheme.rb +2 -4
  43. data/test-unit/test/test_assertions.rb +5 -51
  44. data/test/test-column.rb +31 -1
  45. data/test/test-context-select.rb +45 -14
  46. data/test/test-context.rb +36 -0
  47. data/test/test-database.rb +13 -0
  48. data/test/test-expression-builder.rb +32 -5
  49. data/test/test-record.rb +34 -1
  50. data/test/test-schema.rb +52 -2
  51. data/test/test-table-select-weight.rb +20 -1
  52. data/test/test-table.rb +58 -0
  53. metadata +13 -41
  54. data/test-unit-notify/Rakefile +0 -47
  55. data/test-unit-notify/lib/test/unit/notify.rb +0 -104
  56. data/test-unit/COPYING +0 -56
  57. data/test-unit/GPL +0 -340
  58. data/test-unit/PSFL +0 -271
  59. data/test-unit/html/bar.svg +0 -153
  60. data/test-unit/html/developer.svg +0 -469
  61. data/test-unit/html/favicon.ico +0 -0
  62. data/test-unit/html/favicon.svg +0 -82
  63. data/test-unit/html/heading-mark.svg +0 -393
  64. data/test-unit/html/install.svg +0 -636
  65. data/test-unit/html/logo.svg +0 -483
  66. data/test-unit/html/test-unit.css +0 -339
  67. data/test-unit/html/tutorial.svg +0 -559
  68. data/test-unit/lib/test/unit/util/output.rb +0 -31
  69. data/test-unit/test/ui/test_tap.rb +0 -33
  70. data/test-unit/test/util/test-output.rb +0 -11
@@ -0,0 +1,1052 @@
1
+ #encoding: UTF-8
2
+
3
+ require 'benchmark'
4
+ require 'shellwords'
5
+ require 'optparse'
6
+
7
+ require 'groonga'
8
+
9
+ Groonga::Logger.query_log_path = "/tmp/query.log"
10
+
11
# Mixin that splits groonga-style column lists into individual tokens.
module ColumnTokenizer
  # Splits +column_list+ on whitespace and commas and returns the tokens.
  #
  # Empty tokens and tokens without any word character are discarded
  # ("*" is always kept). One trailing non-word character is stripped
  # from every token except "*".
  def tokenize_column_list(column_list)
    candidates = column_list.split(/[\s,]/).select do |candidate|
      next false if candidate.empty?
      candidate == "*" || candidate =~ /[A-Za-z0-9_]/
    end

    candidates.each do |candidate|
      next if candidate == "*"
      candidate.sub!(/[^A-Za-z0-9_]\z/, '')
    end
  end
end
25
+
26
# A groonga "select" command represented as a hash of parameters.
#
# Instances are normally created from a command line string through
# Query.parse_groonga_query_log.
class Query
  # The raw parameter hash, keyed by symbols such as :table or :limit.
  attr_reader :options
  # The command string this query was parsed from (set by the parser).
  attr_accessor :original_log_entry

  # options:: Hash of select parameters keyed by symbol.
  def initialize(options)
    @options = options
  end

  def table_name
    @options[:table]
  end

  # XXX: complex match_columns expressions are passed through unchecked.
  def match_columns
    @options[:match_columns]
  end

  # Returns the :query parameter when both match_columns and :query are
  # given, otherwise the :filter parameter.
  def filter
    if match_columns && @options[:query]
      @options[:query]
    else
      @options[:filter]
    end
  end

  def limit
    integer_option(:limit)
  end

  def offset
    integer_option(:offset)
  end

  def sort_by
    option(:sortby)
  end

  def output_columns
    option(:output_columns)
  end

  def drilldown_columns
    option(:drilldown)
  end

  def drilldown_limit
    integer_option(:drilldown_limit)
  end

  def drilldown_offset
    integer_option(:drilldown_offset)
  end

  def drilldown_sort_by
    option(:drilldown_sortby)
  end

  def drilldown_output_columns
    option(:drilldown_output_columns)
  end

  # Returns a copy of the options without the :table entry.
  def parameters
    @options.dup.tap do |options|
      options.delete(:table)
    end
  end

  # Parses a "select ..." command line into a Query.
  class GroongaLogParser
    # Names assigned, in order, to parameters given without a "--name".
    IMPLICIT_PARAMETER_ORDER = [
      :table,
      :match_columns,
      :query,
      :filter,
      :scorer,
      :sortby,
      :output_columns,
      :offset,
      :limit,
      :drilldown,
      :drilldown_sortby,
      # Was misspelled ":drilldwon_output_columns", which made the value
      # invisible to Query#drilldown_output_columns.
      :drilldown_output_columns,
      :drilldown_offset,
      :drilldown_limit,
      :cache,
      :match_escalation_threshold,
    ]

    NAMED_PARAMETER_PREFIX = /\A--/

    class << self
      # Convenience wrapper: GroongaLogParser.new(log).parse
      def parse(log)
        new(log).parse
      end
    end

    # log:: the command line string, e.g. "select Documents --limit 10".
    def initialize(log)
      @log = log
      @tokens = []
      @parameter_list = []
      @parameters = {}
    end

    # Returns a Query built from the command line.
    # Raises RuntimeError when the command is not "select" or when two
    # "--name" tokens appear in a row.
    def parse
      tokenize
      build_parameter_list
      build_parameters
      create_query
    end

    private
    # Splits the command line with shell quoting rules.
    def tokenize
      @tokens = Shellwords.split(@log).reject(&:empty?)
    end

    # Turns the token stream into a list whose entries are either a
    # bare value (positional) or a ["--name", value] pair (named).
    def build_parameter_list
      command, parameter_tokens = @tokens.shift, @tokens
      raise "command is not \"select\": #{command.inspect}" unless command == "select"

      parameter_name = nil
      parameter_tokens.each do |token|
        if token =~ NAMED_PARAMETER_PREFIX
          raise "bad" unless parameter_name.nil?
          parameter_name = token
        elsif parameter_name
          @parameter_list << [parameter_name, token]
          parameter_name = nil
        else
          @parameter_list << token
        end
      end
    end

    # Resolves positional values through IMPLICIT_PARAMETER_ORDER and
    # named values through their "--name".
    def build_parameters
      index = 0
      @parameter_list.each do |parameter|
        case parameter
        when Array
          name, value = parameter
          @parameters[to_parameter_symbol(name)] = value
        else
          @parameters[IMPLICIT_PARAMETER_ORDER[index]] = parameter
          index += 1
        end
      end
    end

    def to_parameter_symbol(name)
      name.sub(NAMED_PARAMETER_PREFIX, '').to_sym
    end

    def create_query
      query = Query.new(@parameters)
      query.original_log_entry = @log
      query
    end
  end

  class << self
    # Parses a "select ..." command line string into a Query.
    def parse_groonga_query_log(log)
      GroongaLogParser.parse(log)
    end
  end

  private
  # Returns the option value, or nil when it is absent or false.
  def option(name)
    @options[name] || nil
  end

  # Returns the option converted with #to_i, or nil when it is absent.
  def integer_option(name)
    value = @options[name]
    value ? value.to_i : nil
  end
end
236
+
237
# Settings used while setting up the benchmark.
class Configuration
  # Path of the database handed to the selectors.
  attr_reader :database_path
  attr_writer :database_path
end
240
+
241
# Base class of the select strategies being benchmarked.
#
# Opens the database on construction; subclasses implement #select.
class Selector
  attr_reader :context, :database_path

  # context::       object responding to #open_database.
  # database_path:: path passed to context.open_database.
  def initialize(context, database_path)
    @context, @database_path = context, database_path
    @database = context.open_database(database_path)
  end

  # Must be overridden; the base implementation always raises.
  def select(query)
    raise "implement"
  end
end
253
+
254
# Runs the query through the context's select command.
class SelectorByCommand < Selector
  # Executes +query+ with caching disabled and wraps the outcome in a
  # CommandResult. When the query gives no sort keys, :_id (records)
  # and :_key (drilldown) are used.
  def select(query)
    parameters = query.parameters.merge(:cache => :no)
    [[:sortby, :_id], [:drilldown_sortby, :_key]].each do |name, fallback|
      parameters[name] = fallback unless parameters[name]
    end

    CommandResult.new(@context.select(query.table_name, parameters))
  end
end
263
+
264
# Runs the query step by step through the table API (select, sort,
# format, drilldown) instead of the select command.
#
# NOTE: do_select, sort, format, drilldown, do_group, drilldown_sort and
# drilldown_format are wrapped by the benchmark profile, so they must
# stay separate instance methods.
class SelectorByMethod < Selector
  include ColumnTokenizer

  DEFAULT_LIMIT = 10
  DEFAULT_DRILLDOWN_LIMIT = DEFAULT_LIMIT

  DEFAULT_OUTPUT_COLUMNS = "_id, _key, *"
  DEFAULT_DRILLDOWN_OUTPUT_COLUMNS = "_key, _nsubrecs"

  DESCENDING_ORDER_PREFIX = /\A-/

  # Executes +query+ and returns a MethodResult holding the raw, sorted,
  # formatted and drilldown results.
  def select(query)
    table = @context[query.table_name]
    filter = query.filter
    if query.match_columns
      default_column = table.column(query.match_columns)
    end

    result = do_select(filter, table, default_column)
    sorted_result = sort(query, result)
    formatted_result = format(query, sorted_result || result)
    drilldown_results = drilldown(query, result)

    MethodResult.new(result, sorted_result, formatted_result, drilldown_results)
  end

  # Selects from +table+. With a filter, script syntax is used unless a
  # default column is given, in which case query syntax is used.
  def do_select(filter, table, default_column)
    if filter
      options = {
        :syntax => :script
      }
      if default_column
        options[:default_column] = default_column
        options[:syntax] = :query
      end

      table.select(filter, options)
    else
      table.select
    end
  end

  # Returns the keys of the sorted window of +result+, or nil when the
  # query does not need sorting.
  def sort(query, result)
    return nil unless needs_sort?(query)

    keys = sort_key(query.sort_by)
    limit = query.limit || DEFAULT_LIMIT
    window_options = create_window_options(limit, query.offset)

    result.sort(keys, window_options).collect do |record|
      record.key
    end
  end

  # Returns the sorted window of a drilldown result (sorted by "_key"
  # unless the query says otherwise).
  def drilldown_sort(query, result)
    keys = sort_key(query.drilldown_sort_by || "_key")
    limit = query.drilldown_limit || DEFAULT_DRILLDOWN_LIMIT
    window_options = create_window_options(limit, query.drilldown_offset)

    result.sort(keys, window_options).collect do |record|
      record
    end
  end

  # Formats +result+ using the query's output columns.
  def format(query, result)
    columns = query.output_columns || DEFAULT_OUTPUT_COLUMNS
    format_result(result, columns)
  end

  # Formats a drilldown result using the query's drilldown output columns.
  def drilldown_format(query, result)
    columns = query.drilldown_output_columns || DEFAULT_DRILLDOWN_OUTPUT_COLUMNS
    format_result(result, columns)
  end

  # Returns the per-column drilldown results, or nil when the query has
  # no drilldown columns.
  def drilldown(query, result)
    if needs_drilldown?(query)
      drilldown_result(result, query.drilldown_columns, query)
    end
  end

  # Groups, sorts and formats +result+ once per distinct drilldown column.
  def drilldown_result(result, drilldown_columns, query)
    columns = tokenize_column_list(drilldown_columns).uniq
    columns.collect do |column|
      drilldown_result = do_group(result, column)
      sorted_drilldown_result = drilldown_sort(query, drilldown_result)
      formatted_drilldown_result = drilldown_format(query, sorted_drilldown_result || drilldown_result)

      {
        :column => column,
        :result => drilldown_result,
        :sort => sorted_drilldown_result,
        :format => formatted_drilldown_result,
      }
    end
  end

  def do_group(result, column)
    result.group(column)
  end

  def needs_sort?(query)
    (query.limit.nil? or query.limit >= 0) or query.offset or query.sort_by
  end

  def needs_drilldown?(query)
    query.drilldown_columns
  end

  # Returns the list of sort key hashes for +sort_by+, falling back to
  # ascending "_id" when +sort_by+ is nil.
  def sort_key(sort_by)
    if sort_by
      build_sort_key(sort_by)
    else
      default_sort_key
    end
  end

  # A leading "-" on a token requests descending order.
  def build_sort_key(sort_by)
    tokens = tokenize_column_list(sort_by)

    tokens.collect do |token|
      key = token.sub(DESCENDING_ORDER_PREFIX, '')
      if token =~ DESCENDING_ORDER_PREFIX
        descending_order_sort_key(key)
      else
        ascending_order_sort_key(key)
      end
    end
  end

  def descending_order_sort_key(key)
    {
      :key => key,
      :order => "descending",
    }
  end

  def ascending_order_sort_key(key)
    {
      :key => key,
      :order => "ascending",
    }
  end

  def default_sort_key #XX use #ascending_order_sort_key("_id")
    [
      {
        :key => "_id",
        :order => "ascending",
      }
    ]
  end

  # Builds the options hash for sort, omitting nil entries.
  def create_window_options(limit, offset)
    window_options = {}
    if limit
      window_options[:limit] = limit
    end
    if offset
      window_options[:offset] = offset
    end
    window_options
  end

  # Follows a dotted column path ("a.b") from +table+ and returns the
  # range of the last column.
  def access_column(table, column)
    column.split(".").each do |name|
      table = table.column(name).range
    end
    table
  end

  # Returns one array of output values per record; [] for empty results.
  def format_result(result, output_columns)
    if result.empty?
      return []
    end

    column_tokens = tokenize_column_list(output_columns)
    column_list = build_column_list(result, column_tokens)

    result.collect do |record|
      format_record(column_list, record)
    end
  end

  def format_record(column_list, record)
    column_list.collect do |column, value_type|
      to_json(record[column], value_type)
    end
  end

  # Converts a column value into its output representation.
  #
  # +column+ is the object returned by #access_column for the column;
  # its name decides whether a nil value becomes 0 (name contains
  # "Int") or "".
  def to_json(value, column)
    case value
    when ::Time
      value.to_f
    when nil
      if column.name =~ /Int/
        0
      else
        ""
      end
    when Groonga::Record
      value["_key"]
    when Array
      value.collect do |element|
        # Pass the column down, not the array itself: a nil element
        # needs column.name to pick its placeholder.
        to_json(element, column)
      end
    else
      value
    end
  end

  def column_included_in_record?(column, record)
    if record.respond_to?(:have_column?)
      record.have_column?(column)
    else
      record.include?(column)
    end
  end

  # Returns [column_name, accessed_column] pairs for the requested
  # columns. "*" expands to every column of the table (names with the
  # "Table." prefix removed); columns missing from the first record are
  # dropped.
  def build_column_list(result, columns)
    access_table = result.first.table

    table = result.first.key
    unless table.is_a?(Groonga::Table)
      table = result.first.table
    end
    columns = columns.collect do |column|
      if column == "*"
        table.columns.collect(&:name).collect do |name|
          name.sub(/\A[A-Za-z0-9_]+\./, '')
        end
      else
        column if column_included_in_record?(column, result.first)
      end
    end.flatten.compact

    columns.collect do |column|
      [column, access_column(access_table, column)]
    end
  end
end
508
+
509
# Common base of the selector results; subclasses supply hit_count,
# result_count, formatted_result and drilldown_results.
class Result
  # Two results are equal when all four reported values are equal.
  # All four comparisons are always evaluated.
  def ==(other) # XXX needs more strict/rigid check
    same_hit_count = (hit_count == other.hit_count)
    same_result_count = (result_count == other.result_count)
    same_formatted_result = (formatted_result == other.formatted_result)
    same_drilldown_results = (drilldown_results == other.drilldown_results)

    [
      same_hit_count,
      same_result_count,
      same_formatted_result,
      same_drilldown_results,
    ].all?
  end
end
521
+
522
# Adapts the object returned by the context's select command to the
# Result interface.
class CommandResult < Result
  def initialize(result)
    @result = result
  end

  # Number of hits reported by the command.
  def hit_count
    @result.n_hits
  end

  # Number of records actually returned.
  def result_count
    @result.records.size
  end

  def formatted_result
    @result.values
  end

  # One list of values per drilldown.
  def drilldown_results
    @result.drill_down.values.collect do |drilldown|
      drilldown.values
    end
  end
end
543
+
544
# Result of SelectorByMethod: bundles the raw, sorted, formatted and
# drilldown results behind the Result interface.
class MethodResult < Result
  attr_reader :formatted_result

  def initialize(result, sorted_result, formatted_result, drilldown_results)
    @result, @sorted_result = result, sorted_result
    @formatted_result, @drilldown_results = formatted_result, drilldown_results
  end

  # Size of the unsorted result.
  def hit_count
    @result.size
  end

  # Size of the sorted window (or of the raw result when unsorted).
  def result_count
    sorted_result.size
  end

  # The :format entry of every drilldown; [] when there is no drilldown.
  def drilldown_results
    return [] if @drilldown_results.nil?

    @drilldown_results.collect { |drilldown| drilldown[:format] }
  end

  private
  def sorted_result
    @sorted_result || @result
  end
end
579
+
580
# Holds the measurement of one profile for one query.
class BenchmarkResult
  # Value returned by the measured block (the selector's result).
  attr_accessor :result
  attr_accessor :profile
  # Benchmark::Tms of the whole measured block.
  attr_reader :benchmark_result

  # Time measurement that additionally records a Benchmark::Tms for every
  # invocation of the profile's intercepted methods.
  class Time < BenchmarkResult
    # Wraps the profile's intercepted methods, measures +block+, and
    # restores the original methods afterwards -- also when the block
    # raises, so a failed run cannot leave the classes patched.
    #
    # profile::       object responding to #intercepted_methods and #name.
    # target_object:: receiver whose class owns Symbol-named methods.
    # query::         passed to profile.guess_invocation_label if defined.
    def initialize(profile, target_object, query, &block)
      @intercepted_method_times = {}
      @profile = profile
      @target_object = target_object
      @query = query

      intercepted = []
      begin
        each_intercepted_methods(@profile.intercepted_methods) do |klass, method_name, depth|
          intercept_method(klass, method_name, depth)
          intercepted << [klass, method_name, depth]
        end

        measure_time(&block)
      ensure
        intercepted.each do |klass, method_name, depth|
          reset_intercepted_method(klass, method_name, depth)
        end
      end
    end

    # Report lines: the overall line followed by the intercepted methods.
    def lines
      super + intercepted_method_lines
    end

    def padding(depth)
      " " * (depth + 1)
    end

    # Returns [label, Benchmark::Tms] pairs for the intercepted methods.
    # A method called once yields one line; a method called several
    # times yields a total line followed by one line per invocation.
    def intercepted_method_lines
      lines = []
      @intercepted_method_times.each do |method_name, status|
        depth = status[:depth]
        results = status[:benchmark_results]

        case results.size
        when 0
          # never invoked: nothing to report
        when 1
          lines << single_line(method_name, results.first, depth)
        else
          total = results.inject { |sum, result| sum + result }
          lines << ["#{padding(depth)}#{method_name}", total]
          lines += multile_lines(method_name, results, depth + 1)
        end
      end

      lines
    end

    def single_line(method_name, result, depth)
      ["#{padding(depth)}#{method_name}", result]
    end

    # One line per invocation, labelled through the profile's
    # guess_invocation_label when available, else by 1-based index.
    def multile_lines(method_name, results, depth)
      index = 0

      results.collect do |result|
        index += 1
        if @profile.respond_to?(:guess_invocation_label)
          label = @profile.guess_invocation_label(@query, method_name, index)
        end
        label ||= index

        ["#{padding(depth)}#{label}", result]
      end
    end

    # Adds +other+'s times to this measurement in place and returns self.
    # Invocations that exist only in +other+ are adopted as they are
    # instead of failing on a missing entry.
    def +(other)
      other_times = other.instance_variable_get(:@intercepted_method_times) || {}
      other_times.each do |method_name, time|
        own = (@intercepted_method_times[method_name] ||= {
                 :benchmark_results => [],
                 :depth => time[:depth],
               })
        own_results = own[:benchmark_results]
        time[:benchmark_results].each_with_index do |result, index|
          current = own_results[index]
          own_results[index] = current ? current + result : result
        end
      end
      super(other)
    end

    private
    def measure_time
      @benchmark_result = Benchmark.measure do
        @result = yield
      end
    end

    # Yields (class, method_name, depth) for every entry. Symbols refer
    # to methods of the target object's class, Method objects to their
    # owner (class receivers) or receiver's class, and nested Arrays
    # increase the depth by one.
    def each_intercepted_methods(intercepted_methods, depth=0, &block)
      intercepted_methods.each do |method|
        case method
        when Symbol
          yield(@target_object.class, method, depth)
        when Method
          if method.receiver.is_a?(Class)
            yield(method.owner, method.name, depth)
          else
            yield(method.receiver.class, method.name, depth)
          end
        when Array
          each_intercepted_methods(method, depth + 1, &block)
        else
          raise "bad"
        end
      end
    end

    # Replaces klass#method_name with a wrapper that records the
    # Benchmark::Tms of each call; the original stays reachable under
    # the name given by #original_method_name.
    def intercept_method(klass, method_name, depth)
      intercepted_method_times = @intercepted_method_times
      original_method_name = original_method_name(method_name)

      intercepted_method_times[method_name] = {}
      intercepted_method_times[method_name][:benchmark_results] = []
      intercepted_method_times[method_name][:depth] = depth

      klass.class_exec do
        alias_method original_method_name, method_name
        define_method method_name do |*arguments, &block|
          returned_object = nil
          benchmark_result = Benchmark.measure do
            returned_object = __send__(original_method_name, *arguments, &block)
          end
          intercepted_method_times[method_name][:benchmark_results] << benchmark_result
          returned_object
        end
      end
    end

    # Points method_name back at the original implementation.
    def reset_intercepted_method(klass, method_name, depth)
      original_method_name = original_method_name(method_name)

      klass.class_exec do
        alias_method method_name, original_method_name
      end
    end

    def original_method_name(method_name)
      :"__intercepted__#{method_name}"
    end
  end

  # Returns [[label, Benchmark::Tms]] for the overall measurement.
  def lines
    [["#{name} (#{result.hit_count} hits)", @benchmark_result]]
  end

  def name
    profile.name
  end

  # Adds +other+'s overall time to this result in place; returns self.
  def +(other)
    @benchmark_result += other.benchmark_result
    self
  end
end
739
+
740
# Describes one benchmark target: a display name, the selector to run,
# and the methods whose individual times should be recorded.
class Profile
  include ColumnTokenizer

  attr_accessor :mode
  attr_reader :name, :intercepted_methods

  def initialize(name, selector, intercepted_methods=[])
    @name, @selector = name, selector
    @intercepted_methods = intercepted_methods
  end

  # Runs +query+ in the configured mode. Only :measure_time is
  # supported; any other mode raises.
  def take_benchmark(query)
    raise "bad" unless mode == :measure_time

    measure_time(query)
  end

  # Returns the drilldown column belonging to the index-th (1-based)
  # invocation of a drilldown-related method; raises for other methods.
  def guess_invocation_label(query, method_name, index)
    raise "bad: #{method_name}" unless method_name.to_s =~ /drilldown|do_group/

    tokenize_column_list(query.drilldown_columns).uniq[index - 1]
  end

  private
  def measure_time(query)
    BenchmarkResult::Time.new(self, @selector, query) { @selector.select(query) }
  end
end
776
+
777
# Runs every registered query against every registered profile, checks
# that all profiles return equivalent results, and prints a report.
class BenchmarkRunner
  attr_accessor :context
  DEFAULT_MODE = :measure_time # :measure_memory, :measure_io, :measure_???

  LOCK_TIMEOUT_SECONDS = 10
  DEFAULT_REPEAT_COUNT = 5
  DEFAULT_WIKIPEDIA_DATABASE_LOCATION = "/tmp/wikipedia-db/db"

  # options:: Hash; :mode and :repeat_count are read by the runner.
  def initialize(options={})
    @options = options
    @profiles = []
    @queries = []
  end

  def benchmark_mode
    @options[:mode] || DEFAULT_MODE
  end

  # Registers +profile+ and assigns it the runner's benchmark mode.
  def add_profile(profile)
    profile.mode = benchmark_mode
    @profiles << profile
  end

  def add_query(query, label=nil)
    @queries << [query, label]
  end

  # Runs the block while holding the database lock of the context.
  def lock
    @context.database.lock(:timeout => LOCK_TIMEOUT_SECONDS * 1000) do
      yield
    end
  end

  # Returns one benchmark per profile, taken under the database lock.
  def collect_benchmarks(query)
    lock do
      @profiles.collect do |profile|
        profile.take_benchmark(query)
      end
    end
  end

  # Dumps the query and benchmarks when the DEBUG environment variable
  # is set.
  def debug_benchmarks(query, benchmarks)
    if ENV["DEBUG"]
      require 'pp' # not loaded by default on older rubies
      pp query
      pp benchmarks
    end
  end

  # Runs +query+ a single time and prints the report.
  def run_once(query)
    benchmarks = do_run_once(query)
    report_benchmarks(benchmarks, query)
  end

  def do_run_once(query)
    benchmarks = collect_benchmarks(query)

    debug_benchmarks(query, benchmarks)
    verify_results(benchmarks)

    benchmarks
  end

  # +label+ is optional so that #run_once can report without one.
  def report_benchmarks(benchmarks, query, label=nil)
    report = create_report(benchmarks, query, label)
    report.print
  end

  def repeat_count
    @options[:repeat_count] || DEFAULT_REPEAT_COUNT
  end

  # Runs +query+ when given, otherwise every registered query in order.
  # Raises when there is nothing to run.
  def run(query=nil)
    if query
      do_run(query)
    else
      raise "no query" if @queries.empty?

      @queries.each_with_index do |(registered_query, label), index|
        do_run(registered_query, "#{index + 1}. #{label}")
        puts
        puts
      end
    end
  end

  # Repeats the query repeat_count times, sums the benchmarks per
  # profile and reports the totals.
  def do_run(query, label=nil)
    benchmarks_set = repeat_count.times.collect do
      do_run_once(query)
    end
    total_benchmarks = benchmarks_set.shift
    benchmarks_set.each do |benchmarks|
      benchmarks.each_with_index do |benchmark, index|
        total_benchmarks[index] += benchmark
      end
    end
    report_benchmarks(total_benchmarks, query, label)
  end

  # Raises unless every profile's result equals the first profile's.
  # Skipped when the NO_VERIFY environment variable is set.
  def verify_results(benchmarks)
    return if ENV["NO_VERIFY"]
    benchmarks = benchmarks.dup

    expected_result = benchmarks.shift.result
    benchmarks.each do |benchmark|
      raise "bad" unless assert_equivalent_to(expected_result, benchmark.result)
    end
  end

  def assert_equivalent_to(first_result, second_result)
    first_result == second_result
  end

  def create_report(benchmarks, query, label=nil)
    Report.new(query, label, benchmarks, repeat_count)
  end

  class << self
    # Opens the database and registers the "select by command" and
    # "select by method" profiles on +runner+.
    def select_benchmark_default_setup(runner, options=nil)
      options ||= {}

      configuration = Configuration.new
      configuration.database_path = options[:database_path] || DEFAULT_WIKIPEDIA_DATABASE_LOCATION
      ensure_database(configuration)

      context = Groonga::Context.new
      select_command = SelectorByCommand.new(context, configuration.database_path)
      select_method = SelectorByMethod.new(context, configuration.database_path)
      select_command_profile = command_selector_profile(select_command)
      select_method_profile = method_selector_profile(select_method)

      runner.context = context
      runner.add_profile(select_command_profile)
      runner.add_profile(select_method_profile)
    end

    # Prints setup instructions and exits with status 1 when the
    # database file does not exist.
    def ensure_database(configuration)
      unless File.exist?(configuration.database_path)
        # The path is taken from the --database option; no environment
        # variable is consulted.
        puts 'you must create wikipedia database to use, or specify it via "--database=PATH" option'
        puts
        puts 'how to create wikipedia database'
        puts '1. download wikipedia dump.'
        puts ' $ wget -c http://download.wikimedia.org/jawiki/latest/jawiki-latest-pages-articles.xml.bz2'
        puts '2. create groonga database from the dump'
        puts ' $ cat jawiki-latest-pages-articles.xml.bz2 | bunzip2 | ruby1.9.1 ./benchmark/create-wikipedia-database.rb'
        exit 1
      end
    end

    def command_selector_profile(select_command)
      Profile.new("select by command",
                  select_command,
                  [select_command.context.method(:send),
                   Groonga::Context::SelectResult.method(:parse)])
    end

    # The nested array lists methods reported one level deeper.
    def method_selector_profile(select_method)
      Profile.new("select by method",
                  select_method,
                  [:do_select,
                   :sort,
                   :format,
                   :drilldown, [:do_group,
                                :drilldown_sort,
                                :drilldown_format]])
    end

    def output_columns_without_content
      "--output_columns '_id _key year wday timestamp month hour date last_contributor'"
    end

    def output_columns_with_content
      "--output_columns '_id _key year wday timestamp month hour date last_contributor content'"
    end

    # Returns [command line, label] pairs of the built-in benchmarks.
    def predefined_queries
      [
        ["select Documents",
         "minimum command"],
        ["select Documents --filter true",
         "select all"],
        ["select Documents --filter false",
         "select none"],
        ["select Documents content アルミ #{output_columns_without_content}",
         "full text search"],
        ["select Documents content アルミ #{output_columns_without_content} --limit 0",
         "full text search with no limit"],
        ["select Documents content アルミ #{output_columns_with_content}",
         "full text search output long text column"],
        ["select Documents content アルミ #{output_columns_without_content} --limit 1000",
         "full text search with large limit"],
        ["select Documents --filter true --limit 0 --drilldown last_contributor --drilldown_sortby _nsubrecs",
         "drilldown"],
        ["select Documents --filter true --limit 0 --drilldown last_contributor --drilldown_sortby _nsubrecs --drilldown_limit 10000",
         "drilldown with large drilldown_limit"],
        ["select Documents --sortby _key",
         "sort"],
        ["select Documents --sortby _key --drilldown 'year month date wday hour, last_contributor links' --drilldown_sortby _nsubrecs",
         "sort with drilldown"],
      ]
    end

    # Parses every predefined command and registers it on +runner+.
    # +options+ is currently unused.
    def load_predefined_queries(runner, options)
      predefined_queries.each do |command, label|
        query = Query.parse_groonga_query_log(command)
        runner.add_query(query, label)
      end
    end
  end
end
987
+
988
# Prints the benchmark lines of one query as an aligned table.
class Report
  def initialize(query, query_label, benchmarks, repeat_count)
    @query, @query_label = query, query_label
    @benchmarks, @repeat_count = benchmarks, repeat_count
  end

  def compare
  end

  # Writes the header followed by one row per benchmark line; every
  # time is divided by the repeat count before printing.
  def print
    puts "select command: #{@query_label}"
    puts " #{@query.original_log_entry}"
    puts
    puts "repeated #{@repeat_count} time(s). Average times are:"

    rows = @benchmarks.inject([]) do |collected, benchmark|
      collected + benchmark.lines
    end
    width = rows.collect { |row| row.first.size }.max

    puts(" " * (width - 1) + Benchmark::Tms::CAPTION.rstrip)
    rows.each do |label, result|
      average = result / @repeat_count
      puts "#{label.ljust(width)} #{average.to_s.strip}"
    end
  end
end
1017
+
1018
# Command line entry point: parse the options, set up the runner with
# the default profiles, then run either the given command or the
# predefined queries.
options = {
  :method => [:measure_time],
}

option_parser = OptionParser.new do |parser|
  parser.on("--repeat=COUNT",
            "repeat each query COUNT times",
            "(default: #{BenchmarkRunner::DEFAULT_REPEAT_COUNT})") do |count|
    options[:repeat_count] = count.to_i
  end

  parser.on("--command=COMMAND",
            "use COMMAND instead of default predefined ones") do |command|
    options[:query] = Query.parse_groonga_query_log(command)
  end

  parser.on("--database=PATH",
            "use database located at PATH",
            "(default: #{BenchmarkRunner::DEFAULT_WIKIPEDIA_DATABASE_LOCATION})") do |path|
    options[:database_path] = path
  end
end
option_parser.parse!(ARGV)

runner = BenchmarkRunner.new(options)
BenchmarkRunner.select_benchmark_default_setup(runner, options)
# The predefined queries are only needed when no --command was given.
BenchmarkRunner.load_predefined_queries(runner, options) if options[:query].nil?

if options[:query]
  runner.run(options[:query])
else
  runner.run
end