rroonga 1.0.8 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. data/NEWS.ja.rdoc +47 -0
  2. data/NEWS.rdoc +48 -0
  3. data/README.ja.rdoc +1 -0
  4. data/README.rdoc +1 -0
  5. data/Rakefile +32 -13
  6. data/benchmark/create-wikipedia-database.rb +212 -0
  7. data/benchmark/repeat-load.rb +213 -0
  8. data/benchmark/select.rb +1052 -0
  9. data/ext/groonga/mkmf.log +99 -0
  10. data/ext/groonga/rb-grn-column.c +57 -6
  11. data/ext/groonga/rb-grn-context.c +15 -9
  12. data/ext/groonga/rb-grn-expression.c +7 -7
  13. data/ext/groonga/{rb-grn-operation.c → rb-grn-operator.c} +89 -87
  14. data/ext/groonga/rb-grn-patricia-trie.c +5 -5
  15. data/ext/groonga/rb-grn-query.c +4 -4
  16. data/ext/groonga/rb-grn-table.c +16 -19
  17. data/ext/groonga/rb-grn.h +3 -3
  18. data/ext/groonga/rb-groonga.c +1 -1
  19. data/html/index.html +4 -4
  20. data/lib/groonga/context.rb +34 -0
  21. data/lib/groonga/expression-builder.rb +34 -2
  22. data/lib/groonga/record.rb +8 -6
  23. data/lib/groonga/schema.rb +40 -4
  24. data/rroonga-build.rb +2 -2
  25. data/test-unit/Rakefile +5 -18
  26. data/test-unit/html/classic.html +15 -0
  27. data/test-unit/html/index.html +13 -235
  28. data/test-unit/html/index.html.ja +15 -258
  29. data/test-unit/lib/test/unit.rb +1 -6
  30. data/test-unit/lib/test/unit/assertions.rb +11 -115
  31. data/test-unit/lib/test/unit/autorunner.rb +2 -5
  32. data/test-unit/lib/test/unit/collector/load.rb +1 -1
  33. data/test-unit/lib/test/unit/color-scheme.rb +2 -6
  34. data/test-unit/lib/test/unit/diff.rb +1 -17
  35. data/test-unit/lib/test/unit/testcase.rb +0 -7
  36. data/test-unit/lib/test/unit/testresult.rb +2 -34
  37. data/test-unit/lib/test/unit/ui/console/testrunner.rb +45 -9
  38. data/test-unit/lib/test/unit/ui/tap/testrunner.rb +12 -2
  39. data/test-unit/lib/test/unit/ui/testrunner.rb +0 -25
  40. data/test-unit/lib/test/unit/util/backtracefilter.rb +0 -1
  41. data/test-unit/lib/test/unit/version.rb +1 -1
  42. data/test-unit/test/test-color-scheme.rb +2 -4
  43. data/test-unit/test/test_assertions.rb +5 -51
  44. data/test/test-column.rb +31 -1
  45. data/test/test-context-select.rb +45 -14
  46. data/test/test-context.rb +36 -0
  47. data/test/test-database.rb +13 -0
  48. data/test/test-expression-builder.rb +32 -5
  49. data/test/test-record.rb +34 -1
  50. data/test/test-schema.rb +52 -2
  51. data/test/test-table-select-weight.rb +20 -1
  52. data/test/test-table.rb +58 -0
  53. metadata +13 -41
  54. data/test-unit-notify/Rakefile +0 -47
  55. data/test-unit-notify/lib/test/unit/notify.rb +0 -104
  56. data/test-unit/COPYING +0 -56
  57. data/test-unit/GPL +0 -340
  58. data/test-unit/PSFL +0 -271
  59. data/test-unit/html/bar.svg +0 -153
  60. data/test-unit/html/developer.svg +0 -469
  61. data/test-unit/html/favicon.ico +0 -0
  62. data/test-unit/html/favicon.svg +0 -82
  63. data/test-unit/html/heading-mark.svg +0 -393
  64. data/test-unit/html/install.svg +0 -636
  65. data/test-unit/html/logo.svg +0 -483
  66. data/test-unit/html/test-unit.css +0 -339
  67. data/test-unit/html/tutorial.svg +0 -559
  68. data/test-unit/lib/test/unit/util/output.rb +0 -31
  69. data/test-unit/test/ui/test_tap.rb +0 -33
  70. data/test-unit/test/util/test-output.rb +0 -11
@@ -0,0 +1,1052 @@
1
+ #encoding: UTF-8
2
+
3
+ require 'benchmark'
4
+ require 'shellwords'
5
+ require 'optparse'
6
+
7
+ require 'groonga'
8
+
9
+ Groonga::Logger.query_log_path = "/tmp/query.log"
10
+
11
# Mixin that splits a column list string into individual column tokens.
module ColumnTokenizer
  # Splits +column_list+ on whitespace and commas.
  #
  # Empty tokens and tokens without any word character are discarded,
  # except for the "*" wildcard, which is kept untouched. Every other
  # token has a single trailing non-word character removed.
  #
  # Returns an Array of String tokens in their original order.
  def tokenize_column_list(column_list)
    column_list.split(/[\s,]/).each_with_object([]) do |raw_token, tokens|
      if raw_token == "*"
        tokens << raw_token
      elsif raw_token =~ /[A-Za-z0-9_]/
        tokens << raw_token.sub(/[^A-Za-z0-9_]\z/, '')
      end
    end
  end
end
25
+
26
# A parsed "select" command. Wraps the parameter Hash and exposes the
# individual parameters through typed reader methods.
class Query
  # The raw parameter Hash (Symbol keys, values as given to #initialize).
  attr_reader :options
  # The command line text this query was parsed from (set by the parser).
  attr_accessor :original_log_entry

  # options - Hash of select parameters keyed by Symbol (:table, :limit, ...).
  def initialize(options)
    @options = options
  end

  def table_name
    @options[:table]
  end

  def match_columns
    @options[:match_columns]
  end

  # Returns the :query parameter when both :match_columns and :query are
  # given, otherwise the :filter parameter.
  def filter
    if match_columns && @options[:query]
      @options[:query]
    else
      @options[:filter]
    end
  end

  # Integer limit, or nil when no :limit was given.
  def limit
    integer_option(:limit)
  end

  # Integer offset, or nil when no :offset was given.
  def offset
    integer_option(:offset)
  end

  def sort_by
    option(:sortby)
  end

  def output_columns
    option(:output_columns)
  end

  def drilldown_columns
    option(:drilldown)
  end

  def drilldown_limit
    integer_option(:drilldown_limit)
  end

  def drilldown_offset
    integer_option(:drilldown_offset)
  end

  def drilldown_sort_by
    option(:drilldown_sortby)
  end

  def drilldown_output_columns
    option(:drilldown_output_columns)
  end

  # Returns a copy of the options without the :table entry.
  def parameters
    @options.dup.tap do |options|
      options.delete(:table)
    end
  end

  # Turns one "select ..." command line into a Query.
  class GroongaLogParser
    # Parameter names assigned, in this order, to arguments that are not
    # introduced by a "--name" token.
    IMPLICIT_PARAMETER_ORDER = [
      :table,
      :match_columns,
      :query,
      :filter,
      :scorer,
      :sortby,
      :output_columns,
      :offset,
      :limit,
      :drilldown,
      :drilldown_sortby,
      # Was misspelled ":drilldwon_output_columns", which made positional
      # values unreachable through Query#drilldown_output_columns.
      :drilldown_output_columns,
      :drilldown_offset,
      :drilldown_limit,
      :cache,
      :match_escalation_threshold,
    ]

    NAMED_PARAMETER_PREFIX = /\A--/

    class << self
      # Convenience wrapper: parses +log+ and returns the Query.
      def parse(log)
        new(log).parse
      end
    end

    # log - String holding a single select command line.
    def initialize(log)
      @log = log
      @tokens = []
      @parameter_list = []
      @parameters = {}
    end

    # Returns a Query built from the command line.
    # Raises RuntimeError when the command is not "select" or when two
    # "--name" tokens follow each other.
    def parse
      tokenize
      build_parameter_list
      build_parameters
      create_query
    end

    private
    # Shell-style word splitting, so quoted values stay in one token.
    def tokenize
      @tokens = Shellwords.split(@log).reject(&:empty?)
    end

    # Groups the tokens into [name, value] pairs (named parameters) and
    # bare values (positional parameters).
    def build_parameter_list
      command = @tokens.shift
      raise "command is not \"select\": #{command.inspect}" unless command == "select"

      parameter_name = nil
      @tokens.each do |token|
        if token =~ NAMED_PARAMETER_PREFIX
          raise "bad" unless parameter_name.nil?
          parameter_name = token
        elsif parameter_name
          @parameter_list << [parameter_name, token]
          parameter_name = nil
        else
          @parameter_list << token
        end
      end
    end

    # Fills @parameters; positional values consume IMPLICIT_PARAMETER_ORDER
    # one slot at a time, named values do not advance the position.
    def build_parameters
      index = 0
      @parameter_list.each do |parameter|
        case parameter
        when Array
          name, value = parameter
          @parameters[to_parameter_symbol(name)] = value
        else
          @parameters[IMPLICIT_PARAMETER_ORDER[index]] = parameter
          index += 1
        end
      end
    end

    # "--limit" -> :limit
    def to_parameter_symbol(name)
      name.sub(NAMED_PARAMETER_PREFIX, '').to_sym
    end

    def create_query
      query = Query.new(@parameters)
      query.original_log_entry = @log
      query
    end
  end

  class << self
    # Parses one "select ..." command line into a Query.
    def parse_groonga_query_log(log)
      GroongaLogParser.parse(log)
    end
  end

  private
  # Value stored under +name+; falsy values are reported as nil.
  def option(name)
    @options[name] || nil
  end

  # Value stored under +name+ converted with #to_i, or nil when absent.
  def integer_option(name)
    value = @options[name]
    value ? value.to_i : nil
  end
end
236
+
237
# Settings holder for a benchmark run.
class Configuration
  # Location of the database to open; readable and writable.
  attr_reader :database_path
  attr_writer :database_path
end
240
+
241
# Base class for the select strategies. Opens the database on creation;
# subclasses supply #select.
class Selector
  # context       - object responding to #open_database(path).
  # database_path - path handed to the context to open the database.
  def initialize(context, database_path)
    @context, @database_path = context, database_path
    @database = context.open_database(database_path)
  end

  def context
    @context
  end

  def database_path
    @database_path
  end

  # Placeholder; always raises RuntimeError until overridden.
  def select(query)
    raise RuntimeError, "implement"
  end
end
253
+
254
# Select strategy that hands the query parameters to the context's
# own #select and wraps what comes back in a CommandResult.
class SelectorByCommand < Selector
  def select(query)
    parameters = query.parameters.merge(:cache => :no)
    # Fill in sort keys only when the query did not specify them.
    [[:sortby, :_id], [:drilldown_sortby, :_key]].each do |name, fallback|
      parameters[name] ||= fallback
    end
    CommandResult.new(@context.select(query.table_name, parameters))
  end
end
263
+
264
# Select strategy that performs the query step by step through method
# calls on the table object (select, sort, group) and formats the records
# itself. Each step is a separate public method so that it can be timed
# individually.
class SelectorByMethod < Selector
  include ColumnTokenizer

  DEFAULT_LIMIT = 10
  DEFAULT_DRILLDOWN_LIMIT = DEFAULT_LIMIT

  DEFAULT_OUTPUT_COLUMNS = "_id, _key, *"
  DEFAULT_DRILLDOWN_OUTPUT_COLUMNS = "_key, _nsubrecs"

  DESCENDING_ORDER_PREFIX = /\A-/

  # Runs +query+ and returns a MethodResult holding the raw, sorted,
  # formatted and drilldown results.
  def select(query)
    table = @context[query.table_name]
    filter = query.filter
    default_column = table.column(query.match_columns) if query.match_columns

    result = do_select(filter, table, default_column)
    sorted_result = sort(query, result)
    formatted_result = format(query, sorted_result || result)
    drilldown_results = drilldown(query, result)

    MethodResult.new(result, sorted_result, formatted_result, drilldown_results)
  end

  # Selects from +table+. Without a filter everything is selected; with a
  # default column the filter is evaluated with :query syntax, otherwise
  # with :script syntax.
  def do_select(filter, table, default_column)
    return table.select unless filter

    options = {
      :syntax => :script
    }
    if default_column
      options[:default_column] = default_column
      options[:syntax] = :query
    end

    table.select(filter, options)
  end

  # Returns the keys of the sorted, windowed records, or nil when the
  # query needs no sorting.
  def sort(query, result)
    return nil unless needs_sort?(query)

    keys = sort_key(query.sort_by)
    window_options = create_window_options(query.limit || DEFAULT_LIMIT,
                                           query.offset)
    result.sort(keys, window_options).collect do |record|
      record.key
    end
  end

  # Sorts one drilldown result; defaults to ordering by "_key".
  def drilldown_sort(query, result)
    keys = sort_key(query.drilldown_sort_by || "_key")
    window_options = create_window_options(query.drilldown_limit || DEFAULT_DRILLDOWN_LIMIT,
                                           query.drilldown_offset)
    result.sort(keys, window_options).collect do |record|
      record
    end
  end

  def format(query, result)
    format_result(result, query.output_columns || DEFAULT_OUTPUT_COLUMNS)
  end

  def drilldown_format(query, result)
    format_result(result,
                  query.drilldown_output_columns || DEFAULT_DRILLDOWN_OUTPUT_COLUMNS)
  end

  # Returns the per-column drilldown results, or nil when the query has
  # no drilldown columns.
  def drilldown(query, result)
    return nil unless needs_drilldown?(query)
    drilldown_result(result, query.drilldown_columns, query)
  end

  # Groups +result+ by every distinct drilldown column and returns one
  # Hash (:column, :result, :sort, :format) per column.
  def drilldown_result(result, drilldown_columns, query)
    tokenize_column_list(drilldown_columns).uniq.collect do |column|
      grouped = do_group(result, column)
      sorted = drilldown_sort(query, grouped)
      formatted = drilldown_format(query, sorted || grouped)

      {
        :column => column,
        :result => grouped,
        :sort => sorted,
        :format => formatted,
      }
    end
  end

  def do_group(result, column)
    result.group(column)
  end

  # Truthy unless the limit is negative and neither offset nor sort key
  # is given.
  def needs_sort?(query)
    (query.limit.nil? || query.limit >= 0) || query.offset || query.sort_by
  end

  def needs_drilldown?(query)
    query.drilldown_columns
  end

  # Converts a sort specification string into an Array of sort key
  # Hashes; falls back to ascending "_id" when +sort_by+ is nil.
  def sort_key(sort_by)
    sort_by ? build_sort_key(sort_by) : default_sort_key
  end

  # A leading "-" on a column name requests descending order.
  def build_sort_key(sort_by)
    tokenize_column_list(sort_by).collect do |token|
      key = token.sub(DESCENDING_ORDER_PREFIX, '')
      if token =~ DESCENDING_ORDER_PREFIX
        descending_order_sort_key(key)
      else
        ascending_order_sort_key(key)
      end
    end
  end

  def descending_order_sort_key(key)
    {
      :key => key,
      :order => "descending",
    }
  end

  def ascending_order_sort_key(key)
    {
      :key => key,
      :order => "ascending",
    }
  end

  def default_sort_key #XX use #ascending_order_sort_key("_id")
    [
      {
        :key => "_id",
        :order => "ascending",
      }
    ]
  end

  # Builds the options Hash for #sort, omitting entries that are nil.
  def create_window_options(limit, offset)
    window_options = {}
    window_options[:limit] = limit if limit
    window_options[:offset] = offset if offset
    window_options
  end

  # Follows a dotted column path ("a.b.c") starting at +table+ and
  # returns the range of the last column.
  def access_column(table, column)
    column.split(".").inject(table) do |current, name|
      current.column(name).range
    end
  end

  # Returns one Array of output values per record; [] for empty results.
  def format_result(result, output_columns)
    return [] if result.empty?

    column_tokens = tokenize_column_list(output_columns)
    column_list = build_column_list(result, column_tokens)

    result.collect do |record|
      format_record(column_list, record)
    end
  end

  # column_list - Array of [column name, column range] pairs.
  def format_record(column_list, record)
    column_list.collect do |column, access_column|
      to_json(record[column], access_column)
    end
  end

  # Converts one column value into its output representation.
  #
  # value  - the raw value read from the record.
  # column - object whose #name decides the placeholder used for nil
  #          (0 when the name contains "Int", "" otherwise).
  def to_json(value, column)
    case value
    when ::Time
      value.to_f
    when nil
      if column.name =~ /Int/
        0
      else
        ""
      end
    when Groonga::Record
      value["_key"]
    when Array
      value.collect do |element|
        # Pass the column through: the original passed +value+ (the
        # Array) here, which broke on nil elements.
        to_json(element, column)
      end
    else
      value
    end
  end

  def column_included_in_record?(column, record)
    if record.respond_to?(:have_column?)
      record.have_column?(column)
    else
      record.include?(column)
    end
  end

  # Expands "*" into the table's column names (table prefix removed),
  # drops columns the first record does not have, and pairs every
  # remaining name with its column range.
  def build_column_list(result, columns)
    first_record = result.first
    access_table = first_record.table

    # Prefer the table referenced by the record key when it is one.
    table = first_record.key
    table = first_record.table unless table.is_a?(Groonga::Table)

    names = columns.collect do |column|
      if column == "*"
        table.columns.collect(&:name).collect do |name|
          name.sub(/\A[A-Za-z0-9_]+\./, '')
        end
      else
        column if column_included_in_record?(column, first_record)
      end
    end.flatten.compact

    names.collect do |column|
      [column, access_column(access_table, column)]
    end
  end
end
508
+
509
# Common base of the result wrappers; supplies value comparison.
class Result
  # Two results are equal when hit count, result count, formatted
  # result and drilldown results all compare equal. Every comparison
  # is evaluated before the verdict is computed.
  def ==(other) # XXX needs more strict/rigid check
    checks = []
    checks << (hit_count == other.hit_count)
    checks << (result_count == other.result_count)
    checks << (formatted_result == other.formatted_result)
    checks << (drilldown_results == other.drilldown_results)

    checks.none? { |matched| !matched }
  end
end
521
+
522
# Result adapter around the object returned by the context's select.
class CommandResult < Result
  # result - object responding to #n_hits, #records, #values and
  #          #drill_down.
  def initialize(result)
    @result = result
  end

  # Number of hits reported by the wrapped result.
  def hit_count
    @result.n_hits
  end

  # Number of records actually returned.
  def result_count
    records = @result.records
    records.size
  end

  def formatted_result
    @result.values
  end

  # Values of every drilldown entry, in the order #drill_down yields them.
  def drilldown_results
    @result.drill_down.values.collect do |drilldown|
      drilldown.values
    end
  end
end
543
+
544
# Result adapter around the pieces produced by the method-based selector.
class MethodResult < Result
  # Formatted rows exactly as handed to the constructor.
  attr_reader :formatted_result

  def initialize(result, sorted_result, formatted_result, drilldown_results)
    @result, @sorted_result = result, sorted_result
    @formatted_result, @drilldown_results = formatted_result, drilldown_results
  end

  # Size of the unsorted, unwindowed result.
  def hit_count
    @result.size
  end

  # Size of the sorted result, or of the raw result when nothing was sorted.
  def result_count
    sorted_result.size
  end

  # Formatted rows of every drilldown; [] when no drilldown was run.
  def drilldown_results
    return [] if @drilldown_results.nil?

    @drilldown_results.collect { |drilldown| drilldown[:format] }
  end

  private
  def sorted_result
    @sorted_result || @result
  end
end
579
+
580
# Outcome of benchmarking one profile against one query.
class BenchmarkResult
  # Value returned by the measured block.
  attr_accessor :result
  # Profile this result belongs to (supplies #name).
  attr_accessor :profile
  # Benchmark::Tms of the whole measured block.
  attr_reader :benchmark_result

  # Time measurement. While the block runs, the profile's intercepted
  # methods are wrapped so that every invocation is timed separately.
  class Time < BenchmarkResult
    # profile       - responds to #intercepted_methods (and #name).
    # target_object - object whose class owns Symbol-named methods.
    # query         - passed to the profile when labelling invocations.
    # block         - the work to measure; its value becomes #result.
    def initialize(profile, target_object, query, &block)
      @intercepted_method_times = {}
      @profile = profile
      @target_object = target_object
      @query = query

      each_intercepted_methods(@profile.intercepted_methods) do |klass, method_name, depth|
        intercept_method(klass, method_name, depth)
      end

      begin
        measure_time(&block)
      ensure
        # Always restore the original methods, even when the measured
        # block raises; otherwise the class stays wrapped.
        each_intercepted_methods(@profile.intercepted_methods) do |klass, method_name, depth|
          reset_intercepted_method(klass, method_name, depth)
        end
      end
    end

    # Report rows: the overall row followed by one or more rows per
    # intercepted method.
    def lines
      super + intercepted_method_lines
    end

    def padding(depth)
      " " * (depth + 1)
    end

    # Returns [label, Benchmark::Tms] rows. A method invoked once gets a
    # single row; a method invoked several times gets a total row plus
    # one row per invocation; a method never invoked gets none.
    def intercepted_method_lines
      lines = []
      @intercepted_method_times.each do |method_name, status|
        depth = status[:depth]
        results = status[:benchmark_results]

        case results.size
        when 0
          # do nothing
        when 1
          lines << single_line(method_name, results.first, depth)
        else
          total = results.inject do |sum, result|
            sum + result
          end
          lines << ["#{padding(depth)}#{method_name}", total]
          lines.concat(multile_lines(method_name, results, depth + 1))
        end
      end

      lines
    end

    def single_line(method_name, result, depth)
      ["#{padding(depth)}#{method_name}", result]
    end

    # One row per invocation, labelled by the profile when it can guess
    # a label and by the 1-based invocation number otherwise.
    def multile_lines(method_name, results, depth)
      results.each_with_index.collect do |result, position|
        index = position + 1
        if @profile.respond_to?(:guess_invocation_label)
          label = @profile.guess_invocation_label(@query, method_name, index)
        end
        label ||= index

        ["#{padding(depth)}#{label}", result]
      end
    end

    # Adds the timings of +other+ into this result (in place) and
    # returns self.
    def +(other)
      other_times = other.instance_variable_get(:@intercepted_method_times) || {}
      other_times.each do |method_name, time|
        own_results = @intercepted_method_times[method_name][:benchmark_results]
        time[:benchmark_results].each_with_index do |result, index|
          # A slot may be missing when +other+ saw more invocations.
          own_results[index] = own_results[index] ? own_results[index] + result : result
        end
      end
      super(other)
    end

    private
    def measure_time
      @benchmark_result = Benchmark.measure do
        @result = yield
      end
    end

    # Yields (class, method name, depth) for every entry. Symbols refer
    # to methods of the target object's class, Method objects to their
    # owner (class-level receivers) or receiver's class, and nested
    # Arrays increase the depth by one.
    def each_intercepted_methods(intercepted_methods, depth=0, &block)
      intercepted_methods.each do |method|
        case method
        when Symbol
          yield(@target_object.class, method, depth)
        when Method
          if method.receiver.is_a?(Class)
            yield(method.owner, method.name, depth)
          else
            yield(method.receiver.class, method.name, depth)
          end
        when Array
          each_intercepted_methods(method, depth + 1, &block)
        else
          raise "bad"
        end
      end
    end

    # Replaces +method_name+ on +klass+ with a wrapper that times every
    # call; the original stays reachable under an aliased name.
    def intercept_method(klass, method_name, depth)
      intercepted_method_times = @intercepted_method_times
      backup_name = original_method_name(method_name)

      intercepted_method_times[method_name] = {
        :benchmark_results => [],
        :depth => depth,
      }

      klass.class_exec do
        alias_method backup_name, method_name
        define_method method_name do |*arguments, &block|
          returned_object = nil
          benchmark_result = Benchmark.measure do
            returned_object = __send__(backup_name, *arguments, &block)
          end
          intercepted_method_times[method_name][:benchmark_results] << benchmark_result
          returned_object
        end
      end
    end

    # Points +method_name+ back at the original implementation.
    def reset_intercepted_method(klass, method_name, depth)
      backup_name = original_method_name(method_name)

      klass.class_exec do
        alias_method method_name, backup_name
      end
    end

    def original_method_name(method_name)
      :"__intercepted__#{method_name}"
    end
  end

  # Report rows as [label, Benchmark::Tms] pairs.
  def lines
    [["#{name} (#{result.hit_count} hits)", @benchmark_result]]
  end

  def name
    profile.name
  end

  # Accumulates +other+'s overall timing into this result; returns self.
  def +(other)
    @benchmark_result += other.benchmark_result
    self
  end
end
739
+
740
# A named benchmark target: a selector plus the methods to time
# individually while it runs.
class Profile
  include ColumnTokenizer

  attr_reader :name, :intercepted_methods
  attr_accessor :mode

  # name                - label shown in reports.
  # selector            - object whose #select(query) is measured.
  # intercepted_methods - methods to time separately (default: none).
  def initialize(name, selector, intercepted_methods=[])
    @name = name
    @selector = selector
    @intercepted_methods = intercepted_methods
  end

  # Runs the benchmark for +query+. Only the :measure_time mode is
  # handled; any other mode raises RuntimeError.
  def take_benchmark(query)
    raise "bad" unless mode == :measure_time

    measure_time(query)
  end

  # Label for the +index+-th (1-based) invocation of +method_name+:
  # the matching drilldown column. Raises RuntimeError for methods
  # whose name mentions neither "drilldown" nor "do_group".
  def guess_invocation_label(query, method_name, index)
    unless method_name.to_s =~ /drilldown|do_group/
      raise "bad: #{method_name}"
    end

    drilldown_columns = tokenize_column_list(query.drilldown_columns).uniq
    drilldown_columns[index - 1]
  end

  private
  def measure_time(query)
    BenchmarkResult::Time.new(self, @selector, query) do
      @selector.select(query)
    end
  end
end
776
+
777
# Drives the benchmark: runs every registered profile against each query,
# checks that the profiles agree, and prints a report.
class BenchmarkRunner
  DEFAULT_MODE = :measure_time # :measure_memory, :measure_io, :measure_???
  LOCK_TIMEOUT_SECONDS = 10
  DEFAULT_REPEAT_COUNT = 5
  DEFAULT_WIKIPEDIA_DATABASE_LOCATION = "/tmp/wikipedia-db/db"

  # Context whose database is locked while benchmarks are collected.
  attr_accessor :context

  # options - Hash; :mode and :repeat_count are read by this class.
  def initialize(options={})
    @options = options
    @profiles = []
    @queries = []
  end

  def benchmark_mode
    @options[:mode] || DEFAULT_MODE
  end

  # Registers +profile+ and assigns it the runner's benchmark mode.
  def add_profile(profile)
    profile.mode = benchmark_mode
    @profiles << profile
  end

  def add_query(query, label=nil)
    @queries << [query, label]
  end

  # Runs the block while holding the database lock (timeout is passed
  # as LOCK_TIMEOUT_SECONDS * 1000).
  def lock
    @context.database.lock(:timeout => LOCK_TIMEOUT_SECONDS * 1000) do
      yield
    end
  end

  # Returns one benchmark result per registered profile.
  def collect_benchmarks(query)
    lock do
      @profiles.collect do |profile|
        profile.take_benchmark(query)
      end
    end
  end

  # Dumps the query and its benchmarks when the DEBUG environment
  # variable is set.
  def debug_benchmarks(query, benchmarks)
    return unless ENV["DEBUG"]

    # Loaded lazily: only the debug path needs pp.
    require 'pp'
    pp query
    pp benchmarks
  end

  # Benchmarks +query+ a single time and prints the report.
  def run_once(query)
    benchmarks = do_run_once(query)
    # A single run must not be averaged over the configured repeat count.
    report_benchmarks(benchmarks, query, nil, 1)
  end

  def do_run_once(query)
    benchmarks = collect_benchmarks(query)

    debug_benchmarks(query, benchmarks)
    verify_results(benchmarks)

    benchmarks
  end

  # Prints the report for +benchmarks+.
  #
  # label - optional heading for the query (default: nil).
  # count - number of runs accumulated in +benchmarks+, used to average
  #         the timings (default: #repeat_count).
  def report_benchmarks(benchmarks, query, label=nil, count=repeat_count)
    report = create_report(benchmarks, query, label, count)
    report.print
  end

  def repeat_count
    @options[:repeat_count] || DEFAULT_REPEAT_COUNT
  end

  # Benchmarks +query+, or every registered query when none is given.
  # Raises RuntimeError ("no query") when there is nothing to run.
  def run(query=nil)
    return do_run(query) if query

    raise "no query" if @queries.empty?

    @queries.each_with_index do |(registered_query, label), position|
      do_run(registered_query, "#{position + 1}. #{label}")
      puts
      puts
    end
  end

  # Repeats the benchmark #repeat_count times, sums the runs per
  # profile and prints the report.
  def do_run(query, label=nil)
    benchmarks_set = repeat_count.times.collect do
      do_run_once(query)
    end
    total_benchmarks = benchmarks_set.shift
    benchmarks_set.each do |benchmarks|
      benchmarks.each_with_index do |benchmark, index|
        total_benchmarks[index] += benchmark
      end
    end
    report_benchmarks(total_benchmarks, query, label)
  end

  # Raises RuntimeError ("bad") unless every profile produced a result
  # equal to the first profile's. Skipped when NO_VERIFY is set.
  def verify_results(benchmarks)
    return if ENV["NO_VERIFY"]

    expected_result = benchmarks.first.result
    benchmarks.drop(1).each do |benchmark|
      raise "bad" unless assert_equivalent_to(expected_result, benchmark.result)
    end
  end

  def assert_equivalent_to(first_result, second_result)
    first_result == second_result
  end

  def create_report(benchmarks, query, label=nil, count=repeat_count)
    Report.new(query, label, benchmarks, count)
  end

  class << self
    # Opens the database and registers the two standard profiles
    # (select by command, select by method) on +runner+.
    #
    # options - Hash; :database_path overrides the default location.
    def select_benchmark_default_setup(runner, options=nil)
      options ||= {}

      configuration = Configuration.new
      configuration.database_path = options[:database_path] || DEFAULT_WIKIPEDIA_DATABASE_LOCATION
      ensure_database(configuration)

      context = Groonga::Context.new
      select_command = SelectorByCommand.new(context, configuration.database_path)
      select_method = SelectorByMethod.new(context, configuration.database_path)
      select_command_profile = command_selector_profile(select_command)
      select_method_profile = method_selector_profile(select_method)

      runner.context = context
      runner.add_profile(select_command_profile)
      runner.add_profile(select_method_profile)
    end

    # Prints setup instructions and exits with status 1 when the
    # configured database path does not exist.
    def ensure_database(configuration)
      return if File.exist?(configuration.database_path)

      # The path is taken from the options (the script's --database
      # switch); no environment variable is consulted.
      puts 'you must create wikipedia database to use, or specify an existing one via the "--database" option'
      puts
      puts 'how to create wikipedia database'
      puts '1. download wikipedia dump.'
      puts ' $ wget -c http://download.wikimedia.org/jawiki/latest/jawiki-latest-pages-articles.xml.bz2'
      puts '2. create groonga database from the dump'
      puts ' $ cat jawiki-latest-pages-articles.xml.bz2 | bunzip2 | ruby1.9.1 ./benchmark/create-wikipedia-database.rb'
      exit 1
    end

    def command_selector_profile(select_command)
      Profile.new("select by command",
                  select_command,
                  [select_command.context.method(:send),
                   Groonga::Context::SelectResult.method(:parse)])
    end

    # The nested Array lists the steps timed one level below :drilldown.
    def method_selector_profile(select_method)
      Profile.new("select by method",
                  select_method,
                  [:do_select,
                   :sort,
                   :format,
                   :drilldown, [:do_group,
                                :drilldown_sort,
                                :drilldown_format]])
    end

    def output_columns_without_content
      "--output_columns '_id _key year wday timestamp month hour date last_contributor'"
    end

    def output_columns_with_content
      "--output_columns '_id _key year wday timestamp month hour date last_contributor content'"
    end

    # Returns [command line, label] pairs for the built-in benchmark set.
    def predefined_queries
      [
        ["select Documents",
         "minimum command"],
        ["select Documents --filter true",
         "select all"],
        ["select Documents --filter false",
         "select none"],
        ["select Documents content アルミ #{output_columns_without_content}",
         "full text search"],
        ["select Documents content アルミ #{output_columns_without_content} --limit 0",
         "full text search with no limit"],
        ["select Documents content アルミ #{output_columns_with_content}",
         "full text search output long text column"],
        ["select Documents content アルミ #{output_columns_without_content} --limit 1000",
         "full text search with large limit"],
        ["select Documents --filter true --limit 0 --drilldown last_contributor --drilldown_sortby _nsubrecs",
         "drilldown"],
        ["select Documents --filter true --limit 0 --drilldown last_contributor --drilldown_sortby _nsubrecs --drilldown_limit 10000",
         "drilldown with large drilldown_limit"],
        ["select Documents --sortby _key",
         "sort"],
        ["select Documents --sortby _key --drilldown 'year month date wday hour, last_contributor links' --drilldown_sortby _nsubrecs",
         "sort with drilldown"],
      ]
    end

    # Parses every predefined command line and registers it on +runner+.
    def load_predefined_queries(runner, options)
      predefined_queries.each do |command, label|
        query = Query.parse_groonga_query_log(command)
        runner.add_query(query, label)
      end
    end
  end
end
987
+
988
# Prints the averaged timings of one benchmarked query.
class Report
  # query        - responds to #original_log_entry.
  # query_label  - heading printed next to "select command:".
  # benchmarks   - objects whose #lines returns [label, timing] rows.
  # repeat_count - divisor applied to every timing before printing.
  def initialize(query, query_label, benchmarks, repeat_count)
    @query, @query_label = query, query_label
    @benchmarks, @repeat_count = benchmarks, repeat_count
  end

  def compare
  end

  # Writes the header, the caption and one aligned row per line.
  def print
    puts "select command: #{@query_label}"
    puts " #{@query.original_log_entry}"
    puts
    puts "repeated #{@repeat_count} time(s). Average times are:"

    rows = @benchmarks.inject([]) do |collected, benchmark|
      collected + benchmark.lines
    end
    label_width = rows.collect { |row| row.first.size }.max

    caption = Benchmark::Tms::CAPTION.rstrip
    puts(" " * (label_width - 1) + caption)
    rows.each do |label, result|
      average = result / @repeat_count
      puts "#{label.ljust(label_width)} #{average.to_s.strip}"
    end
  end
end
1017
+
1018
# Command line entry point: parse the switches, set up the runner and
# benchmark either the given command or the predefined query set.
options = {
  :method => [:measure_time],
}

option_parser = OptionParser.new

option_parser.on("--repeat=COUNT",
                 "repeat each query COUNT times",
                 "(default: #{BenchmarkRunner::DEFAULT_REPEAT_COUNT})") do |count|
  options[:repeat_count] = count.to_i
end

option_parser.on("--command=COMMAND",
                 "use COMMAND instead of default predefined ones") do |command|
  options[:query] = Query.parse_groonga_query_log(command)
end

option_parser.on("--database=PATH",
                 "use database located at PATH",
                 "(default: #{BenchmarkRunner::DEFAULT_WIKIPEDIA_DATABASE_LOCATION})") do |path|
  options[:database_path] = path
end

option_parser.parse!(ARGV)

runner = BenchmarkRunner.new(options)
BenchmarkRunner.select_benchmark_default_setup(runner, options)
custom_query = options[:query]
# The predefined set is only loaded when no --command was given.
BenchmarkRunner.load_predefined_queries(runner, options) if custom_query.nil?

if custom_query
  runner.run(custom_query)
else
  runner.run
end