groonga 0.0.7 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (102) hide show
  1. data/NEWS.ja.rdoc +56 -0
  2. data/NEWS.rdoc +58 -0
  3. data/Rakefile +2 -3
  4. data/benchmark/read-write-many-small-items.rb +16 -32
  5. data/benchmark/write-many-small-items.rb +14 -28
  6. data/example/bookmark.rb +19 -17
  7. data/example/index-html.rb +11 -1
  8. data/example/search/config.ru +14 -9
  9. data/ext/rb-grn-array.c +6 -6
  10. data/ext/rb-grn-column.c +348 -18
  11. data/ext/rb-grn-context.c +8 -4
  12. data/ext/rb-grn-database.c +6 -7
  13. data/ext/rb-grn-exception.c +101 -5
  14. data/ext/rb-grn-expression.c +206 -23
  15. data/ext/rb-grn-fix-size-column.c +6 -39
  16. data/ext/rb-grn-hash.c +24 -24
  17. data/ext/rb-grn-index-column.c +74 -19
  18. data/ext/rb-grn-logger.c +48 -0
  19. data/ext/rb-grn-object.c +281 -67
  20. data/ext/rb-grn-operation.c +1 -1
  21. data/ext/rb-grn-patricia-trie-cursor.c +10 -1
  22. data/ext/rb-grn-patricia-trie.c +268 -7
  23. data/ext/rb-grn-query.c +52 -1
  24. data/ext/rb-grn-record.c +8 -2
  25. data/ext/rb-grn-snippet.c +63 -1
  26. data/ext/rb-grn-table-cursor-key-support.c +15 -1
  27. data/ext/rb-grn-table-cursor.c +57 -0
  28. data/ext/rb-grn-table-key-support.c +382 -46
  29. data/ext/rb-grn-table.c +729 -192
  30. data/ext/rb-grn-type.c +63 -12
  31. data/ext/rb-grn-utils.c +156 -158
  32. data/ext/rb-grn-variable.c +18 -0
  33. data/ext/rb-grn.h +85 -21
  34. data/ext/rb-groonga.c +13 -3
  35. data/extconf.rb +19 -4
  36. data/html/developer.html +1 -1
  37. data/html/header.html.erb +1 -1
  38. data/html/index.html +4 -4
  39. data/lib/groonga.rb +10 -0
  40. data/lib/groonga/expression-builder.rb +81 -42
  41. data/lib/groonga/patricia-trie.rb +13 -0
  42. data/lib/groonga/record.rb +158 -13
  43. data/lib/groonga/schema.rb +339 -33
  44. data/pkg-config.rb +6 -1
  45. data/test-unit/lib/test/unit.rb +23 -42
  46. data/test-unit/lib/test/unit/assertionfailederror.rb +11 -0
  47. data/test-unit/lib/test/unit/assertions.rb +87 -9
  48. data/test-unit/lib/test/unit/autorunner.rb +20 -11
  49. data/test-unit/lib/test/unit/collector.rb +1 -8
  50. data/test-unit/lib/test/unit/collector/load.rb +2 -3
  51. data/test-unit/lib/test/unit/color-scheme.rb +13 -1
  52. data/test-unit/lib/test/unit/diff.rb +223 -37
  53. data/test-unit/lib/test/unit/error.rb +4 -0
  54. data/test-unit/lib/test/unit/failure.rb +31 -5
  55. data/test-unit/lib/test/unit/notification.rb +8 -4
  56. data/test-unit/lib/test/unit/omission.rb +51 -3
  57. data/test-unit/lib/test/unit/pending.rb +4 -0
  58. data/test-unit/lib/test/unit/testcase.rb +55 -4
  59. data/test-unit/lib/test/unit/ui/console/testrunner.rb +190 -4
  60. data/test-unit/lib/test/unit/ui/emacs/testrunner.rb +14 -0
  61. data/test-unit/lib/test/unit/ui/testrunner.rb +8 -0
  62. data/test-unit/lib/test/unit/version.rb +1 -1
  63. data/test-unit/sample/{tc_adder.rb → test_adder.rb} +3 -1
  64. data/test-unit/sample/{tc_subtracter.rb → test_subtracter.rb} +3 -1
  65. data/test-unit/sample/test_user.rb +1 -0
  66. data/test-unit/test/collector/test-descendant.rb +2 -4
  67. data/test-unit/test/collector/test_objectspace.rb +7 -5
  68. data/test-unit/test/run-test.rb +2 -0
  69. data/test-unit/test/test-color-scheme.rb +7 -0
  70. data/test-unit/test/test-diff.rb +48 -7
  71. data/test-unit/test/test-omission.rb +1 -1
  72. data/test-unit/test/test-testcase.rb +47 -0
  73. data/test-unit/test/test_assertions.rb +79 -10
  74. data/test/groonga-test-utils.rb +6 -1
  75. data/test/test-array.rb +29 -14
  76. data/test/test-column.rb +107 -55
  77. data/test/test-context.rb +5 -0
  78. data/test/test-database.rb +2 -37
  79. data/test/test-exception.rb +9 -1
  80. data/test/test-expression-builder.rb +23 -5
  81. data/test/test-expression.rb +44 -8
  82. data/test/test-fix-size-column.rb +16 -5
  83. data/test/test-gqtp.rb +70 -0
  84. data/test/test-hash.rb +142 -43
  85. data/test/test-index-column.rb +9 -9
  86. data/test/test-patricia-trie.rb +79 -20
  87. data/test/test-procedure.rb +4 -2
  88. data/test/test-record.rb +32 -20
  89. data/test/test-remote.rb +3 -2
  90. data/test/test-schema.rb +226 -92
  91. data/test/test-table-cursor.rb +103 -1
  92. data/test/test-table-offset-and-limit.rb +102 -0
  93. data/test/test-table-select-normalize.rb +4 -4
  94. data/test/test-table-select.rb +52 -8
  95. data/test/test-table.rb +235 -116
  96. data/test/test-type.rb +2 -2
  97. data/test/test-variable-size-column.rb +21 -5
  98. data/test/test-vector-column.rb +76 -0
  99. data/{TUTORIAL.ja.rdoc → text/TUTORIAL.ja.rdoc} +52 -52
  100. data/text/expression.rdoc +284 -0
  101. metadata +11 -7
  102. data/test-unit/sample/ts_examples.rb +0 -7
@@ -219,7 +219,12 @@ class PackageConfig
219
219
  end
220
220
  extern "const char *dln_find_exe(const char *, const char *)"
221
221
  end
222
- dln.dln_find_exe(pkg_config.to_s, ".")
222
+ path = dln.dln_find_exe(pkg_config.to_s, nil)
223
+ if path.size.zero?
224
+ nil
225
+ else
226
+ Pathname(path.to_s)
227
+ end
223
228
  end
224
229
 
225
230
  def guess_default_path
@@ -179,7 +179,7 @@ module Test # :nodoc:
179
179
  #
180
180
  # require 'test/unit'
181
181
  #
182
- # class TC_MyTest < Test::Unit::TestCase
182
+ # class MyTest < Test::Unit::TestCase
183
183
  # # def setup
184
184
  # # end
185
185
  #
@@ -194,21 +194,17 @@ module Test # :nodoc:
194
194
  #
195
195
  # == Test Runners
196
196
  #
197
- # So, now you have this great test class, but you still need a way to
198
- # run it and view any failures that occur during the run. This is
199
- # where Test::Unit::UI::Console::TestRunner (and others, such as
200
- # Test::Unit::UI::GTK::TestRunner) comes into play. The console test
201
- # runner is automatically invoked for you if you require 'test/unit'
202
- # and simply run the file. To use another runner, or to manually
203
- # invoke a runner, simply call its run class method and pass in an
204
- # object that responds to the suite message with a
205
- # Test::Unit::TestSuite. This can be as simple as passing in your
206
- # TestCase class (which has a class suite method). It might look
207
- # something like this:
208
- #
209
- # require 'test/unit/ui/console/testrunner'
210
- # Test::Unit::UI::Console::TestRunner.run(TC_MyTest)
197
+ # So, now you have this great test class, but you still
198
+ # need a way to run it and view any failures that occur
199
+ # during the run. There are some test runner; console test
200
+ # runner, GTK+ test runner and so on. The console test
201
+ # runner is automatically invoked for you if you require
202
+ # 'test/unit' and simply run the file. To use another
203
+ # runner simply set default test runner ID to
204
+ # Test::Unit::AutoRunner:
211
205
  #
206
+ # require 'test/unit'
207
+ # Test::Unit::AutoRunner.default_runner = "gtk2"
212
208
  #
213
209
  # == Test Suite
214
210
  #
@@ -220,33 +216,17 @@ module Test # :nodoc:
220
216
  # in response to a suite method. The TestSuite can, in turn, contain
221
217
  # other TestSuites or individual tests (typically created by a
222
218
  # TestCase). In other words, you can easily wrap up a group of
223
- # TestCases and TestSuites like this:
224
- #
225
- # require 'test/unit/testsuite'
226
- # require 'tc_myfirsttests'
227
- # require 'tc_moretestsbyme'
228
- # require 'ts_anothersetoftests'
229
- #
230
- # class TS_MyTests
231
- # def self.suite
232
- # suite = Test::Unit::TestSuite.new
233
- # suite << TC_MyFirstTests.suite
234
- # suite << TC_MoreTestsByMe.suite
235
- # suite << TS_AnotherSetOfTests.suite
236
- # return suite
237
- # end
238
- # end
239
- # Test::Unit::UI::Console::TestRunner.run(TS_MyTests)
240
- #
241
- # Now, this is a bit cumbersome, so Test::Unit does a little bit more
242
- # for you, by wrapping these up automatically when you require
243
- # 'test/unit'. What does this mean? It means you could write the above
244
- # test case like this instead:
219
+ # TestCases and TestSuites.
220
+ #
221
+ # Test::Unit does a little bit more for you, by wrapping
222
+ # these up automatically when you require
223
+ # 'test/unit'. What does this mean? It means you could
224
+ # write the above test case like this instead:
245
225
  #
246
226
  # require 'test/unit'
247
- # require 'tc_myfirsttests'
248
- # require 'tc_moretestsbyme'
249
- # require 'ts_anothersetoftests'
227
+ # require 'test_myfirsttests'
228
+ # require 'test_moretestsbyme'
229
+ # require 'test_anothersetoftests'
250
230
  #
251
231
  # Test::Unit is smart enough to find all the test cases existing in
252
232
  # the ObjectSpace and wrap them up into a suite for you. It then runs
@@ -323,12 +303,13 @@ module Test # :nodoc:
323
303
  #
324
304
 
325
305
  module Unit
326
- # If set to false Test::Unit will not automatically run at exit.
306
+ # Set true when Test::Unit has run. If set to true Test::Unit
307
+ # will not automatically run at exit.
327
308
  def self.run=(flag)
328
309
  @run = flag
329
310
  end
330
311
 
331
- # Automatically run tests at exit?
312
+ # Already tests have run?
332
313
  def self.run?
333
314
  @run ||= false
334
315
  end
@@ -9,6 +9,17 @@ module Test
9
9
 
10
10
  # Thrown by Test::Unit::Assertions when an assertion fails.
11
11
  class AssertionFailedError < StandardError
12
+ attr_accessor :expected, :actual, :user_message
13
+ attr_accessor :inspected_expected, :inspected_actual
14
+ def initialize(message=nil, options=nil)
15
+ options ||= {}
16
+ @expected = options[:expected]
17
+ @actual = options[:actual]
18
+ @inspected_expected = options[:inspected_expected]
19
+ @inspected_actual = options[:inspected_actual]
20
+ @user_message = options[:user_message]
21
+ super(message)
22
+ end
12
23
  end
13
24
  end
14
25
  end
@@ -84,7 +84,16 @@ module Test
84
84
  <?> expected but was
85
85
  <?>.?
86
86
  EOT
87
- assert_block(full_message) { expected == actual }
87
+ begin
88
+ assert_block(full_message) { expected == actual }
89
+ rescue AssertionFailedError => failure
90
+ failure.expected = expected
91
+ failure.actual = actual
92
+ failure.inspected_expected = AssertionMessage.convert(expected)
93
+ failure.inspected_actual = AssertionMessage.convert(actual)
94
+ failure.user_message = message
95
+ raise
96
+ end
88
97
  end
89
98
 
90
99
  ##
@@ -789,6 +798,54 @@ EOT
789
798
  end
790
799
  end
791
800
 
801
+ ##
802
+ # Passes if +object+#+alias_name+ is an alias method of
803
+ # +object+#+original_name+.
804
+ #
805
+ # Example:
806
+ # assert_alias_method([], :length, :size) # -> pass
807
+ # assert_alias_method([], :size, :length) # -> pass
808
+ # assert_alias_method([], :each, :size) # -> fail
809
+ def assert_alias_method(object, alias_name, original_name, message=nil)
810
+ _wrap_assertion do
811
+ find_method_failure_message = Proc.new do |method_name|
812
+ build_message(message,
813
+ "<?>.? doesn't exist\n" +
814
+ "(Class: <?>)",
815
+ object,
816
+ AssertionMessage.literal(method_name),
817
+ object.class)
818
+ end
819
+
820
+ alias_method = original_method = nil
821
+ assert_block(find_method_failure_message.call(alias_name)) do
822
+ begin
823
+ alias_method = object.method(alias_name)
824
+ true
825
+ rescue NameError
826
+ false
827
+ end
828
+ end
829
+ assert_block(find_method_failure_message.call(original_name)) do
830
+ begin
831
+ original_method = object.method(original_name)
832
+ true
833
+ rescue NameError
834
+ false
835
+ end
836
+ end
837
+
838
+ full_message = build_message(message,
839
+ "<?> is alias of\n" +
840
+ "<?> expected",
841
+ alias_method,
842
+ original_method)
843
+ assert_block(full_message) do
844
+ alias_method == original_method
845
+ end
846
+ end
847
+ end
848
+
792
849
  ##
793
850
  # Builds a failure message. +head+ is added before the +template+ and
794
851
  # +arguments+ replaces the '?'s positionally in the template.
@@ -881,23 +938,44 @@ EOT
881
938
  end
882
939
 
883
940
  MAX_DIFF_TARGET_STRING_SIZE = 1000
941
+ def max_diff_target_string_size
942
+ size = ENV["TEST_UNIT_MAX_DIFF_TARGET_STRING_SIZE"]
943
+ if size
944
+ begin
945
+ size = Integer(size)
946
+ rescue ArgumentError
947
+ size = nil
948
+ end
949
+ end
950
+ size || MAX_DIFF_TARGET_STRING_SIZE
951
+ end
952
+
884
953
  def diff_target_string?(string)
885
954
  if string.respond_to?(:bytesize)
886
- string.bytesize < MAX_DIFF_TARGET_STRING_SIZE
955
+ string.bytesize < max_diff_target_string_size
887
956
  else
888
- string.size < MAX_DIFF_TARGET_STRING_SIZE
957
+ string.size < max_diff_target_string_size
958
+ end
959
+ end
960
+
961
+ def prepare_for_diff(from, to)
962
+ if !from.is_a?(String) or !to.is_a?(String)
963
+ from = convert(from)
964
+ to = convert(to)
965
+ end
966
+
967
+ if diff_target_string?(from) and diff_target_string?(to)
968
+ [from, to]
969
+ else
970
+ [nil, nil]
889
971
  end
890
972
  end
891
973
 
892
974
  def delayed_diff(from, to)
893
975
  delayed_literal do
894
- if !from.is_a?(String) or !to.is_a?(String)
895
- from = convert(from)
896
- to = convert(to)
897
- end
976
+ from, to = prepare_for_diff(from, to)
898
977
 
899
- diff = nil
900
- diff = "" if !diff_target_string?(from) or !diff_target_string?(to)
978
+ diff = "" if from.nil? or to.nil?
901
979
  diff ||= Diff.readable(from, to)
902
980
  if /^[-+]/ !~ diff
903
981
  diff = ""
@@ -18,6 +18,15 @@ module Test
18
18
  RUNNERS[id.to_s]
19
19
  end
20
20
 
21
+ @@default_runner = nil
22
+ def default_runner
23
+ runner(@@default_runner)
24
+ end
25
+
26
+ def default_runner=(id)
27
+ @@default_runner = id
28
+ end
29
+
21
30
  def register_collector(id, collector_builder=Proc.new)
22
31
  COLLECTORS[id] = collector_builder
23
32
  COLLECTORS[id.to_s] = collector_builder
@@ -123,8 +132,6 @@ module Test
123
132
  puts e
124
133
  puts options
125
134
  exit(false)
126
- else
127
- @filters << proc{false} unless(@filters.empty?)
128
135
  end
129
136
  not @to_run.empty?
130
137
  ensure
@@ -177,9 +184,9 @@ module Test
177
184
  n = (%r{\A/(.*)/\Z} =~ n ? Regexp.new($1) : n)
178
185
  case n
179
186
  when Regexp
180
- @filters << proc{|t| n =~ t.method_name ? true : nil}
187
+ @filters << proc{|t| n =~ t.method_name ? true : false}
181
188
  else
182
- @filters << proc{|t| n == t.method_name ? true : nil}
189
+ @filters << proc{|t| n == t.method_name}
183
190
  end
184
191
  end
185
192
 
@@ -189,17 +196,17 @@ module Test
189
196
  n = (%r{\A/(.*)/\Z} =~ n ? Regexp.new($1) : n)
190
197
  case n
191
198
  when Regexp
192
- @filters << proc{|t| n =~ t.class.name ? true : nil}
199
+ @filters << proc{|t| n =~ t.class.name ? true : false}
193
200
  else
194
- @filters << proc{|t| n == t.class.name ? true : nil}
201
+ @filters << proc{|t| n == t.class.name}
195
202
  end
196
203
  end
197
204
 
198
205
  priority_filter = Proc.new do |test|
199
- if @filters.size > 2
200
- nil
206
+ if @filters == [priority_filter]
207
+ Priority::Checker.new(test).need_to_run?
201
208
  else
202
- Priority::Checker.new(test).need_to_run? or nil
209
+ nil
203
210
  end
204
211
  end
205
212
  o.on("--[no-]priority-mode",
@@ -325,11 +332,13 @@ module Test
325
332
 
326
333
  private
327
334
  def default_runner
335
+ runner = self.class.default_runner
328
336
  if ENV["EMACS"] == "t"
329
- self.class.runner(:emacs)
337
+ runner ||= self.class.runner(:emacs)
330
338
  else
331
- self.class.runner(:console)
339
+ runner ||= self.class.runner(:console)
332
340
  end
341
+ runner
333
342
  end
334
343
 
335
344
  def default_collector
@@ -23,14 +23,7 @@ module Test
23
23
  def include?(test)
24
24
  return true if(@filters.empty?)
25
25
  @filters.each do |filter|
26
- result = filter[test]
27
- if(result.nil?)
28
- next
29
- elsif(!result)
30
- return false
31
- else
32
- return true
33
- end
26
+ return false if filter[test] == false
34
27
  end
35
28
  true
36
29
  end
@@ -29,7 +29,7 @@ module Test
29
29
  add_load_path(@base) do
30
30
  froms = ["."] if froms.empty?
31
31
  test_suites = froms.collect do |from|
32
- test_suite = collect_recursive(from, find_test_cases)
32
+ test_suite = collect_recursive(resolve_path(from), find_test_cases)
33
33
  test_suite = nil if test_suite.tests.empty?
34
34
  test_suite
35
35
  end.compact
@@ -56,10 +56,9 @@ module Test
56
56
  end
57
57
 
58
58
  private
59
- def collect_recursive(name, already_gathered)
59
+ def collect_recursive(path, already_gathered)
60
60
  sub_test_suites = []
61
61
 
62
- path = resolve_path(name)
63
62
  if path.directory?
64
63
  directories, files = path.children.partition do |child|
65
64
  child.directory?
@@ -18,7 +18,19 @@ module Test
18
18
  "case" => Color.new("white", :bold => true) +
19
19
  Color.new("blue", :foreground => false),
20
20
  "suite" => Color.new("white", :bold => true) +
21
- Color.new("green", :foreground => false))
21
+ Color.new("green", :foreground => false),
22
+ "diff-inserted-tag" =>
23
+ Color.new("red", :bold => true),
24
+ "diff-deleted-tag" =>
25
+ Color.new("green", :bold => true),
26
+ "diff-difference-tag" =>
27
+ Color.new("cyan", :bold => true),
28
+ "diff-inserted" =>
29
+ Color.new("red", :foreground => false) +
30
+ Color.new("white", :bold => true),
31
+ "diff-deleted" =>
32
+ Color.new("green", :foreground => false) +
33
+ Color.new("white", :bold => true))
22
34
  end
23
35
 
24
36
  @@schemes = {}
@@ -35,7 +35,7 @@ module Test
35
35
 
36
36
  def grouped_operations(context_size=nil)
37
37
  context_size ||= 3
38
- _operations = operations
38
+ _operations = operations.dup
39
39
  _operations = [[:equal, 0, 0, 0, 0]] if _operations.empty?
40
40
  expand_edge_equal_operations!(_operations, context_size)
41
41
 
@@ -266,29 +266,187 @@ module Test
266
266
  end
267
267
  end
268
268
 
269
+ class UTF8Line
270
+ class << self
271
+ # from http://unicode.org/reports/tr11/
272
+ WIDE_CHARACTERS =
273
+ [0x1100..0x1159, 0x115F..0x115F, 0x2329..0x232A,
274
+ 0x2E80..0x2E99, 0x2E9B..0x2EF3, 0x2F00..0x2FD5,
275
+ 0x2FF0..0x2FFB, 0x3000..0x303E, 0x3041..0x3096,
276
+ 0x3099..0x30FF, 0x3105..0x312D, 0x3131..0x318E,
277
+ 0x3190..0x31B7, 0x31C0..0x31E3, 0x31F0..0x321E,
278
+ 0x3220..0x3243, 0x3250..0x32FE, 0x3300..0x4DB5,
279
+ 0x4E00..0x9FC3, 0xA000..0xA48C, 0xA490..0xA4C6,
280
+ 0xAC00..0xD7A3, 0xF900..0xFA2D, 0xFA30..0xFA6A,
281
+ 0xFA70..0xFAD9, 0xFE10..0xFE19, 0xFE30..0xFE52,
282
+ 0xFE54..0xFE66, 0xFE68..0xFE6B, 0xFF01..0xFF60,
283
+ 0xFFE0..0xFFE6, 0x20000..0x2FFFD, 0x30000..0x3FFFD,
284
+ ]
285
+
286
+ AMBIGUOUS =
287
+ [0x00A1..0x00A1, 0x00A4..0x00A4, 0x00A7..0x00A8,
288
+ 0x00AA..0x00AA, 0x00AD..0x00AE, 0x00B0..0x00B4,
289
+ 0x00B6..0x00BA, 0x00BC..0x00BF, 0x00C6..0x00C6,
290
+ 0x00D0..0x00D0, 0x00D7..0x00D8, 0x00DE..0x00E1,
291
+ 0x00E6..0x00E6, 0x00E8..0x00EA, 0x00EC..0x00ED,
292
+ 0x00F0..0x00F0, 0x00F2..0x00F3, 0x00F7..0x00FA,
293
+ 0x00FC..0x00FC, 0x00FE..0x00FE, 0x0101..0x0101,
294
+ 0x0111..0x0111, 0x0113..0x0113, 0x011B..0x011B,
295
+ 0x0126..0x0127, 0x012B..0x012B, 0x0131..0x0133,
296
+ 0x0138..0x0138, 0x013F..0x0142, 0x0144..0x0144,
297
+ 0x0148..0x014B, 0x014D..0x014D, 0x0152..0x0153,
298
+ 0x0166..0x0167, 0x016B..0x016B, 0x01CE..0x01CE,
299
+ 0x01D0..0x01D0, 0x01D2..0x01D2, 0x01D4..0x01D4,
300
+ 0x01D6..0x01D6, 0x01D8..0x01D8, 0x01DA..0x01DA,
301
+ 0x01DC..0x01DC, 0x0251..0x0251, 0x0261..0x0261,
302
+ 0x02C4..0x02C4, 0x02C7..0x02C7, 0x02C9..0x02CB,
303
+ 0x02CD..0x02CD, 0x02D0..0x02D0, 0x02D8..0x02DB,
304
+ 0x02DD..0x02DD, 0x02DF..0x02DF, 0x0300..0x036F,
305
+ 0x0391..0x03A1, 0x03A3..0x03A9, 0x03B1..0x03C1,
306
+ 0x03C3..0x03C9, 0x0401..0x0401, 0x0410..0x044F,
307
+ 0x0451..0x0451, 0x2010..0x2010, 0x2013..0x2016,
308
+ 0x2018..0x2019, 0x201C..0x201D, 0x2020..0x2022,
309
+ 0x2024..0x2027, 0x2030..0x2030, 0x2032..0x2033,
310
+ 0x2035..0x2035, 0x203B..0x203B, 0x203E..0x203E,
311
+ 0x2074..0x2074, 0x207F..0x207F, 0x2081..0x2084,
312
+ 0x20AC..0x20AC, 0x2103..0x2103, 0x2105..0x2105,
313
+ 0x2109..0x2109, 0x2113..0x2113, 0x2116..0x2116,
314
+ 0x2121..0x2122, 0x2126..0x2126, 0x212B..0x212B,
315
+ 0x2153..0x2154, 0x215B..0x215E, 0x2160..0x216B,
316
+ 0x2170..0x2179, 0x2190..0x2199, 0x21B8..0x21B9,
317
+ 0x21D2..0x21D2, 0x21D4..0x21D4, 0x21E7..0x21E7,
318
+ 0x2200..0x2200, 0x2202..0x2203, 0x2207..0x2208,
319
+ 0x220B..0x220B, 0x220F..0x220F, 0x2211..0x2211,
320
+ 0x2215..0x2215, 0x221A..0x221A, 0x221D..0x2220,
321
+ 0x2223..0x2223, 0x2225..0x2225, 0x2227..0x222C,
322
+ 0x222E..0x222E, 0x2234..0x2237, 0x223C..0x223D,
323
+ 0x2248..0x2248, 0x224C..0x224C, 0x2252..0x2252,
324
+ 0x2260..0x2261, 0x2264..0x2267, 0x226A..0x226B,
325
+ 0x226E..0x226F, 0x2282..0x2283, 0x2286..0x2287,
326
+ 0x2295..0x2295, 0x2299..0x2299, 0x22A5..0x22A5,
327
+ 0x22BF..0x22BF, 0x2312..0x2312, 0x2460..0x24E9,
328
+ 0x24EB..0x254B, 0x2550..0x2573, 0x2580..0x258F,
329
+ 0x2592..0x2595, 0x25A0..0x25A1, 0x25A3..0x25A9,
330
+ 0x25B2..0x25B3, 0x25B6..0x25B7, 0x25BC..0x25BD,
331
+ 0x25C0..0x25C1, 0x25C6..0x25C8, 0x25CB..0x25CB,
332
+ 0x25CE..0x25D1, 0x25E2..0x25E5, 0x25EF..0x25EF,
333
+ 0x2605..0x2606, 0x2609..0x2609, 0x260E..0x260F,
334
+ 0x2614..0x2615, 0x261C..0x261C, 0x261E..0x261E,
335
+ 0x2640..0x2640, 0x2642..0x2642, 0x2660..0x2661,
336
+ 0x2663..0x2665, 0x2667..0x266A, 0x266C..0x266D,
337
+ 0x266F..0x266F, 0x273D..0x273D, 0x2776..0x277F,
338
+ 0xE000..0xF8FF, 0xFE00..0xFE0F, 0xFFFD..0xFFFD,
339
+ 0xE0100..0xE01EF, 0xF0000..0xFFFFD, 0x100000..0x10FFFD,
340
+ ]
341
+
342
+ def wide_character?(character)
343
+ binary_search_ranges(character, WIDE_CHARACTERS) or
344
+ binary_search_ranges(character, AMBIGUOUS)
345
+ end
346
+
347
+ private
348
+ def binary_search_ranges(character, ranges)
349
+ if ranges.size.zero?
350
+ false
351
+ elsif ranges.size == 1
352
+ ranges[0].include?(character)
353
+ else
354
+ half = ranges.size / 2
355
+ range = ranges[half]
356
+ if range.include?(character)
357
+ true
358
+ elsif character < range.begin
359
+ binary_search_ranges(character, ranges[0...half])
360
+ else
361
+ binary_search_ranges(character, ranges[(half + 1)..-1])
362
+ end
363
+ end
364
+ end
365
+ end
366
+
367
+ def initialize(line)
368
+ @line = line
369
+ @characters = @line.unpack("U*")
370
+ end
371
+
372
+ def [](*args)
373
+ result = @characters[*args]
374
+ if result.respond_to?(:pack)
375
+ result.pack("U*")
376
+ else
377
+ result
378
+ end
379
+ end
380
+
381
+ def each(&block)
382
+ @characters.each(&block)
383
+ end
384
+
385
+ def size
386
+ @characters.size
387
+ end
388
+
389
+ def to_s
390
+ @line
391
+ end
392
+
393
+ def compute_width(start, _end)
394
+ width = 0
395
+ start.upto(_end - 1) do |i|
396
+ if self.class.wide_character?(@characters[i])
397
+ width += 2
398
+ else
399
+ width += 1
400
+ end
401
+ end
402
+ width
403
+ end
404
+ end
405
+
269
406
  class ReadableDiffer < Differ
270
407
  def diff(options={})
271
- result = []
272
- matcher = SequenceMatcher.new(@from, @to)
273
- matcher.operations.each do |args|
274
- tag, from_start, from_end, to_start, to_end = args
408
+ @result = []
409
+ operations.each do |tag, from_start, from_end, to_start, to_end|
275
410
  case tag
276
411
  when :replace
277
- result.concat(diff_lines(from_start, from_end, to_start, to_end))
412
+ diff_lines(from_start, from_end, to_start, to_end)
278
413
  when :delete
279
- result.concat(tag_deleted(@from[from_start...from_end]))
414
+ tag_deleted(@from[from_start...from_end])
280
415
  when :insert
281
- result.concat(tag_inserted(@to[to_start...to_end]))
416
+ tag_inserted(@to[to_start...to_end])
282
417
  when :equal
283
- result.concat(tag_equal(@from[from_start...from_end]))
418
+ tag_equal(@from[from_start...from_end])
284
419
  else
285
420
  raise "unknown tag: #{tag}"
286
421
  end
287
422
  end
288
- result
423
+ @result
289
424
  end
290
425
 
291
426
  private
427
+ def operations
428
+ @operations ||= nil
429
+ if @operations.nil?
430
+ matcher = SequenceMatcher.new(@from, @to)
431
+ @operations = matcher.operations
432
+ end
433
+ @operations
434
+ end
435
+
436
+ def default_ratio
437
+ 0.74
438
+ end
439
+
440
+ def cut_off_ratio
441
+ 0.75
442
+ end
443
+
444
+ def tag(mark, contents)
445
+ contents.each do |content|
446
+ @result << "#{mark}#{content}"
447
+ end
448
+ end
449
+
292
450
  def tag_deleted(contents)
293
451
  tag("- ", contents)
294
452
  end
@@ -306,7 +464,7 @@ module Test
306
464
  end
307
465
 
308
466
  def find_diff_line_info(from_start, from_end, to_start, to_end)
309
- best_ratio = 0.74
467
+ best_ratio = default_ratio
310
468
  from_equal_index = to_equal_index = nil
311
469
  from_best_index = to_best_index = nil
312
470
 
@@ -334,30 +492,31 @@ module Test
334
492
  end
335
493
 
336
494
  def diff_lines(from_start, from_end, to_start, to_end)
337
- cut_off = 0.75
338
-
339
495
  info = find_diff_line_info(from_start, from_end, to_start, to_end)
340
496
  best_ratio, from_equal_index, to_equal_index, *info = info
341
497
  from_best_index, to_best_index = info
498
+ from_best_index ||= from_start
499
+ to_best_index ||= to_start
342
500
 
343
- if best_ratio < cut_off
501
+ if best_ratio < cut_off_ratio
344
502
  if from_equal_index.nil?
345
- tagged_from = tag_deleted(@from[from_start...from_end])
346
- tagged_to = tag_inserted(@to[to_start...to_end])
347
503
  if to_end - to_start < from_end - from_start
348
- return tagged_to + tagged_from
504
+ tag_inserted(@to[to_start...to_end])
505
+ tag_deleted(@from[from_start...from_end])
349
506
  else
350
- return tagged_from + tagged_to
507
+ tag_deleted(@from[from_start...from_end])
508
+ tag_inserted(@to[to_start...to_end])
351
509
  end
510
+ return
352
511
  end
353
512
  from_best_index = from_equal_index
354
513
  to_best_index = to_equal_index
355
514
  best_ratio = 1.0
356
515
  end
357
516
 
358
- _diff_lines(from_start, from_best_index, to_start, to_best_index) +
359
- diff_line(@from[from_best_index], @to[to_best_index]) +
360
- _diff_lines(from_best_index + 1, from_end, to_best_index + 1, to_end)
517
+ _diff_lines(from_start, from_best_index, to_start, to_best_index)
518
+ diff_line(@from[from_best_index], @to[to_best_index])
519
+ _diff_lines(from_best_index + 1, from_end, to_best_index + 1, to_end)
361
520
  end
362
521
 
363
522
  def _diff_lines(from_start, from_end, to_start, to_end)
@@ -372,26 +531,54 @@ module Test
372
531
  end
373
532
  end
374
533
 
534
+ def line_operations(from_line, to_line)
535
+ if !from_line.respond_to?(:force_encoding) and $KCODE == "UTF8"
536
+ from_line = UTF8Line.new(from_line)
537
+ to_line = UTF8Line.new(to_line)
538
+ end
539
+ matcher = SequenceMatcher.new(from_line, to_line,
540
+ &method(:space_character?))
541
+ [from_line, to_line, matcher.operations]
542
+ end
543
+
544
+ def compute_width(line, start, _end)
545
+ if line.respond_to?(:encoding) and
546
+ Encoding.compatible?(Encoding::UTF_8, line.encoding)
547
+ utf8_line = line[start..._end].encode(Encoding::UTF_8)
548
+ width = 0
549
+ utf8_line.each_codepoint do |unicode_codepoint|
550
+ if UTF8Line.wide_character?(unicode_codepoint)
551
+ width += 2
552
+ else
553
+ width += 1
554
+ end
555
+ end
556
+ width
557
+ elsif line.is_a?(UTF8Line)
558
+ line.compute_width(start, _end)
559
+ else
560
+ _end - start
561
+ end
562
+ end
563
+
375
564
  def diff_line(from_line, to_line)
376
565
  from_tags = ""
377
566
  to_tags = ""
378
- matcher = SequenceMatcher.new(from_line, to_line,
379
- &method(:space_character?))
380
- operations = matcher.operations
381
- operations.each do |tag, from_start, from_end, to_start, to_end|
382
- from_length = from_end - from_start
383
- to_length = to_end - to_start
567
+ from_line, to_line, _operations = line_operations(from_line, to_line)
568
+ _operations.each do |tag, from_start, from_end, to_start, to_end|
569
+ from_width = compute_width(from_line, from_start, from_end)
570
+ to_width = compute_width(to_line, to_start, to_end)
384
571
  case tag
385
572
  when :replace
386
- from_tags << "^" * from_length
387
- to_tags << "^" * to_length
573
+ from_tags << "^" * from_width
574
+ to_tags << "^" * to_width
388
575
  when :delete
389
- from_tags << "-" * from_length
576
+ from_tags << "-" * from_width
390
577
  when :insert
391
- to_tags << "+" * to_length
578
+ to_tags << "+" * to_width
392
579
  when :equal
393
- from_tags << " " * from_length
394
- to_tags << " " * to_length
580
+ from_tags << " " * from_width
581
+ to_tags << " " * to_width
395
582
  else
396
583
  raise "unknown tag: #{tag}"
397
584
  end
@@ -409,13 +596,12 @@ module Test
409
596
 
410
597
  result = tag_deleted([from_line])
411
598
  unless from_tags.empty?
412
- result.concat(tag_difference(["#{"\t" * common}#{from_tags}"]))
599
+ tag_difference(["#{"\t" * common}#{from_tags}"])
413
600
  end
414
- result.concat(tag_inserted([to_line]))
601
+ tag_inserted([to_line])
415
602
  unless to_tags.empty?
416
- result.concat(tag_difference(["#{"\t" * common}#{to_tags}"]))
603
+ tag_difference(["#{"\t" * common}#{to_tags}"])
417
604
  end
418
- result
419
605
  end
420
606
 
421
607
  def n_leading_characters(string, character)