docdiff 0.6.5 → 0.6.6

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/Gemfile +7 -7
  3. data/Guardfile +4 -4
  4. data/Makefile +1 -1
  5. data/Rakefile +6 -6
  6. data/bin/docdiff +1 -1
  7. data/devutil/Rakefile +12 -5
  8. data/devutil/char_by_charclass.rb +43 -20
  9. data/devutil/charclass_by_char.rb +40 -19
  10. data/devutil/jis0208.rb +263 -231
  11. data/devutil/jis0208_test.rb +196 -0
  12. data/doc/news.md +8 -0
  13. data/docdiff.gemspec +12 -10
  14. data/lib/doc_diff.rb +59 -60
  15. data/lib/docdiff/charstring.rb +225 -241
  16. data/lib/docdiff/cli.rb +285 -250
  17. data/lib/docdiff/diff/contours.rb +1 -1
  18. data/lib/docdiff/diff/editscript.rb +1 -1
  19. data/lib/docdiff/diff/rcsdiff.rb +1 -1
  20. data/lib/docdiff/diff/shortestpath.rb +1 -1
  21. data/lib/docdiff/diff/speculative.rb +1 -1
  22. data/lib/docdiff/diff/subsequence.rb +1 -1
  23. data/lib/docdiff/diff/unidiff.rb +1 -1
  24. data/lib/docdiff/diff.rb +1 -1
  25. data/lib/docdiff/difference.rb +71 -70
  26. data/lib/docdiff/document.rb +129 -109
  27. data/lib/docdiff/encoding/en_ascii.rb +64 -58
  28. data/lib/docdiff/encoding/ja_eucjp.rb +250 -235
  29. data/lib/docdiff/encoding/ja_sjis.rb +240 -226
  30. data/lib/docdiff/encoding/ja_utf8.rb +6952 -6939
  31. data/lib/docdiff/version.rb +1 -1
  32. data/lib/docdiff/view.rb +522 -438
  33. data/lib/docdiff.rb +2 -2
  34. data/test/charstring_test.rb +475 -351
  35. data/test/cli_test.rb +103 -101
  36. data/test/diff_test.rb +15 -16
  37. data/test/difference_test.rb +40 -31
  38. data/test/docdiff_test.rb +162 -136
  39. data/test/document_test.rb +280 -175
  40. data/test/test_helper.rb +2 -1
  41. data/test/view_test.rb +636 -497
  42. metadata +8 -8
  43. data/devutil/testjis0208.rb +0 -38
@@ -0,0 +1,196 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "test/unit"
5
+ require_relative "jis0208"
6
+
7
+ class TestCaseJIS0208 < Test::Unit::TestCase
8
+ # internal methods
9
+
10
+ def test_utf16_to_utf8
11
+ # "\xE3\x80\x80": U+3000 IDEOGRAPHIC SPACE in UTF-8
12
+ expected = String.new("\xE3\x80\x80", encoding: Encoding::ASCII_8BIT)
13
+ actual = JIS0208.new.utf16_to_utf8(["3000"].pack("H*"))
14
+ assert_equal(expected, actual)
15
+ end
16
+
17
+ def test_characters
18
+ expected = String.new("\xE3\x80\x80", encoding: Encoding::ASCII_8BIT)
19
+ actual = JIS0208.new.characters[1][1][:u8]
20
+ assert_equal(expected, actual)
21
+ end
22
+
23
+ def test_char
24
+ expected = "\\xe3\\x80\\x80"
25
+ actual = JIS0208.new.char(1, 1, "UTF-8")
26
+ assert_equal(expected, actual)
27
+ end
28
+
29
+ # EUC-JP expressions
30
+
31
+ def test_euc_ja_alnum
32
+ exps = JIS0208.new.euc_ja_alnum
33
+ expected = ["\\xa3\\xb0", "\\xa3\\xfa", 62]
34
+ assert_equal(expected, [exps.first, exps.last, exps.size])
35
+ end
36
+
37
+ def test_euc_ja_blank
38
+ exps = JIS0208.new.euc_ja_blank
39
+ expected = ["\\xa1\\xa1", "\\xa1\\xa1", 1]
40
+ assert_equal(expected, [exps.first, exps.last, exps.size])
41
+ end
42
+
43
+ def test_euc_ja_print
44
+ exps = JIS0208.new.euc_ja_print
45
+ expected = ["\\xa3\\xb0", "\\xa1\\xa1", 355]
46
+ assert_equal(expected, [exps.first, exps.last, exps.size])
47
+ end
48
+
49
+ def test_euc_ja_graph
50
+ exps = JIS0208.new.euc_ja_graph
51
+ expected = ["\\xa3\\xb0", "\\xa8\\xc0", 354]
52
+ assert_equal(expected, [exps.first, exps.last, exps.size])
53
+ end
54
+
55
+ def test_euc_ja_punct
56
+ exps = JIS0208.new.euc_ja_punct
57
+ expected = ["\\xa1\\xa2", "\\xa8\\xc0", 292]
58
+ assert_equal(expected, [exps.first, exps.last, exps.size])
59
+ end
60
+
61
+ def test_euc_ja_space
62
+ exps = JIS0208.new.euc_ja_space
63
+ expected = ["\\xa1\\xa1", "\\xa1\\xa1", 1]
64
+ assert_equal(expected, [exps.first, exps.last, exps.size])
65
+ end
66
+
67
+ def test_euc_hiragana
68
+ exps = JIS0208.new.euc_hiragana
69
+ expected = ["\\xa4\\xa1", "\\xa4\\xf3", 83]
70
+ assert_equal(expected, [exps.first, exps.last, exps.size])
71
+ end
72
+
73
+ def test_euc_katakana
74
+ exps = JIS0208.new.euc_katakana
75
+ expected = ["\\xa5\\xa1", "\\xa5\\xf6", 86]
76
+ assert_equal(expected, [exps.first, exps.last, exps.size])
77
+ end
78
+
79
+ def test_euc_kanji
80
+ exps = JIS0208.new.euc_kanji
81
+ expected = ["\\xb0\\xa1-\\xb0\\xfe", "\\xf4\\xa1-\\xf4\\xa6", 69]
82
+ assert_equal(expected, [exps.first, exps.last, exps.size])
83
+ end
84
+
85
+ # Shift-JIS (CP932) expressions
86
+
87
+ def test_sjis_ja_alnum
88
+ exps = JIS0208.new.sjis_ja_alnum
89
+ expected = ["\\x82\\x4f", "\\x82\\x9a", 62]
90
+ assert_equal(expected, [exps.first, exps.last, exps.size])
91
+ end
92
+
93
+ def test_sjis_ja_blank
94
+ exps = JIS0208.new.sjis_ja_blank
95
+ expected = ["\\x81\\x40", "\\x81\\x40", 1]
96
+ assert_equal(expected, [exps.first, exps.last, exps.size])
97
+ end
98
+
99
+ def test_sjis_ja_print
100
+ exps = JIS0208.new.sjis_ja_print
101
+ expected = ["\\x82\\x4f", "\\x81\\x40", 355]
102
+ assert_equal(expected, [exps.first, exps.last, exps.size])
103
+ end
104
+
105
+ def test_sjis_ja_graph
106
+ exps = JIS0208.new.sjis_ja_graph
107
+ expected = ["\\x82\\x4f", "\\x84\\xbe", 354]
108
+ assert_equal(expected, [exps.first, exps.last, exps.size])
109
+ end
110
+
111
+ def test_sjis_ja_punct
112
+ exps = JIS0208.new.sjis_ja_punct
113
+ expected = ["\\x81\\x41", "\\x84\\xbe", 292]
114
+ assert_equal(expected, [exps.first, exps.last, exps.size])
115
+ end
116
+
117
+ def test_sjis_ja_space
118
+ exps = JIS0208.new.sjis_ja_space
119
+ expected = ["\\x81\\x40", "\\x81\\x40", 1]
120
+ assert_equal(expected, [exps.first, exps.last, exps.size])
121
+ end
122
+
123
+ def test_sjis_hiragana
124
+ exps = JIS0208.new.sjis_hiragana
125
+ expected = ["\\x82\\x9f", "\\x82\\xf1", 83]
126
+ assert_equal(expected, [exps.first, exps.last, exps.size])
127
+ end
128
+
129
+ def test_sjis_katakana
130
+ exps = JIS0208.new.sjis_katakana
131
+ expected = ["\\x83\\x40", "\\x83\\x96", 86]
132
+ assert_equal(expected, [exps.first, exps.last, exps.size])
133
+ end
134
+
135
+ def test_sjis_kanji
136
+ exps = JIS0208.new.sjis_kanji
137
+ expected = ["\\x88\\x9f-\\x88\\xfc", "\\xea\\x9f-\\xea\\xa4", 69] # FIXME
138
+ assert_equal(expected, [exps.first, exps.last, exps.size])
139
+ end
140
+
141
+ # UTF-8 expressions
142
+
143
+ def test_utf8_ja_alnum
144
+ exps = JIS0208.new.utf8_ja_alnum
145
+ expected = ["\\xef\\xbc\\x90", "\\xef\\xbd\\x9a", 62]
146
+ assert_equal(expected, [exps.first, exps.last, exps.size])
147
+ end
148
+
149
+ def test_utf8_ja_blank
150
+ exps = JIS0208.new.utf8_ja_blank
151
+ expected = ["\\xe3\\x80\\x80", "\\xe3\\x80\\x80", 1]
152
+ assert_equal(expected, [exps.first, exps.last, exps.size])
153
+ end
154
+
155
+ def test_utf8_ja_print
156
+ exps = JIS0208.new.utf8_ja_print
157
+ expected = ["\\xef\\xbc\\x90", "\\xe3\\x80\\x80", 355]
158
+ assert_equal(expected, [exps.first, exps.last, exps.size])
159
+ end
160
+
161
+ def test_utf8_ja_graph
162
+ exps = JIS0208.new.utf8_ja_graph
163
+ expected = ["\\xef\\xbc\\x90", "\\xe2\\x95\\x82", 354]
164
+ assert_equal(expected, [exps.first, exps.last, exps.size])
165
+ end
166
+
167
+ def test_utf8_ja_punct
168
+ exps = JIS0208.new.utf8_ja_punct
169
+ expected = ["\\xe3\\x80\\x81", "\\xe2\\x95\\x82", 292]
170
+ assert_equal(expected, [exps.first, exps.last, exps.size])
171
+ end
172
+
173
+ def test_utf8_ja_space
174
+ exps = JIS0208.new.utf8_ja_space
175
+ expected = ["\\xe3\\x80\\x80", "\\xe3\\x80\\x80", 1]
176
+ assert_equal(expected, [exps.first, exps.last, exps.size])
177
+ end
178
+
179
+ def test_utf8_hiragana
180
+ exps = JIS0208.new.utf8_hiragana
181
+ expected = ["\\xe3\\x81\\x81", "\\xe3\\x82\\x93", 83]
182
+ assert_equal(expected, [exps.first, exps.last, exps.size])
183
+ end
184
+
185
+ def test_utf8_katakana
186
+ exps = JIS0208.new.utf8_katakana
187
+ expected = ["\\xe3\\x82\\xa1", "\\xe3\\x83\\xb6", 86]
188
+ assert_equal(expected, [exps.first, exps.last, exps.size])
189
+ end
190
+
191
+ def test_utf8_kanji
192
+ exps = JIS0208.new.utf8_kanji
193
+ expected = ["\\xe4\\xba\\x9c", "\\xe7\\x86\\x99", 6355]
194
+ assert_equal(expected, [exps.first, exps.last, exps.size])
195
+ end
196
+ end
data/doc/news.md CHANGED
@@ -1,5 +1,13 @@
1
1
  # News
2
2
 
3
+ ### 0.6.6 (2026-02-18)
4
+
5
+ * User-visible changes:
6
+ - `--encoding` and `--eol` options now accept lowercase values, such as ascii or crlf, as well as ASCII or CRLF.
7
+ * Developer-related changes:
8
+ - Applied some lint (except for lib/docdiff/diff/).
9
+ - `make all` now generates docs, tarball, and gem, in addition to running tests.
10
+
3
11
  ### 0.6.5 (2025-12-29)
4
12
 
5
13
  * User-visible changes:
data/docdiff.gemspec CHANGED
@@ -1,5 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
- $:.unshift File.expand_path("../lib", __FILE__)
2
+
3
+ $LOAD_PATH.unshift(File.expand_path("../lib", __FILE__))
3
4
  require "docdiff/version"
4
5
 
5
6
  Gem::Specification.new do |s|
@@ -9,17 +10,18 @@ Gem::Specification.new do |s|
9
10
  s.authors = ["Hisashi Morita"]
10
11
  s.email = ["hisashim@icloud.com"]
11
12
  s.homepage = "https://github.com/hisashim/docdiff"
12
- s.summary = %q{Word-by-word diff}
13
- s.description = %q{DocDiff compares two text files and shows the
14
- difference. It can compare files word by word,
15
- character by character, or line by line. It has
16
- several output formats such as HTML, tty, Manued,
17
- or user-defined markup.}
13
+ s.summary = "Word-by-word diff"
14
+ s.description = <<~EOS.chomp
15
+ DocDiff compares two text files and shows the
16
+ difference. It can compare files word by word,
17
+ character by character, or line by line. It has
18
+ several output formats such as HTML, tty, Manued,
19
+ or user-defined markup.
20
+ EOS
18
21
 
19
22
  s.files = `git ls-files`.split("\n")
20
- s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
21
- s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
23
+ s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
22
24
  s.require_paths = ["lib"]
23
- s.required_ruby_version = '>= 3.0'
25
+ s.required_ruby_version = ">= 3.0"
24
26
  s.add_development_dependency "test-unit", "~> 3"
25
27
  end
data/lib/doc_diff.rb CHANGED
@@ -1,35 +1,38 @@
1
1
  # DocDiff: word/character-oriented text comparison utility
2
2
  # Copyright (C) 2002-2011 Hisashi MORITA
3
3
  # Requirements: Ruby (>= 2.0)
4
- require 'docdiff/difference'
5
- require 'docdiff/document'
6
- require 'docdiff/view'
7
- require 'docdiff/cli'
4
+ require "docdiff/difference"
5
+ require "docdiff/document"
6
+ require "docdiff/view"
7
+ require "docdiff/cli"
8
8
 
9
9
  class DocDiff
10
-
11
- Author = "Copyright (C) 2002-2011 Hisashi MORITA.\n" +
12
- "diff library originates from Ruby/CVS by TANAKA Akira.\n"
13
- License = "This software is licensed under so-called modified BSD license.\n" +
14
- "See the document for detail.\n"
10
+ Author = <<~EOS
11
+ Copyright (C) 2002-2011 Hisashi MORITA.
12
+ diff library originates from Ruby/CVS by TANAKA Akira.
13
+ EOS
14
+ License = <<~EOS
15
+ This software is licensed under so-called modified BSD license.
16
+ See the document for detail.
17
+ EOS
15
18
  SystemConfigFileName = File.join(File::Separator, "etc", "docdiff", "docdiff.conf")
16
- UserConfigFileName = File.join(ENV['HOME'], "etc", "docdiff", "docdiff.conf")
17
- AltUserConfigFileName = File.join(ENV['HOME'], ".docdiff", "docdiff.conf")
19
+ UserConfigFileName = File.join(ENV["HOME"], "etc", "docdiff", "docdiff.conf")
20
+ AltUserConfigFileName = File.join(ENV["HOME"], ".docdiff", "docdiff.conf")
18
21
  XDGUserConfigFileName =
19
- if xdg_config_home = ENV['XDG_CONFIG_HOME'] && !xdg_config_home.empty?
20
- File.join(ENV['HOME'], xdg_config_home, "docdiff", "docdiff.conf")
22
+ if (xdg_config_home = ENV["XDG_CONFIG_HOME"]) && !xdg_config_home.empty?
23
+ File.join(ENV["HOME"], xdg_config_home, "docdiff", "docdiff.conf")
21
24
  else
22
- File.join(ENV['HOME'], ".config", "docdiff", "docdiff.conf")
25
+ File.join(ENV["HOME"], ".config", "docdiff", "docdiff.conf")
23
26
  end
24
27
  DEFAULT_CONFIG = {
25
- :resolution => "word",
26
- :encoding => "auto",
27
- :eol => "auto",
28
- :format => "html",
29
- :cache => true,
30
- :digest => false,
31
- :pager => nil,
32
- :verbose => false
28
+ resolution: "word",
29
+ encoding: "auto",
30
+ eol: "auto",
31
+ format: "html",
32
+ cache: true,
33
+ digest: false,
34
+ pager: nil,
35
+ verbose: false,
33
36
  }
34
37
 
35
38
  def initialize(config: {})
@@ -44,20 +47,17 @@ class DocDiff
44
47
  def compare_by_line_word(doc1, doc2)
45
48
  lines = compare_by_line(doc1, doc2)
46
49
  words = Difference.new
47
- lines.each{|line|
50
+ lines.each do |line|
48
51
  if line.first == :change_elt
49
- before_change = Document.new(line[1].join,
50
- doc1.encoding, doc1.eol)
51
- after_change = Document.new(line[2].join,
52
- doc2.encoding, doc2.eol)
53
- Difference.new(before_change.split_to_word,
54
- after_change.split_to_word).each{|word|
52
+ before_change = Document.new(line[1].join, doc1.encoding, doc1.eol)
53
+ after_change = Document.new(line[2].join, doc2.encoding, doc2.eol)
54
+ Difference.new(before_change.split_to_word, after_change.split_to_word).each do |word|
55
55
  words << word
56
- }
57
- else # :common_elt_elt, :del_elt, or :add_elt
56
+ end
57
+ else # :common_elt_elt, :del_elt, or :add_elt
58
58
  words << line
59
59
  end
60
- }
60
+ end
61
61
  words
62
62
  end
63
63
 
@@ -65,32 +65,29 @@ class DocDiff
65
65
  def compare_by_line_word_char(doc1, doc2)
66
66
  lines = compare_by_line(doc1, doc2)
67
67
  lines_and_words = Difference.new
68
- lines.each{|line|
68
+ lines.each do |line|
69
69
  if line.first == :change_elt
70
- before_change = Document.new(line[1].join,
71
- doc1.encoding, doc1.eol)
72
- after_change = Document.new(line[2].join,
73
- doc2.encoding, doc2.eol)
74
- Difference.new(before_change.split_to_word,
75
- after_change.split_to_word).each{|word|
70
+ before_change = Document.new(line[1].join, doc1.encoding, doc1.eol)
71
+ after_change = Document.new(line[2].join, doc2.encoding, doc2.eol)
72
+ Difference.new(before_change.split_to_word, after_change.split_to_word).each do |word|
76
73
  lines_and_words << word
77
- }
74
+ end
78
75
  else # :common_elt_elt, :del_elt, or :add_elt
79
76
  lines_and_words << line
80
77
  end
81
- }
78
+ end
82
79
  lines_words_and_chars = Difference.new
83
- lines_and_words.each{|line_or_word|
80
+ lines_and_words.each do |line_or_word|
84
81
  if line_or_word.first == :change_elt
85
82
  before_change = Document.new(line_or_word[1].join, doc1.encoding, doc1.eol)
86
83
  after_change = Document.new(line_or_word[2].join, doc2.encoding, doc2.eol)
87
- Difference.new(before_change.split_to_char, after_change.split_to_char).each{|char|
84
+ Difference.new(before_change.split_to_char, after_change.split_to_char).each do |char|
88
85
  lines_words_and_chars << char
89
- }
86
+ end
90
87
  else # :common_elt_elt, :del_elt, or :add_elt
91
88
  lines_words_and_chars << line_or_word
92
89
  end
93
- }
90
+ end
94
91
  lines_words_and_chars
95
92
  end
96
93
 
@@ -98,10 +95,10 @@ class DocDiff
98
95
  raise "option is nil" if option.nil?
99
96
  raise "option[:resolution] is nil" if option[:resolution].nil?
100
97
  raise "option[:format] is nil" if option[:format].nil?
101
- case
102
- when doc1.class == Document && doc2.class == Document # OK
103
- when doc1.encoding != nil && doc2.encoding != nil # OK
104
- when doc1.encoding == doc2.encoding && doc1.eol == doc2.eol # OK
98
+
99
+ if doc1.class == Document && doc2.class == Document # OK
100
+ elsif !doc1.encoding.nil? && !doc2.encoding.nil? # OK
101
+ elsif doc1.encoding == doc2.encoding && doc1.eol == doc2.eol # OK
105
102
  else
106
103
  raise("Error! Blame the author (doc1: #{doc1.encoding}, #{doc1.eol}, doc2: #{doc2.encoding}, #{doc2.eol}).")
107
104
  end
@@ -114,16 +111,18 @@ class DocDiff
114
111
  raise "Unsupported resolution: #{option[:resolution].inspect}"
115
112
  end
116
113
  view = View.new(difference, doc1.encoding, doc1.eol)
117
- user_tags = {:start_common => (@config[:tag_common_start] ||= ''),
118
- :end_common => (@config[:tag_common_end] ||= ''),
119
- :start_del => (@config[:tag_del_start] ||= ''),
120
- :end_del => (@config[:tag_del_end] ||= ''),
121
- :start_add => (@config[:tag_add_start] ||= ''),
122
- :end_add => (@config[:tag_add_end] ||= ''),
123
- :start_before_change => (@config[:tag_change_before_start] ||= ''),
124
- :end_before_change => (@config[:tag_change_before_end] ||= ''),
125
- :start_after_change => (@config[:tag_change_after_start] ||= ''),
126
- :end_after_change => (@config[:tag_change_after_end] ||= '')}
114
+ user_tags = {
115
+ start_common: (@config[:tag_common_start] ||= ""),
116
+ end_common: (@config[:tag_common_end] ||= ""),
117
+ start_del: (@config[:tag_del_start] ||= ""),
118
+ end_del: (@config[:tag_del_end] ||= ""),
119
+ start_add: (@config[:tag_add_start] ||= ""),
120
+ end_add: (@config[:tag_add_end] ||= ""),
121
+ start_before_change: (@config[:tag_change_before_start] ||= ""),
122
+ end_before_change: (@config[:tag_change_before_end] ||= ""),
123
+ start_after_change: (@config[:tag_change_after_start] ||= ""),
124
+ end_after_change: (@config[:tag_change_after_end] ||= ""),
125
+ }
127
126
  case option[:digest]
128
127
  when true
129
128
  case option[:format]
@@ -150,4 +149,4 @@ class DocDiff
150
149
  end
151
150
  result.join
152
151
  end
153
- end # class DocDiff
152
+ end