docdiff 0.6.4 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +7 -7
  3. data/Guardfile +4 -4
  4. data/Makefile +6 -7
  5. data/README.md +1 -0
  6. data/README_ja.md +1 -0
  7. data/Rakefile +6 -6
  8. data/bin/docdiff +2 -209
  9. data/devutil/Rakefile +12 -5
  10. data/devutil/char_by_charclass.rb +43 -20
  11. data/devutil/charclass_by_char.rb +40 -19
  12. data/devutil/jis0208.rb +263 -231
  13. data/devutil/jis0208_test.rb +196 -0
  14. data/doc/news.md +17 -0
  15. data/docdiff.gemspec +13 -10
  16. data/lib/doc_diff.rb +63 -98
  17. data/lib/docdiff/charstring.rb +225 -241
  18. data/lib/docdiff/cli.rb +316 -0
  19. data/lib/docdiff/diff/contours.rb +1 -1
  20. data/lib/docdiff/diff/editscript.rb +1 -1
  21. data/lib/docdiff/diff/rcsdiff.rb +1 -1
  22. data/lib/docdiff/diff/shortestpath.rb +1 -1
  23. data/lib/docdiff/diff/speculative.rb +1 -1
  24. data/lib/docdiff/diff/subsequence.rb +1 -1
  25. data/lib/docdiff/diff/unidiff.rb +1 -1
  26. data/lib/docdiff/diff.rb +1 -1
  27. data/lib/docdiff/difference.rb +71 -70
  28. data/lib/docdiff/document.rb +129 -109
  29. data/lib/docdiff/encoding/en_ascii.rb +64 -58
  30. data/lib/docdiff/encoding/ja_eucjp.rb +250 -235
  31. data/lib/docdiff/encoding/ja_sjis.rb +240 -226
  32. data/lib/docdiff/encoding/ja_utf8.rb +6952 -6939
  33. data/lib/docdiff/version.rb +1 -1
  34. data/lib/docdiff/view.rb +523 -427
  35. data/lib/docdiff.rb +2 -2
  36. data/test/charstring_test.rb +475 -351
  37. data/test/cli_test.rb +314 -0
  38. data/test/diff_test.rb +15 -16
  39. data/test/difference_test.rb +40 -31
  40. data/test/docdiff_test.rb +162 -159
  41. data/test/document_test.rb +280 -175
  42. data/test/fixture/format_wdiff.conf +1 -0
  43. data/test/fixture/simple.conf +9 -0
  44. data/test/test_helper.rb +2 -1
  45. data/test/view_test.rb +636 -497
  46. metadata +27 -9
  47. data/devutil/testjis0208.rb +0 -38
@@ -0,0 +1,316 @@
1
+ require "optparse"
2
+
3
+ class DocDiff
4
+ module CLI
5
+ class << self
6
# Parse command line arguments into an options hash.
#
# Recognized switches are removed from +args+ (OptionParser#parse! works in
# place), so only the non-option arguments are left in it afterwards.
# When the same setting is given more than once, the last occurrence wins.
#
# --help, --version, --license and --author print their text and terminate
# the process with exit status 0.
#
# @param args [Array<String>] command line arguments; modified in place
# @param base_options [Hash] initial option values; neither this hash nor
#   the values stored in it are modified
# @return [Hash] a copy of base_options overlaid with the parsed options
# @raise [OptionParser::ParseError] on an unknown switch or an invalid value
def parse_options!(args, base_options: {})
  o = base_options.dup

  option_parser = OptionParser.new do |parser|
    parser.on(
      "--resolution=RESOLUTION",
      resolutions = ["line", "word", "char"],
      "specify resolution (granularity)",
      "#{resolutions.join("|")} (default: word)",
    ) { |s| o[:resolution] = (s || "word") }
    parser.on("--line", "same as --resolution=line") { o[:resolution] = "line" }
    parser.on("--word", "same as --resolution=word") { o[:resolution] = "word" }
    parser.on("--char", "same as --resolution=char") { o[:resolution] = "char" }

    parser.on(
      "--encoding=ENCODING",
      encodings = ["ASCII", "EUC-JP", "Shift_JIS", "CP932", "UTF-8", "auto"],
      # lower-case spellings accepted on the command line => canonical name
      {
        "ascii" => "ASCII",
        "euc-jp" => "EUC-JP",
        "shift_jis" => "Shift_JIS",
        "cp932" => "CP932",
        "utf-8" => "UTF-8",
      },
      "specify character encoding",
      "#{encodings.join("|")} (default: auto)",
      "(try ASCII for single byte encodings such as ISO-8859)",
    ) { |s| o[:encoding] = (s || "auto") }
    parser.on("--ascii", "same as --encoding=ASCII") { o[:encoding] = "ASCII" }
    parser.on("--iso8859", "same as --encoding=ASCII") { o[:encoding] = "ASCII" }
    parser.on("--iso8859x", "same as --encoding=ASCII (deprecated)") { o[:encoding] = "ASCII" }
    parser.on("--eucjp", "same as --encoding=EUC-JP") { o[:encoding] = "EUC-JP" }
    parser.on("--sjis", "same as --encoding=Shift_JIS") { o[:encoding] = "Shift_JIS" }
    parser.on("--cp932", "same as --encoding=CP932") { o[:encoding] = "CP932" }
    parser.on("--utf8", "same as --encoding=UTF-8") { o[:encoding] = "UTF-8" }

    parser.on(
      "--eol=EOL",
      eols = ["CR", "LF", "CRLF", "auto"],
      # lower-case spellings accepted on the command line => canonical name
      { "cr" => "CR", "lf" => "LF", "crlf" => "CRLF" },
      "specify end-of-line character",
      "#{eols.join("|")} (default: auto)",
    ) { |s| o[:eol] = (s || "auto") }
    parser.on("--cr", "same as --eol=CR") { o[:eol] = "CR" }
    parser.on("--lf", "same as --eol=LF") { o[:eol] = "LF" }
    parser.on("--crlf", "same as --eol=CRLF") { o[:eol] = "CRLF" }

    # NOTE(review): the fallback below ("manued") differs from the default
    # advertised in the help text (html) -- confirm which one is intended.
    parser.on(
      "--format=FORMAT",
      formats = ["tty", "manued", "html", "wdiff", "stat", "user"],
      "specify output format",
      "#{formats.join("|")} (default: html) (stat is deprecated)",
      "(user tags can be defined in config file)",
    ) { |s| o[:format] = (s || "manued") }
    parser.on("--tty", "same as --format=tty") { o[:format] = "tty" }
    parser.on("--manued", "same as --format=manued") { o[:format] = "manued" }
    parser.on("--html", "same as --format=html") { o[:format] = "html" }
    parser.on("--wdiff", "same as --format=wdiff") { o[:format] = "wdiff" }
    parser.on("--stat", "same as --format=stat (not implemented) (deprecated)") { o[:format] = "stat" }

    parser.on(
      "--label LABEL",
      "-L LABEL",
      "use label instead of file name (not implemented; exists for compatibility with diff)",
    ) do |s|
      # Build a new array instead of appending in place: `o` is only a
      # shallow copy, so `<<` would also change the caller's base_options.
      o[:label] = [*o[:label], s]
    end

    parser.on("--digest", "digest output, do not show all") { o[:digest] = true }
    parser.on("--summary", "same as --digest") { o[:digest] = true }

    # Plain assignment, like every other switch, so that the command line
    # overrides a value inherited from base_options.
    parser.on(
      "--display=DISPLAY",
      display_types = ["inline", "block", "multi"],
      "specify presentation type (effective only with digest; experimental feature)",
      "#{display_types.join("|")} (default: inline) (multi is deprecated)",
    ) { |s| o[:display] = s.downcase }

    parser.on("--cache", "use file cache (not implemented) (deprecated)") { o[:cache] = true }
    parser.on(
      "--pager=PAGER",
      String,
      "specify pager (if available, $DOCDIFF_PAGER is used by default)",
    ) { |s| o[:pager] = s }
    parser.on("--no-pager", "do not use pager") { o[:pager] = false }
    parser.on("--config-file=FILE", String, "specify config file to read") { |s| o[:config_file] = s }
    parser.on("--no-config-file", "do not read config files") { o[:no_config_file] = true }
    parser.on("--verbose", "run verbosely (not well-supported) (deprecated)") { o[:verbose] = true }

    parser.on("--help", "show this message") do
      puts parser
      exit(0)
    end
    parser.on("--version", "show version") do
      puts Docdiff::VERSION
      exit(0)
    end
    parser.on("--license", "show license (deprecated)") do
      puts DocDiff::License
      exit(0)
    end
    parser.on("--author", "show author(s) (deprecated)") do
      puts DocDiff::Author
      exit(0)
    end

    parser.on_tail(
      "When invoked as worddiff or chardiff, resolution will be set accordingly.",
      "Config files: /etc/docdiff/docdiff.conf, ~/.config/docdiff/docdiff.conf (or ~/etc/docdiff/docdiff.conf (deprecated))",
    )
  end

  option_parser.parse!(args)
  o
end
122
+
123
# Parse the text of a config file into a hash of settings.
#
# Every non-blank line must look like "name = value", with whitespace on
# both sides of the "=". Everything from "#" to the end of a line is
# dropped as a comment. The value is everything after the first " = ", so
# it may itself contain " = ". Values are converted as follows:
#   on / yes / true   (any letter case) -> true
#   off / no / false  (any letter case) -> false
#   digits only                         -> Integer
#   anything else                       -> the String as written
#
# @param content [String] whole text of a config file
# @return [Hash{Symbol => String, Integer, Boolean}] settings keyed by name
# @raise [RuntimeError] if a line has no " = " or its name contains
#   whitespace
def parse_config_file_content(content)
  result = {}
  return result if content.empty?

  separator = /\s+=\s+/
  content.split(/\r\n|\r|\n/).each do |raw_line|
    line = raw_line.sub(/#.*$/, "").strip
    next if line.empty?

    raise 'line does not include " = ".' unless separator.match?(line)

    # Limit the split to two parts; otherwise a value such as
    # `<ins class = "x">` would be cut off at its own " = ".
    name_src, value_src = line.split(separator, 2)
    raise "Invalid name: #{name_src.inspect}" if /\s/.match?(name_src)
    raise "Invalid value: #{value_src.inspect}" unless value_src.is_a?(String)

    result[name_src.intern] =
      case value_src.downcase
      when "on", "yes", "true" then true
      when "off", "no", "false" then false
      else
        /\A[0-9]+\z/.match?(value_src) ? value_src.to_i : value_src
      end
  end
  result
end
147
+
148
# Read a config file and parse its content with parse_config_file_content.
#
# Failures to open or read the file are not translated: whatever File.open
# raises (for example Errno::ENOENT or Errno::EACCES) reaches the caller
# unchanged, as do errors raised while parsing.
#
# @param filename [String] path of the config file to read
# @return [Array] a pair [config, message]: config is the parsed settings
#   hash; message is nil, or a note for the user when the file is empty
def read_config_from_file(filename)
  content = File.open(filename, "r") { |f| f.read }

  # A successful read must not produce a diagnostic; the only thing worth
  # reporting here is a file without any content.
  message = content.empty? ? "config file empty: #{filename.inspect}" : nil
  [parse_config_file_content(content), message]
end
172
+
173
# Write content to standard output, through a pager when that makes sense.
#
# The pager is used only when standard output is a terminal and +pager+ is
# a non-empty String; in every other case the content is printed directly.
#
# @param content [String] text to show
# @param pager [String, false, nil] pager command line
# @return [nil]
def print_or_write_to_pager(content, pager)
  unless $stdout.tty? && pager.is_a?(String) && !pager.empty?
    print(content)
    return
  end

  begin
    IO.popen(pager, "w") { |io| io.print(content) }
  rescue Errno::EPIPE
    # The pager exited before reading everything (e.g. the user quit it
    # early). That is a normal way to end the session, not a failure.
    nil
  end
end
180
+
181
# Entry point of the command line tool.
#
# Builds the effective configuration, reads the two target files named by
# the first two remaining arguments in ARGV ("-" stands for /dev/stdin),
# compares them and prints the result, through a pager when appropriate.
#
# Configuration sources are merged in this order, later ones overriding
# earlier ones: DocDiff::DEFAULT_CONFIG, the program name (worddiff or
# chardiff), the system config file, the user config file, $DOCDIFF_PAGER,
# the file given with --config-file, and the command line options.
#
# @return [nil]
# @raise [RuntimeError] if more than one system or user config file exists,
#   if fewer than two target files are given, if a target file is missing
#   or unreadable, or if encoding or end-of-line detection fails or gives
#   different results for the two documents
def run
  command_line_config = parse_options!(ARGV)

  system_config =
    unless command_line_config[:no_config_file]
      possible_system_config_file_names = [
        DocDiff::SystemConfigFileName,
      ]
      existing_system_config_file_names =
        possible_system_config_file_names.select { |fn| File.exist?(fn) }
      if existing_system_config_file_names.size >= 2
        raise <<~EOS
          More than one system config file found, using the first one: \
          #{existing_system_config_file_names.inspect}
        EOS
      end

      # Having no system config file is normal; only read one that exists
      # (passing nil on to File.open would raise TypeError).
      if (filename = existing_system_config_file_names.first)
        config, message = read_config_from_file(filename)
        $stderr.print(message) if command_line_config[:verbose]
        config
      end
    end

  user_config =
    unless command_line_config[:no_config_file]
      possible_user_config_file_names = [
        DocDiff::UserConfigFileName,
        DocDiff::AltUserConfigFileName,
        DocDiff::XDGUserConfigFileName,
      ]
      existing_user_config_file_names =
        possible_user_config_file_names.select { |fn| File.exist?(fn) }
      if existing_user_config_file_names.size >= 2
        raise <<~EOS
          Only one user config file can be used at the same time. \
          Keep one and remove or rename the others: \
          #{existing_user_config_file_names.inspect}
        EOS
      end

      # Same as above: a user config file is optional.
      if (filename = existing_user_config_file_names.first)
        config, message = read_config_from_file(filename)
        $stderr.print(message) if command_line_config[:verbose]
        config
      end
    end

  config_from_specified_file =
    if (filename = command_line_config[:config_file])
      config, message = read_config_from_file(filename)
      $stderr.print(message) if command_line_config[:verbose] == true
      config
    end

  # The name the program was invoked under can preset the resolution.
  config_from_program_name =
    case File.basename($PROGRAM_NAME, ".*")
    when "worddiff" then { resolution: "word" }
    when "chardiff" then { resolution: "char" }
    end

  config_from_env_vars = {}
  if (pager = ENV["DOCDIFF_PAGER"]) && !pager.empty?
    config_from_env_vars[:pager] = pager
  end

  # Later merges take precedence over earlier ones.
  config_in_effect = DocDiff::DEFAULT_CONFIG.dup
  config_in_effect.merge!(config_from_program_name) if config_from_program_name
  config_in_effect.merge!(system_config) if system_config
  config_in_effect.merge!(user_config) if user_config
  config_in_effect.merge!(config_from_env_vars) if config_from_env_vars
  config_in_effect.merge!(config_from_specified_file) if config_from_specified_file
  config_in_effect.merge!(command_line_config) if command_line_config

  docdiff = DocDiff.new(config: config_in_effect)

  file1_content = nil
  file2_content = nil
  raise "Try `#{File.basename($PROGRAM_NAME)} --help' for more information." if ARGV[0].nil?
  raise "Specify at least 2 target files." unless ARGV[0] && ARGV[1]

  ARGV[0] = "/dev/stdin" if ARGV[0] == "-"
  ARGV[1] = "/dev/stdin" if ARGV[1] == "-"
  raise "No such file: #{ARGV[0]}." unless FileTest.exist?(ARGV[0])
  raise "No such file: #{ARGV[1]}." unless FileTest.exist?(ARGV[1])
  raise "#{ARGV[0]} is not readable." unless FileTest.readable?(ARGV[0])
  raise "#{ARGV[1]} is not readable." unless FileTest.readable?(ARGV[1])

  File.open(ARGV[0], "r") { |f| file1_content = f.read }
  File.open(ARGV[1], "r") { |f| file2_content = f.read }

  encoding1 = docdiff.config[:encoding]
  encoding2 = docdiff.config[:encoding]
  eol1 = docdiff.config[:eol]
  eol2 = docdiff.config[:eol]

  # "auto" means: detect from the content, and insist that both documents
  # agree, since they are compared under a single encoding.
  if docdiff.config[:encoding] == "auto"
    encoding1 = DocDiff::CharString.guess_encoding(file1_content)
    encoding2 = DocDiff::CharString.guess_encoding(file2_content)
    if (encoding1 == "UNKNOWN") || (encoding2 == "UNKNOWN")
      raise "Document encoding unknown (#{encoding1}, #{encoding2})."
    elsif encoding1 != encoding2
      raise "Document encoding mismatch (#{encoding1}, #{encoding2})."
    end
  end

  # Same policy for the end-of-line convention.
  if docdiff.config[:eol] == "auto"
    eol1 = DocDiff::CharString.guess_eol(file1_content)
    eol2 = DocDiff::CharString.guess_eol(file2_content)
    if eol1.nil? || eol2.nil?
      raise "Document eol is nil (#{eol1.inspect}, #{eol2.inspect}). The document might be empty."
    elsif (eol1 == "UNKNOWN") || (eol2 == "UNKNOWN")
      raise "Document eol unknown (#{eol1.inspect}, #{eol2.inspect})."
    elsif eol1 != eol2
      raise "Document eol mismatch (#{eol1}, #{eol2})."
    end
  end

  doc1 = DocDiff::Document.new(file1_content, encoding1, eol1)
  doc2 = DocDiff::Document.new(file2_content, encoding2, eol2)

  output =
    docdiff.run(
      doc1,
      doc2,
      {
        resolution: docdiff.config[:resolution],
        format: docdiff.config[:format],
        digest: docdiff.config[:digest],
        display: docdiff.config[:display],
      },
    )

  print_or_write_to_pager(output, docdiff.config[:pager])
end
314
+ end
315
+ end
316
+ end
@@ -380,4 +380,4 @@ class Diff
380
380
  end
381
381
  end
382
382
  end
383
- end # class DocDiff
383
+ end
@@ -147,4 +147,4 @@ class Diff
147
147
  end
148
148
  end
149
149
  end
150
- end # class DocDiff
150
+ end
@@ -106,4 +106,4 @@ class Diff
106
106
  end
107
107
  end
108
108
  end
109
- end # class DocDiff
109
+ end
@@ -92,4 +92,4 @@ class Diff
92
92
  end
93
93
  end
94
94
  end
95
- end # class DocDiff
95
+ end
@@ -40,4 +40,4 @@ class Diff
40
40
  end
41
41
  end
42
42
  end
43
- end # class DocDiff
43
+ end
@@ -38,4 +38,4 @@ class Diff
38
38
  end
39
39
  end
40
40
  end
41
- end # class DocDiff
41
+ end
@@ -122,4 +122,4 @@ class Diff
122
122
  end
123
123
  end
124
124
  end
125
- end # class DocDiff
125
+ end
data/lib/docdiff/diff.rb CHANGED
@@ -216,4 +216,4 @@ class Diff
216
216
  end
217
217
  end
218
218
  end
219
- end # class DocDiff
219
+ end
@@ -1,94 +1,95 @@
1
1
  # Difference class for DocDiff
2
- # 2003-03-24 ..
2
+ # 2003-03-24 ..
3
3
  # Hisashi MORITA
4
4
 
5
- require 'docdiff/diff'
5
+ require "docdiff/diff"
6
6
 
7
7
  class DocDiff
8
- class Difference < Array
8
+ class Difference < Array
9
+ # @resolution = nil # char, word, phrase, sentence, line, paragraph..
10
+ # @codeset = ''
11
+ # @eol_char = "\n"
12
+ # @source = 'source'
13
+ # @target = 'target'
14
+ # attr_accessor :resolution, :codeset, :eol_char, :source, :target
9
15
 
10
- # @resolution = nil # char, word, phrase, sentence, line, paragraph..
11
- # @codeset = ''
12
- # @eol_char = "\n"
13
- # @source = 'source'
14
- # @target = 'target'
15
- # attr_accessor :resolution, :codeset, :eol_char, :source, :target
16
+ def initialize(array1 = nil, array2 = nil)
17
+ if array1.nil? && array2.nil?
18
+ return []
19
+ end
16
20
 
17
- def initialize(array1 = nil, array2 = nil)
18
- if (array1 == nil) && (array2 == nil)
19
- return []
21
+ diff = Diff.new(array1, array2)
22
+ @raw_list = []
23
+ diff.ses.each do |block| # Diff::EditScript does not have each_with_index
24
+ @raw_list << block
25
+ end
26
+ combine_del_add_to_change!
20
27
  end
21
- diff = Diff.new(array1, array2)
22
- @raw_list = []
23
- diff.ses.each{|block| # Diff::EditScript does not have each_with_index()
24
- @raw_list << block
25
- }
26
- combine_del_add_to_change!()
27
- end
28
28
 
29
- def combine_del_add_to_change!()
30
-
31
- @raw_list.each_with_index{|block, i|
32
- case block.first
33
- when :common_elt_elt
34
- if i == 0 # first block
35
- self << block
36
- else # in-between or the last block
37
- if @raw_list[i - 1].first == :del_elt # previous block was del
29
+ def combine_del_add_to_change!
30
+ @raw_list.each_with_index do |block, i|
31
+ case block.first
32
+ when :common_elt_elt
33
+ if i == 0
34
+ # first block
35
+ self << block
36
+ elsif @raw_list[i - 1].first == :del_elt
37
+ # previous block was del
38
38
  self << @raw_list[i - 1]
39
39
  self << block
40
- else # previous block was add
40
+ else
41
+ # previous block was add
41
42
  self << block
42
43
  end
43
- end
44
- when :del_elt
45
- if i == (@raw_list.size - 1) # last block
46
- self << block
47
- else # first block or in-between
48
- # do nothing, let the next block to decide what to do
49
- end
50
- when :add_elt
51
- if i == 0 # first block
52
- self << block
53
- else # in-between or the last block
54
- if @raw_list[i - 1].first == :del_elt # previous block was del
44
+ when :del_elt
45
+ if i == (@raw_list.size - 1)
46
+ # last block
47
+ self << block
48
+ else
49
+ # do nothing, let the next block to decide what to do
50
+ end
51
+ when :add_elt
52
+ if i == 0
53
+ # first block
54
+ self << block
55
+ elsif @raw_list[i - 1].first == :del_elt
56
+ # previous block was del
55
57
  deleted = @raw_list[i - 1][1]
56
- added = @raw_list[i][2]
58
+ added = @raw_list[i][2]
57
59
  self << [:change_elt, deleted, added]
58
- else # previous block was common
60
+ else
61
+ # previous block was common
59
62
  self << block
60
63
  end
64
+ else
65
+ raise "the first element of the block #{i} is invalid: (#{block.first})\n"
61
66
  end
62
- else
63
- raise "the first element of the block #{i} is invalid: (#{block.first})\n"
64
67
  end
65
- }
66
- end
67
- attr_accessor :raw_list
68
+ end
69
+ attr_accessor :raw_list
68
70
 
69
- def former_only()
70
- elms = self.dup.delete_if{|e| e[0] == :add_elt}
71
- elms.collect!{|e|
72
- if e[0] == :change_elt
73
- [e[0], e[1], nil]
74
- else
75
- e
71
+ def former_only
72
+ elms = dup.delete_if { |e| e[0] == :add_elt }
73
+ elms.collect! do |e|
74
+ if e[0] == :change_elt
75
+ [e[0], e[1], nil]
76
+ else
77
+ e
78
+ end
76
79
  end
77
- }
78
- return elms
79
- end
80
+ elms
81
+ end
80
82
 
81
- def latter_only()
82
- elms = self.dup.delete_if{|e| e[0] == :del_elt}
83
- elms.collect!{|e|
84
- if e[0] == :change_elt
85
- [e[0], nil, e[2]]
86
- else
87
- e
83
+ def latter_only
84
+ elms = dup.delete_if { |e| e[0] == :del_elt }
85
+ elms.collect! do |e|
86
+ if e[0] == :change_elt
87
+ [e[0], nil, e[2]]
88
+ else
89
+ e
90
+ end
88
91
  end
89
- }
90
- return elms
92
+ elms
93
+ end
91
94
  end
92
-
93
- end # class Difference
94
- end # class DocDiff
95
+ end