docdiff 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. data/.gitignore +6 -0
  2. data/.travis.yml +7 -0
  3. data/Gemfile +17 -0
  4. data/Guardfile +8 -0
  5. data/Makefile +108 -0
  6. data/Rakefile +17 -0
  7. data/bin/docdiff +179 -0
  8. data/devutil/JIS0208.TXT +6952 -0
  9. data/devutil/char_by_charclass.rb +23 -0
  10. data/devutil/charclass_by_char.rb +21 -0
  11. data/devutil/jis0208.rb +343 -0
  12. data/devutil/testjis0208.rb +38 -0
  13. data/docdiff.conf.example +22 -0
  14. data/docdiff.gemspec +23 -0
  15. data/docdiffwebui.cgi +176 -0
  16. data/docdiffwebui.html +123 -0
  17. data/img/docdiff-screenshot-format-html-digest-firefox.png +0 -0
  18. data/img/docdiff-screenshot-format-html-firefox.png +0 -0
  19. data/img/docdiff-screenshot-format-tty-cmdexe-en.png +0 -0
  20. data/img/docdiff-screenshot-format-tty-cmdexe-ja.png +0 -0
  21. data/img/docdiff-screenshot-format-tty-rxvtunicode-en.png +0 -0
  22. data/img/docdiff-screenshot-format-tty-rxvtunicode-ja.png +0 -0
  23. data/img/docdiff-screenshot-format-tty-xterm-en.png +0 -0
  24. data/img/docdiff-screenshot-format-tty-xterm-ja.png +0 -0
  25. data/img/docdiff-screenshot-resolution-linewordchar-xterm.png +0 -0
  26. data/index.html +181 -0
  27. data/langfilter.rb +14 -0
  28. data/lib/doc_diff.rb +170 -0
  29. data/lib/docdiff.rb +7 -0
  30. data/lib/docdiff/charstring.rb +579 -0
  31. data/lib/docdiff/diff.rb +217 -0
  32. data/lib/docdiff/diff/contours.rb +382 -0
  33. data/lib/docdiff/diff/editscript.rb +148 -0
  34. data/lib/docdiff/diff/rcsdiff.rb +107 -0
  35. data/lib/docdiff/diff/shortestpath.rb +93 -0
  36. data/lib/docdiff/diff/speculative.rb +40 -0
  37. data/lib/docdiff/diff/subsequence.rb +39 -0
  38. data/lib/docdiff/diff/unidiff.rb +124 -0
  39. data/lib/docdiff/difference.rb +92 -0
  40. data/lib/docdiff/document.rb +127 -0
  41. data/lib/docdiff/encoding/en_ascii.rb +97 -0
  42. data/lib/docdiff/encoding/ja_eucjp.rb +269 -0
  43. data/lib/docdiff/encoding/ja_sjis.rb +260 -0
  44. data/lib/docdiff/encoding/ja_utf8.rb +6974 -0
  45. data/lib/docdiff/version.rb +3 -0
  46. data/lib/docdiff/view.rb +476 -0
  47. data/lib/viewdiff.rb +375 -0
  48. data/readme.html +713 -0
  49. data/sample/01.en.ascii.cr +1 -0
  50. data/sample/01.en.ascii.crlf +2 -0
  51. data/sample/01.en.ascii.lf +2 -0
  52. data/sample/01.ja.eucjp.lf +2 -0
  53. data/sample/01.ja.sjis.cr +1 -0
  54. data/sample/01.ja.sjis.crlf +2 -0
  55. data/sample/01.ja.utf8.crlf +2 -0
  56. data/sample/02.en.ascii.cr +1 -0
  57. data/sample/02.en.ascii.crlf +2 -0
  58. data/sample/02.en.ascii.lf +2 -0
  59. data/sample/02.ja.eucjp.lf +2 -0
  60. data/sample/02.ja.sjis.cr +1 -0
  61. data/sample/02.ja.sjis.crlf +2 -0
  62. data/sample/02.ja.utf8.crlf +2 -0
  63. data/sample/humpty_dumpty01.ascii.lf +4 -0
  64. data/sample/humpty_dumpty02.ascii.lf +4 -0
  65. data/test/charstring_test.rb +1008 -0
  66. data/test/diff_test.rb +36 -0
  67. data/test/difference_test.rb +64 -0
  68. data/test/docdiff_test.rb +193 -0
  69. data/test/document_test.rb +626 -0
  70. data/test/test_helper.rb +7 -0
  71. data/test/view_test.rb +570 -0
  72. data/test/viewdiff_test.rb +908 -0
  73. metadata +129 -0
@@ -0,0 +1,375 @@
1
+ #!/usr/bin/ruby
2
+ # 2005-08-29..xx-xx-xx Hisashi Morita
3
+
4
+ require 'docdiff/difference'
5
+ require 'docdiff/document'
6
+ require 'docdiff/view'
7
+ require 'docdiff/charstring'
8
+
9
+ require "tempfile"
10
+
11
+ # $KCODE="e"
12
+
13
class String
  # Splits the receiver into lines, keeping each line's terminator
  # (CRLF, CR or LF) attached to the line it ends.
  #
  # A final line without a terminator is returned as-is; an empty string
  # yields an empty array.
  #
  # The previous pattern was built from a double-quoted string containing
  # "\z", which Ruby reads as a plain "z", and it was compiled with
  # Regexp::MULTILINE so the greedy ".*" swallowed every line but the last.
  #
  # @return [Array<String>] the lines of the receiver, terminators included
  def to_lines
    scan(/[^\r\n]*(?:\r\n|\r|\n)|[^\r\n]+/)
  end
end
18
+
19
# Splits +src+ into fragments: each embedded diff (classic, context or
# unified) becomes one fragment, and every other line becomes a fragment of
# its own (together with any blank lines that directly follow it).
#
# A diff fragment ends just before the first line that cannot belong to a
# diff of that type, or at the end of the input.  The classic pattern now
# accepts end-of-input too, matching the context and unified patterns.
# A final line without a line terminator is kept as a fragment instead of
# being dropped.
#
# @param src [String] text that may contain diffs
# @return [Array<String>] fragments in input order
def scan_text_for_diffs(src)
  eol = "(?:\r\n|\n|\r)"
  pats = {
    :classic => "(?:[0-9]+(?:,[0-9]+)?[dac][0-9]+(?:,[0-9]+)?#{eol}.+?(?=^[^-<>0-9 ]|\\z))",
    :context => "(?:^\\*{3} .+?#{eol}--- .+?#{eol}.+?(?=^[^-+! *]|\\z))",
    :unified => "(?:^--- .+?#{eol}^\\+{3} .+?#{eol}.+?(?=^[^-+ @]|\\z))"
  }
  # Alternatives are tried in order: diffs first, then a terminated line,
  # then whatever unterminated text remains at the very end.
  src.scan(/(?:#{pats.values.join("|")})|(?:.*?#{eol}+)|(?:.+\z)/m)
end
28
+
29
# Wraps the text of one diff and dispatches it to the parser that matches
# its format.
class DiffFile < Array

  attr_accessor :src

  # Tags +src+ in place with CharString, records the encoding and eol that
  # CharString guesses for it, and keeps it as the diff source.
  def initialize(src)
    src.extend(CharString)
    src.encoding = CharString.guess_encoding(src)
    src.eol = CharString.guess_eol(src)
    @src = src
  end

  # Returns "classic", "context", "unified" or "unknown", depending on
  # which line prefixes occur in +text+.  The checks run from the most
  # specific prefix to the least specific one.
  def guess_diff_type(text)
    if /^[<>] /m.match(text)
      "classic"
    elsif /^[-+!] /m.match(text)
      "context"
    elsif /^[-+]/m.match(text)
      "unified"
    else
      "unknown"
    end
  end

  # Parses the stored source with the parser for its guessed format.
  # Raises RuntimeError when the format is not recognised.
  def anatomize
    format = guess_diff_type(@src)
    return anatomize_classic(@src) if format == "classic"
    return anatomize_context(@src) if format == "context"
    return anatomize_unified(@src) if format == "unified"
    raise "unsupported diff format: \n#{src.inspect}"
  end

end
59
+
60
# Regexp source fragments describing the pieces of a classic ("normal")
# diff.  Each method returns a String meant to be interpolated into a
# Regexp.
module ClassicDiff
  # A line terminator: CRLF, LF or CR.
  def eol
    "(?:\r\n|\n|\r)"
  end
  # Any single character that is not part of a line terminator.
  def noeol
    "(?:[^\r\n])"
  end
  # Hunk header line such as "1,2c3" or "4d3".
  def hunk_header
    "(?:[0-9]+(?:,[0-9]+)?[dac][0-9]+(?:,[0-9]+)?#{eol})"
  end
  # A deleted line ("< ...").
  def del
    "(?:^< ?#{noeol}*?#{eol})"
  end
  # The "---" line separating the two halves of a change.
  def sep
    "(?:^---#{eol})"
  end
  # An added line ("> ...").
  def add
    "(?:^> ?#{noeol}*?#{eol})"
  end
  # Deleted lines, the separator, then added lines.
  def change
    "(?:#{del}+#{sep}#{add}+)"
  end
  # One line that is not part of a hunk: a terminated line, or the
  # unterminated remainder at the very end of the input.
  # (The end-of-string anchor must be written "\\z" inside a double-quoted
  # string; a bare "\z" is just the letter "z".)
  def misc
    "(?:.*?#{eol}|.+\\z)"
  end
  # A complete hunk: header followed by a change, deletions or additions.
  def hunk
    "(?:#{hunk_header}(?:#{change}|#{del}+|#{add}+))"
  end
  # Top-level alternatives used to tokenize a whole classic diff.
  def elements
    "(?:#{hunk}|#{misc})"
  end
end
92
+
93
# Tokenizes a classic diff and returns a flat array of difference elements.
# Hunks (fragments starting with a digit) are handed to
# anatomize_classic_hunk; every other fragment is emitted as text that is
# identical on both sides.
def anatomize_classic(src)
  self.extend ClassicDiff
  src_encoding = CharString.guess_encoding(src)
  src_eol = CharString.guess_eol(src)
  diffed = []
  src.scan(Regexp.new(elements, Regexp::MULTILINE)) do |fragment|
    if /\A[0-9]/.match(fragment)
      # hunk
      diffed.concat(anatomize_classic_hunk(fragment, src_encoding, src_eol))
    else
      # not a hunk: compare the fragment's lines against themselves
      diffed.concat(Difference.new(fragment.split(/^/), fragment.split(/^/)))
    end
  end
  diffed
end
108
+
109
# Breaks one classic-diff hunk into difference elements.
# The header is emitted as unchanged text; the body is emitted as a
# deletion, an addition or a change according to the letter (d/a/c) found
# in the header.  Raises RuntimeError when the header has none of them.
def anatomize_classic_hunk(a_hunk, src_encoding, src_eol)
  self.extend ClassicDiff
  # Tags a string with CharString plus the source's encoding and eol.
  tag = lambda do |str|
    str.extend(CharString)
    str.encoding, str.eol = src_encoding, src_eol
    str
  end
  diffed = []
  a_hunk.scan(/(#{hunk_header})(#{change}|#{del}+|#{add}+)/) do |raw_head, raw_body|
    head = tag.call(raw_head)
    body = tag.call(raw_body)
    diffed.concat(Difference.new(head.to_words, head.to_words))
    if /d/.match(head)      # del
      diffed.concat(Difference.new(body.to_words, []))
    elsif /a/.match(head)   # add
      diffed.concat(Difference.new([], body.to_words))
    elsif /c/.match(head)   # change (need tweak)
      former, latter = body.split(/#{sep}/).collect { |part| tag.call(part) }
      d = Difference.new(former.to_words, latter.to_words)
      diffed_former = d.former_only
      diffed_latter = d.latter_only
      # The "---" separator line is kept in the output as unchanged text.
      sepstr = /#{sep}/.match(body).to_s.extend(CharString)
      sepstr.encoding, sepstr.eol = src_encoding, src_eol
      sepelm = Difference.new(sepstr.to_words, sepstr.to_words)
      diffed.concat(diffed_former + sepelm + diffed_latter)
    else
      raise "invalid hunk header: #{head}"
    end
  end
  diffed
end
143
+
144
# Regexp source fragments describing the pieces of a context diff.
# Each method returns a String meant to be interpolated into a Regexp.
module ContextDiff
  # A line terminator (CRLF, LF, CR) or the end of the string.
  def eol
    "(?:\r\n|\n|\r|\\z)"
  end

  # Any single character that is not part of a line terminator.
  def noneol
    "(?:[^\r\n])"
  end

  # The row of asterisks that opens a hunk.
  def hunk_header
    "(?:\\*+" + eol + ")"
  end

  # "*** from,to ****" line that opens the former half of a hunk.
  def hunk_subheader_former
    "(?:^\\*+ [0-9]+,[0-9]+ \\*+" + eol + ")"
  end

  # "--- from,to ----" line that opens the latter half of a hunk.
  def hunk_subheader_latter
    "(?:^-+ [0-9]+,[0-9]+ -+" + eol + ")"
  end

  # A deleted line ("- ...").
  def del
    "(?:^- " + noneol + "*?" + eol + ")"
  end

  # An added line ("+ ...").
  def add
    "(?:^\\+ " + noneol + "*?" + eol + ")"
  end

  # A changed line ("! ...").
  def change
    "(?:^! " + noneol + "*?" + eol + ")"
  end

  # Text that starts with none of the marker characters - + ! *.
  def misc
    "(?:^[^-+!*]+?" + eol + "+?)"
  end

  # A run of deleted, added, changed or miscellaneous lines.
  def any
    runs = [del, add, change, misc].map { |pat| pat + "+" }
    "(?:" + runs.join("|") + ")"
  end

  # "*** name" or "--- name" file header line.
  def file_header
    "(?:[-\\*]{3} " + noneol + "+?" + eol + ")"
  end

  # Top-level alternatives used to tokenize a whole context diff:
  # a file header, a complete hunk, miscellaneous text, or any other line.
  def elements
    whole_hunk = hunk_header + hunk_subheader_former + any + "*?" +
                 hunk_subheader_latter + any + "+"
    other_line = noneol + "+" + eol
    "(?:" + [file_header, whole_hunk, misc, other_line].join("|") + ")"
  end
end
182
+
183
# Tokenizes a context diff and returns a flat array of difference elements.
# Fragments that open with a row of at least ten asterisks followed by a
# "*** " line are treated as hunks; everything else is emitted as text that
# is identical on both sides.
def anatomize_context(src)
  self.extend ContextDiff
  src_encoding = CharString.guess_encoding(src)
  src_eol = CharString.guess_eol(src)
  hunk_start = /\A\*{10,}#{eol}^\*{3} /
  diffed = []
  src.scan(/#{elements}/m) do |fragment|
    if hunk_start.match(fragment)
      # hunk
      diffed.concat(anatomize_context_hunk(fragment, src_encoding, src_eol))
    else
      # not a hunk
      fragment.extend(CharString)
      fragment.encoding, fragment.eol = src_encoding, src_eol
      diffed.concat(Difference.new(fragment.to_words, fragment.to_words))
    end
  end
  diffed
end
200
+
201
# Breaks one context-diff hunk into difference elements.
# The hunk is split into header, former sub-header, former body, latter
# sub-header and latter body.  Headers are emitted as unchanged text and
# the two bodies are analysed by anatomize_context_hunk_scanbodies.
def anatomize_context_hunk(a_hunk, src_encoding, src_eol)
  self.extend ContextDiff
  h = sh_f = body_f = sh_l = body_l = nil
  hunk_layout = /(#{hunk_header})(#{hunk_subheader_former})(.*?)(#{hunk_subheader_latter})(.*?)\z/m
  a_hunk.scan(hunk_layout) do |parts|
    h, sh_f, body_f, sh_l, body_l = parts[0..4].collect do |part|
      if part
        part.extend(CharString)
        part.encoding, part.eol = src_encoding, src_eol
      end
      part
    end
  end
  diffed_former, diffed_latter = anatomize_context_hunk_scanbodies(body_f, body_l, src_encoding, src_eol)
  diffed = []
  diffed.concat(Difference.new(h.to_words, h.to_words) +
                Difference.new(sh_f.to_words, sh_f.to_words) +
                diffed_former +
                Difference.new(sh_l.to_words, sh_l.to_words) +
                diffed_latter)
  diffed
end
222
+
223
# Analyses the former and latter bodies of a context-diff hunk.
# Returns a two-element array: the difference elements for the former body
# and those for the latter body.  A nil body is treated as empty.
#
# Runs of "! " lines are paired up by position: the n-th run in the former
# body is compared with the n-th run in the latter body, and each half
# keeps only its own side of that comparison.
def anatomize_context_hunk_scanbodies(body_f, body_l, src_encoding, src_eol)
  body_f = '' if body_f.nil?
  body_l = '' if body_l.nil?
  self.extend ContextDiff
  # Tags a string (if any) with CharString plus the source's encoding and eol.
  tag = lambda do |str|
    if str
      str.extend(CharString)
      str.encoding, str.eol = src_encoding, src_eol
    end
    str
  end
  bodies = [body_f, body_l]
  # Changed runs of each half, in order of appearance.
  changes_org = bodies.collect do |body|
    body.scan(/#{change}+/).collect { |run| tag.call(run) }
  end
  diffed = [[], []]
  bodies.each_with_index do |half, i|
    # Fresh work queues for every half, so both halves consume the same pairs.
    pending_former = changes_org[0].dup
    pending_latter = changes_org[1].dup
    half.scan(/(#{del}+)|(#{add}+)|(#{change}+)|(#{misc}+)/m) do |elm|
      elm_d, elm_a, elm_c, elm_cmn = elm[0..3]
      [elm_d, elm_a, elm_c, elm_cmn].each { |piece| tag.call(piece) }
      if elm_d
        d = Difference.new(elm_d.to_words, [])
      elsif elm_a
        d = Difference.new([], elm_a.to_words)
      elsif elm_c
        d = Difference.new(pending_former.shift.to_words, pending_latter.shift.to_words)
        if i == 0
          d = d.former_only
        elsif i == 1
          d = d.latter_only
        else
          raise
        end
      elsif elm_cmn
        d = Difference.new(elm_cmn.to_words, elm_cmn.to_words)
      else
        raise "bummers!"
      end
      diffed[i].concat(d)
    end
  end
  diffed
end
268
+
269
# Regexp source fragments describing the pieces of a unified diff.
# Each method returns a String meant to be interpolated into a Regexp.
module UnifiedDiff
  # A line terminator (CRLF, LF, CR) or the end of the string.
  # The anchor is written "\\z" because a bare "\z" inside a double-quoted
  # string is just the letter "z", which made every line pattern stop at
  # the first "z" it met.
  def eol
    "(?:\r\n|\n|\r|\\z)"
  end
  # Any single character that is not part of a line terminator.
  def noneol
    "(?:[^\r\n])"
  end
  # "@@ ... @@" hunk header line.
  def hunk_header
    "(?:@@ #{noneol}+#{eol})"
  end
  # A deleted line ("-...").
  def del
    "(?:^-#{noneol}*?#{eol})"
  end
  # An added line ("+...").
  def add
    "(?:^\\+#{noneol}*?#{eol})"
  end
  # Deleted lines immediately followed by added lines.
  def change
    "(?:#{del}+#{add}+)"
  end
  # An unchanged context line (" ...").
  def common
    "(?:^ #{noneol}*?#{eol})"
  end
  # Text that starts with neither "-" nor "+".
  def misc
    "(?:^[^-+]+?#{eol}+?)"
  end
  # A run of deleted, added, changed, common or miscellaneous lines.
  def any
    "(?:#{del}+|#{add}+|#{change}+|#{common}+|#{misc}+)"
  end
  # One leading line followed by the "--- " and "+++ " file lines.
  def file_header
    "(?:^[^-+@ ]#{noneol}+#{eol}(?:^[-+]{3} #{noneol}+#{eol}){2})"
  end
  # Top-level alternatives used to tokenize a whole unified diff.
  def elements
    "(?:#{file_header}|#{hunk_header}#{any}+?|#{misc}|#{noneol}+#{eol})"
  end
end
304
+
305
# Tokenizes a unified diff and returns a flat array of difference elements.
# Fragments starting with "@@ " are treated as hunks; everything else is
# emitted as text that is identical on both sides.
def anatomize_unified(src)
  self.extend UnifiedDiff
  src_encoding = CharString.guess_encoding(src)
  src_eol = CharString.guess_eol(src)
  diffed = []
  src.scan(/#{elements}/m) do |fragment|
    if /\A@@ /.match(fragment)
      # hunk
      diffed.concat(anatomize_unified_hunk(fragment.to_s, src_encoding, src_eol))
    else
      # not a hunk
      fragment.extend(CharString)
      fragment.encoding, fragment.eol = src_encoding, src_eol
      diffed.concat(Difference.new(fragment.to_words, fragment.to_words))
    end
  end
  diffed
end
322
+
323
# Breaks one unified-diff hunk into difference elements.
# The "@@" header is emitted as unchanged text.  The body is scanned for
# runs of deletions directly followed by additions (a change), lone
# deletions, lone additions, common lines and anything else.
#
# Only the capture groups that actually matched are tagged with CharString.
# The unmatched ones are nil; extending nil would alter NilClass for the
# whole process and assigning to it fails where nil is frozen.
#
# Raises RuntimeError when a comparison yields an unknown element type or
# when no capture group matched.
def anatomize_unified_hunk(a_hunk, src_encoding, src_eol)
  self.extend UnifiedDiff
  diffed = []
  a_hunk.scan(/(#{hunk_header})(#{any}+#{eol}?)/m){|m|
    head, body = m[0], m[1]
    [head, body].each{|e|
      e.extend(CharString)
      e.encoding, e.eol = src_encoding, src_eol
    }
    diffed.concat(Difference.new(head.to_words, head.to_words))
    body.scan(/(#{del}+)(#{add}+)|(#{del}+#{eol}?)|(#{add}+)|(#{common}+#{eol}?)|(.*#{eol}?)/m){|parts|
      cf, cl, d, a, cmn, msc = parts[0..5]
      [cf, cl, d, a, cmn, msc].each{|e|
        next if e.nil?  # group did not take part in this match
        e.extend(CharString)
        e.encoding, e.eol = src_encoding, src_eol
      }
      case
      when (cf and cl) then
        # A change: every differing element is reported as a one-sided change.
        Difference.new(cf.to_words, cl.to_words).each{|e|
          case e.first
          when :change_elt     then diffed << [:change_elt, e[1], nil]
                                    diffed << [:change_elt, nil, e[2]]
          when :del_elt        then diffed << [:change_elt, e[1], nil]
          when :add_elt        then diffed << [:change_elt, nil, e[2]]
          when :common_elt_elt then diffed << e
          else raise "bummers! (#{e.inspect})"
          end
        }
      when d   then diffed.concat(Difference.new(d.to_words, []))
      when a   then diffed.concat(Difference.new([], a.to_words))
      when cmn then diffed.concat(Difference.new(cmn.to_words, cmn.to_words))
      when msc then diffed.concat(Difference.new(msc.to_words, msc.to_words))
      else raise "bummers! (#{parts.inspect})"
      end
    }
  }
  return diffed
end
361
+
362
# Command-line entry point: reads text from ARGF, prints non-diff fragments
# unchanged and prints each diff fragment through View#to_tty.
if $0 == __FILE__
  src = ARGF.read
  enc, eol = CharString.guess_encoding(src), CharString.guess_eol(src)
  scan_text_for_diffs(src).each do |fragment|
    if DiffFile.new('').guess_diff_type(fragment) == "unknown"
      print fragment
      next
    end
    diff = DiffFile.new(fragment).anatomize
    print View.new(diff, enc, eol).to_tty
  end
end
@@ -0,0 +1,713 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
5
+ <head>
6
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
7
+ <title>DocDiff Readme</title>
8
+ <style type="text/css">
9
+ body {
10
+ margin-left: 5%;
11
+ }
12
+ body > h1, body > h2 {
13
+ margin-left: -5%;
14
+ }
15
+ h1 {
16
+ border: medium outset;
17
+ padding: 0.5em;
18
+ }
19
+ h2 {
20
+ border-top: thin solid;
21
+ border-right: thin solid;
22
+ border-left: thick solid;
23
+ border-bottom: thin solid;
24
+ padding-left: 0.3em;
25
+ }
26
+ h3 {
27
+ border-left: thick solid;
28
+ padding-left: 0.3em;
29
+ }
30
+ /* these styles are not supported or do not fit to today's browsers.
31
+ h4 {display: run-in;}
32
+ h4:after {content: ": ";}
33
+ kbd {font-family: "Courier New", "Courier", monospace; font-weight: bolder;}
34
+ */
35
+ </style>
36
+ <style type="text/css">
37
+ li blockquote {margin-top: 0em; margin-bottom: 0em; padding-top: 0em; padding-bottom: 0em;}
38
+ li blockquote p {margin-top: 0em; margin-bottom: 0em; padding-top: 0em; padding-bottom: 0em;}
39
+ span.del {background: hotpink; border: thin inset;}
40
+ span.add {background: deepskyblue; font-weight: bolder; border: thin outset;}
41
+ span.before-change {background: yellow; border: thin inset;}
42
+ span.after-change {background: lime; font-weight: bolder; border: thin outset;}
43
+ li.entry .position {font-weight: bolder; margin-top: 0em; margin-bottom: 0em; padding-top: 0em; padding-bottom: 0em;}
44
+ li.entry .body {margin-top: 0em; margin-bottom: 0em; padding-top: 0em; padding-bottom: 0em;}
45
+ </style>
46
+ <style title="en" type="text/css">
47
+ * [lang="ja"] {display: none}
48
+ * [lang="en"] {display: compact}
49
+ </style>
50
+ <style title="ja" type="text/css">
51
+ * [lang="en"] {display: none}
52
+ * [lang="ja"] {display: compact}
53
+ </style>
54
+ </head>
55
+ <body>
56
+ <h1>DocDiff Readme</h1>
57
+ <p>
58
+ 2000-12-09..2011-02-23 Hisashi MORITA
59
+ </p>
60
+
61
+ <hr />
62
+
63
+ <h2>
64
+ <span lang="en">Table of Contents</span>
65
+ <span lang="ja">目次</span>
66
+ </h2>
67
+ <ul lang="en">
68
+ <li><a href="#news">News</a></li>
69
+ <li><a href="#todo">Todo</a></li>
70
+ <li><a href="#description">Description</a></li>
71
+ <li><a href="#summary">Summary</a></li>
72
+ <li><a href="#requirement">Requirement</a></li>
73
+ <li><a href="#installation">Installation</a></li>
74
+ <li><a href="#usage">Usage</a></li>
75
+ <li><a href="#tips">Troubleshooting and Tips</a></li>
76
+ <li><a href="#license">License</a></li>
77
+ <li><a href="#credits">Credits</a></li>
78
+ <li><a href="#acknowledgments">Acknowledgments</a></li>
79
+ <li><a href="#resources">Resources</a></li>
80
+ </ul>
81
+ <ul lang="ja">
82
+ <li><a href="#news">ニュース</a></li>
83
+ <li><a href="#todo">予定</a></li>
84
+ <li><a href="#description">簡単な説明</a></li>
85
+ <li><a href="#summary">概要</a></li>
86
+ <li><a href="#requirement">必要なもの</a></li>
87
+ <li><a href="#installation">インストール方法</a></li>
88
+ <li><a href="#usage">使い方</a></li>
89
+ <li><a href="#tips">問題解決とヒント</a></li>
90
+ <li><a href="#license">ライセンス</a></li>
91
+ <li><a href="#credits">クレジット</a></li>
92
+ <li><a href="#acknowledgments">謝辞</a></li>
93
+ <li><a href="#resources">情報源</a></li>
94
+ </ul>
95
+
96
+ <hr />
97
+
98
+ <h2 id="news">
99
+ <span lang="en">News</span>
100
+ <span lang="ja">ニュース</span>
101
+ </h2>
102
+ <ul>
103
+ <!--
104
+ <li>0.4.x (xxxx-xx-xx)<ul>
105
+ <li></li>
106
+ </ul></li>
107
+ -->
108
+ <li>0.4.0 (2011-02-23)<ul>
109
+ <li>Compatible with Ruby 1.9 (thanks to Kazuhiko).</li>
110
+ </ul></li>
111
+ <li>0.3.4 (2007-12-10)<ul>
112
+ <li>Increased context length in digest mode from 16 to 32.</li>
113
+ <li>Added --display=inline|multi option. With inline, things before change and things after change are displayed inline. With multi, they are displayed in separate blocks. Default is inline.</li>
114
+ <li>Added --iso8859x option as an alias to --encoding=ASCII, so that users notice DocDiff can handle text in ISO-8859-* encoding.</li>
115
+ </ul></li>
116
+ <li>0.3.3 (2006-02-03)<ul>
117
+ <li>Fixed arg test so that we can compare non-normal files, such as device files and named pipes (thanks to Shugo Maeda).</li>
118
+ <li>Added DocDiff Web UI sample (experimental).</li>
119
+ <li>Fixed HTML output to produce valid XHTML (thanks to Hiroshi OHKUBO). Note that CSS in HTML output is slightly changed.</li>
120
+ <li>Replaced underscores(_) in CSS class names to hyphens(-) so that older UAs can understand them (thanks to Kazuhiro NISHIYAMA).</li>
121
+ </ul></li>
122
+ <li>0.3.2 (2005-01-03)<ul>
123
+ <li>Readme is multilingualized (added partial Japanese translation). Try switching CSS between en and ja. Monolingual files are also available (readme.en.html, readme.ja.html).</li>
124
+ <li>Outputs better error messages when it fails to auto-detect the encoding and/or eol, though the detection accuracy is unchanged.</li>
125
+ <li>Switched revision control system from CVS to Subversion.</li>
126
+ </ul></li>
127
+ <li>0.3.1 (2004-08-29)<ul>
128
+ <li>Added -L (--label) option place holder in order to be used as external diff program from Subversion.</li>
129
+ </ul></li>
130
+ <li>0.3.0 (2004-05-29)<ul>
131
+ <li>Re-designed and re-written from scratch.</li>
132
+ <li>Supports multiple encodings (ASCII, EUC-JP, Shift_JIS, UTF-8) and multiple eols (CR, LF, CRLF).</li>
133
+ <li>Supports more output formats (tty, HTML, Manued, wdiff-like, user-defined markup text).</li>
134
+ <li>Supports configuration files (/etc/docdiff/docdiff.conf, ~/etc/docdiff/docdiff.conf (or ~/.docdiff/docdiff.conf)).</li>
135
+ <li>Introduced digest (summary) mode.</li>
136
+ <li>Approximately 200% faster than older versions, thanks to akr's diff library.</li>
137
+ <li>Better documentation and help message.</li>
138
+ <li>License changed from Ruby's to modified BSD style.</li>
139
+ <li>Pure Ruby. Does not require external diff program such as GNU diff, or morphological analyzer such as ChaSen.</li>
140
+ <li>Runs on both Unix and Windows (tested on Debian GNU/Linux and Cygwin)</li>
141
+ <li>Unit tests introduced to decrease bugs and to encourage faster development.</li>
142
+ <li>Makefile introduced.</li>
143
+ </ul></li>
144
+ <li>0.1.8 (2003-12-14)<ul>
145
+ <li>Displays warning when --bymorpheme is specified but ChaSen is not available (patch by Akira YAMADA: Debian bug #192258).</li>
146
+ <li>Supports system-wide configuration file (if ~/.chasenrc.docdiff does not exist, reads /etc/docdiff/chasenrc) (patch by Akira YAMADA: Debian bug #192261).</li>
147
+ </ul></li>
148
+ <li>0.1.7 (2003-11-21)<ul>
149
+ <li>HTML output retains spaces (&amp;nbsp; patch by Akira YAMADA). </li>
150
+ <li>Manued output is added. Use --manued command line option to get result in Manued-like format.</li>
151
+ <li>Fixed .chasenrc.docdiff to be compatible with the latest ChaSen, so that it does not cause error.</li>
152
+ <li>Alphabet words in the output may look ugly, since ChaSen does not keep spaces between alphabetical words recently.</li>
153
+ <li>Other minor bug fixes and code cleanup.</li>
154
+ </ul></li>
155
+ <li>0.2.0b2 (2001-08-31)<ul>
156
+ <li>Code cleanup.</li>
157
+ </ul></li>
158
+ <li>0.2.0b1 (2001-08-31)<ul>
159
+ <li>A bit faster than 0.1.x, using file cache.</li>
160
+ <li>A bit cleaner code.</li>
161
+ </ul></li>
162
+ <li>0.1.6 (2001-05-16)<ul>
163
+ <li>Increased diff option number from 100000 to 1000000 in order to support 900KB+ text files.</li>
164
+ </ul></li>
165
+ <li>0.1.5 (2001-01-17)<ul>
166
+ <li>Erased useless old code which was already commented out.</li>
167
+ <li>Added documentation. (Updated README, more comments)</li>
168
+ <li>First public release. Registered to RAA.</li>
169
+ </ul></li>
170
+ <li>0.1.4 (2001-01-16)<ul>
171
+ <li>Output is like &lt;tag&gt;ab&lt;/tag&gt;, instead of ugly &lt;tag&gt;a&lt;/tag&gt;&lt;tag&gt;b&lt;/tag&gt; (thanks again to Masatoshi Seki for suggestion).</li>
172
+ <li>Fixed hidden bug ('puts' was used to output result).</li>
173
+ <li>Some code clean-up, though still hairy enough.</li>
174
+ </ul></li>
175
+ <li>0.1.3 (2001-01-09)<ul>
176
+ <li>Tested with Ruby 1.6.2.</li>
177
+ <li>Fixed "meth(a,b,)" bug (thanks to Masatoshi Seki).</li>
178
+ <li>Switched development platform from Windows to Linux, but it should work fine on Windows too, except for ChaSen stuff.</li>
179
+ </ul></li>
180
+ <li>0.1.2 (2000-12-28)<ul>
181
+ <li>Mostly bug fix.</li>
182
+ </ul></li>
183
+ <li>0.1.1 (2000-12-25)<ul>
184
+ <li>Bug fix and some cleanup.</li>
185
+ <li>Quotes some of HTML special characters (&lt;&gt;&amp;) when output in HTML.</li>
186
+ <li>Added support for tty output using escape sequence.</li>
187
+ </ul></li>
188
+ <li>0.1.0 (2000-12-19)<ul>
189
+ <li>ChaSen works fine now.</li>
190
+ <li>GetOptLong was introduced to support command line options.</li>
191
+ </ul></li>
192
+ <li>0.1.0a1 (2000-12-16)<ul>
193
+ <li>Added ChaSen support. Japanese word by word comparison requires ChaSen.</li>
194
+ <li>Converted scripts from Shift_JIS/CRLF to EUC-JP/LF.</li>
195
+ </ul></li>
196
+ <li>0.0.2 (2000-12-10)<ul>
197
+ <li>Rewritten to use class.</li>
198
+ </ul></li>
199
+ <li>0.0.1 (2000-12-09)<ul>
200
+ <li>First version. Proof-of-concept.</li>
201
+ <li>Supports ASCII, EUC-JP, LF only.</li>
202
+ <li>Supports HTML output only.</li>
203
+ <li>Requires GNU diff.</li>
204
+ <li>Distributed under the same license as Ruby's</li>
205
+ </ul></li>
206
+ </ul>
207
+ <p>See the ChangeLog for detail.</p>
208
+
209
+ <h2 id="todo">
210
+ <span lang="en">Todo</span>
211
+ <span lang="ja">予定</span>
212
+ </h2>
213
+ <ul>
214
+ <li>Incorporate ignore space patch.</li>
215
+ <li>Better auto-recognition of encodings and eols.</li>
216
+ <li>Make CSS and tty escape sequence customizable in config files.</li>
217
+ <li>Better multilingualization using Ruby 1.9 feature.</li>
218
+ <li>Write "DocPatch".</li>
219
+ </ul>
220
+
221
+ <hr />
222
+
223
+ <h2 id="description">
224
+ <span lang="en">Description</span>
225
+ <span lang="ja">簡単な説明</span>
226
+ </h2>
227
+ <p lang="en">
228
+ Compares two text files by word, by character, or by line
229
+ </p>
230
+ <p lang="ja">
231
+ 2つのテキストファイルを単語ごと、文字ごと、あるいは行ごとに比較する
232
+ </p>
233
+
234
+ <h2 id="summary">
235
+ <span lang="en">Summary</span>
236
+ <span lang="ja">概要</span>
237
+ </h2>
238
+ <p lang="en">
239
+ DocDiff compares two text files and shows the difference. It can compare files word by word, character by character, or line by line. It has several output formats such as HTML, tty, Manued, or user-defined markup.
240
+ </p>
241
+ <p lang="ja">
242
+ DocDiffは2つのテキストファイルを比較してその違いを表示します。単語ごと、文字ごと、そして行ごとにファイルを比較できます。結果を出力する形式は、HTML, tty(文字端末向けのエスケープシーケンス), Manued(真鵺道という校正用のマークアップ形式)などが用意されており、ユーザ定義のタグを使うこともできます。
243
+ </p>
244
+ <p lang="en">
245
+ It supports several encodings and end-of-line characters, including ASCII (and other single byte encodings such as ISO-8859-*), UTF-8, EUC-JP, Shift_JIS, CR, LF, and CRLF.
246
+ </p>
247
+ <p lang="ja">
248
+ 次のエンコーディング(文字コード)と行末コード(改行文字)をサポートしています: ASCII(およびISO-8859-*などのシングルバイトエンコーディング), UTF-8, EUC-JP, Shift_JIS、そしてCR, LF, CRLF.
249
+ </p>
250
+
251
+ <hr />
252
+
253
+ <h2 id="requirement">
254
+ <span lang="en">Requirement</span>
255
+ <span lang="ja">必要なもの</span>
256
+ </h2>
257
+ <ul>
258
+ <li lang="en"><a href="http://www.ruby-lang.org">Ruby</a><br />
259
+ (Note that you may need additional Ruby libraries such as iconv, if your OS's Ruby package does not include them.)
260
+ </li>
261
+ <li lang="ja"><a href="http://www.ruby-lang.org">Ruby</a><br />
262
+ (注意: あなたが使っているOS向けのRubyパッケージがiconvなどのライブラリを含んでいない場合は、別途それらを手に入れる必要があるかもしれません。)
263
+ </li>
264
+ </ul>
265
+
266
+ <h2 id="installation">
267
+ <span lang="en">Installation</span>
268
+ <span lang="ja">インストール方法</span>
269
+ </h2>
270
+ <p lang="en">
271
+ Note that you need appropriate permission for proper installation (you may have to have a root/administrator privilege).
272
+ </p>
273
+ <p lang="ja">
274
+ 注意: 正しくインストールするためには適切なファイルアクセス権限が必要です(管理者権限が必要な場合があります)。
275
+ </p>
276
+ <ol lang="en">
277
+ <li>
278
+ Place the <samp>docdiff/</samp> directory and its contents in the Ruby library directory, so that the Ruby interpreter can load them.<br />
279
+ (e.g. <blockquote><p><samp># <kbd>cp -r docdiff /usr/lib/ruby/1.9.1</kbd></samp></p></blockquote>)
280
+ </li>
281
+ <li>
282
+ Place <samp>docdiff.rb</samp> in command binary directory.<br />
283
+ (e.g. <blockquote><p><samp># <kbd>cp docdiff.rb /usr/bin/</kbd></samp></p></blockquote>)<br />
284
+ (Optional) You may want to rename it to <samp>docdiff</samp>.<br />
285
+ (e.g. <blockquote><p><samp># <kbd>mv /usr/bin/docdiff.rb /usr/bin/docdiff</kbd></samp></p></blockquote>)<br />
286
+ (Optional) When invoked as <samp>chardiff</samp> or <samp>worddiff</samp>, docdiff runs with resolution set to <samp>char</samp> or <samp>word</samp>, respectively.<br />
287
+ (e.g. <blockquote><p><samp># <kbd>ln -s /usr/bin/docdiff.rb /usr/bin/chardiff.rb</kbd></samp><br />
288
+ <samp># <kbd>ln -s /usr/bin/docdiff.rb /usr/bin/worddiff.rb</kbd></samp></p></blockquote>)
289
+ </li>
290
+ <li>
291
+ Set appropriate permission.<br />
292
+ (e.g. <blockquote><p><samp># <kbd>chmod +x /usr/bin/docdiff.rb</kbd></samp></p></blockquote>)
293
+ </li>
294
+ <li>
295
+ (Optional) If you want site-wide configuration file, place <samp>docdiff.conf.example</samp> as <samp>/etc/docdiff/docdiff.conf</samp> and edit it.<br />
296
+ (e.g. <blockquote><p><samp># <kbd>cp docdiff.conf.example /etc/docdiff.conf</kbd><br />
297
+ # <kbd>$EDITOR /etc/docdiff.conf</kbd></samp></p></blockquote>)
298
+ </li>
299
+ <li>
300
+ (Optional) If you want per-user configuration file, place <samp>docdiff.conf.example</samp> as <samp>~/etc/docdiff/docdiff.conf</samp> and edit it.<br />
301
+ (e.g. <blockquote><p><samp>% <kbd>cp docdiff.conf.example ~/etc/docdiff.conf</kbd><br />
302
+ % <kbd>$EDITOR ~/etc/docdiff.conf</kbd></samp></p></blockquote>)
303
+ </li>
304
+ </ol>
305
+ <ol lang="ja">
306
+ <li>
307
+ <samp>docdiff/</samp>ディレクトリとその内容をrubyのライブラリを格納するディレクトリに配置して、Rubyインタプリタがdocdiffライブラリをロードできるようにする。<br />
308
+ (e.g. <blockquote><p><samp># <kbd>cp -r docdiff /usr/lib/ruby/1.9.1</kbd></samp></p></blockquote>)
309
+ </li>
310
+ <li>
311
+ <samp>docdiff.rb</samp>をコマンドを格納するディレクトリに配置する。<br />
312
+ (e.g. <blockquote><p><samp># <kbd>cp docdiff.rb /usr/bin/</kbd></samp></p></blockquote>)<br />
313
+ ここで必要に応じてファイル名を<samp>docdiff</samp>のように変更してもよい。<br />
314
+ (e.g. <blockquote><p><samp># <kbd>mv /usr/bin/docdiff.rb /usr/bin/docdiff</kbd></samp></p></blockquote>)
315
+ </li>
316
+ <li>
317
+ 適切なパーミッションを設定する。<br />
318
+ (e.g. <blockquote><p><samp># <kbd>chmod +x /usr/bin/docdiff.rb</kbd></samp></p></blockquote>)
319
+ </li>
320
+ <li>
321
+ (オプション) もしサイトの全ユーザに適用される設定ファイルが必要なら、<samp>docdiff.conf.example</samp>を<samp>/etc/docdiff/docdiff.conf</samp>として配置し、編集する。<br />
322
+ (e.g. <blockquote><p><samp># <kbd>cp docdiff.conf.example /etc/docdiff.conf</kbd><br />
323
+ # <kbd>$EDITOR /etc/docdiff.conf</kbd></samp></p></blockquote>)
324
+ </li>
325
+ <li>
326
+ (オプション) もしユーザごとの設定ファイルが必要なら、<samp>docdiff.conf.example</samp>を<samp>~/etc/docdiff/docdiff.conf</samp>として配置し、編集する。<br />
327
+ (e.g. <blockquote><p><samp>% <kbd>cp docdiff.conf.example ~/etc/docdiff.conf</kbd><br />
328
+ % <kbd>$EDITOR ~/etc/docdiff.conf</kbd></samp></p></blockquote>)
329
+ </li>
330
+ </ol>
331
+
332
+ <hr />
333
+
334
+ <h2 id="usage">
335
+ <span lang="en">Usage</span>
336
+ <span lang="ja">使い方</span>
337
+ </h2>
338
+
339
+ <h3>
340
+ <span lang="en">Synopsis</span>
341
+ <span lang="ja">概要</span>
342
+ </h3>
343
+ <blockquote><p>
344
+ <kbd>docdiff [options] <var>oldfile</var> <var>newfile</var></kbd><br />
345
+ e.g. <samp>% <kbd>docdiff old.txt new.txt > diff.html</kbd></samp>
346
+ </p></blockquote>
347
+ <p lang="en">
348
+ See the help message for detail (<kbd>docdiff --help</kbd>).
349
+ </p>
350
+ <p lang="ja">
351
+ 詳しくはヘルプメッセージを参照してください(<kbd>docdiff --help</kbd>)。
352
+ </p>
353
+
354
+ <h3>
355
+ <span lang="en">Example</span>
356
+ <span lang="ja">例</span>
357
+ </h3>
358
+ <blockquote>
359
+ <p lang="en"><samp>
360
+ % <kbd>cat sample/01.en.ascii.lf</kbd><br />
361
+ Hello, my name is Watanabe.<br />
362
+ I am just another Ruby porter.<br />
363
+ % <kbd>cat sample/02.en.ascii.lf</kbd><br />
364
+ Hello, my name is matz.<br />
365
+ It's me who has created Ruby. I am a Ruby hacker.<br />
366
+ % <kbd>docdiff sample/01.en.ascii.lf sample/02.en.ascii.lf</kbd><br />
367
+ <span class="common">Hello, my name is </span><span class="before-change"><del>Watanabe.</del></span><span class="after-change"><ins>matz.</ins></span><span class="common"><br /></span>
368
+ <span class="add"><ins>It's me who has created Ruby.&nbsp;&nbsp;</ins></span><span class="common">I am </span><span class="before-change"><del>just another </del></span><span class="after-change"><ins>a </ins></span><span class="common">Ruby </span><span class="before-change"><del>porter.</del></span><span class="after-change"><ins>hacker.</ins></span><span class="common"><br /></span>
369
+ %<br />
370
+ </samp></p>
371
+ <p lang="ja"><samp>
372
+ % <kbd>cat sample/01.ja.eucjp.lf</kbd><br />
373
+ こんにちは、私の名前はわたなべです。<br />
374
+ 私はJust Another Ruby Porterです。<br />
375
+ % <kbd>cat sample/02.ja.eucjp.lf</kbd><br />
376
+ こんばんは、私の名前はまつもとです。<br />
377
+ Rubyを作ったのは私です。私はRuby Hackerです。<br />
378
+ % <kbd>docdiff sample/01.ja.eucjp.lf sample/02.ja.eucjp.lf</kbd><br />
379
+ <span class="before-change"><del>こんにちは</del></span><span class="after-change"><ins>こんばんは</ins></span><span class="common">、私の</span><span class="before-change"><del>名前はわたなべです</del></span><span class="after-change"><ins>名前はまつもとです</ins></span><span class="common">。<br /></span>
380
+ <span class="add"><ins>Rubyを作ったのは私です。</ins></span><span class="common">私は</span><span class="del"><del>Just Another </del></span><span class="common">Ruby </span><span class="before-change"><del>Porter</del></span><span class="after-change"><ins>Hacker</ins></span><span class="common">です。<br /></span>
381
+ %<br />
382
+ </samp></p>
383
+ </blockquote>
384
+
385
+ <h3>
386
+ <span lang="en">Configuration</span>
387
+ <span lang="ja">設定</span>
388
+ </h3>
389
+ <p lang="en">
390
+ You can place configuration files at:
391
+ </p>
392
+ <p lang="ja">
393
+ 設定ファイルは次の場所に配置すると有効になります:
394
+ </p>
395
+ <ul lang="en">
396
+ <li><samp>/etc/docdiff/docdiff.conf</samp> (site-wide configuration)</li>
397
+ <li><samp>~/etc/docdiff/docdiff.conf</samp> (user configuration)<br />
398
+ (<samp>~/etc/docdiff/docdiff.conf</samp> is used by default in order to keep home directory clean, preventing dotfiles and dotdirs from scattering around. Alternatively, you can use <samp>~/.docdiff/docdiff.conf</samp> as user configuration file name, following the traditional Unix convention.)</li>
399
+ </ul>
400
+ <ul lang="ja">
401
+ <li><samp>/etc/docdiff/docdiff.conf</samp>(サイト全体の設定)</li>
402
+ <li><samp>~/etc/docdiff/docdiff.conf</samp>(ユーザごとの設定)<br />
403
+ (標準の設定では、ホームディレクトリにドットファイルをまき散らかさないために、<samp>~/etc/docdiff/docdiff.conf</samp>が使われます。または、昔ながらのUnixの慣習に従って<samp>~/.docdiff/docdiff.conf</samp>を使ってももちろん構いません。ただし両方同時には使えません)</li>
404
+ </ul>
405
+
406
+ <p lang="en">
407
+ Notation is as follows (also refer to the file <samp>docdiff.conf.example</samp> included in the distribution archive):
408
+ </p>
409
+ <p lang="ja">
410
+ 設定ファイルの記法は次のとおりです(<samp>docdiff.conf.example</samp>ファイルも参照してください。配布アーカイブに含まれているはずです):
411
+ </p>
412
+ <blockquote>
413
+ <p><code>
414
+ # comment<br />
415
+ key1 = value<br />
416
+ key2 = value<br />
417
+ ...
418
+ </code></p>
419
+ </blockquote>
420
+ <p lang="en">
421
+ Every value is treated as a string unless it looks like a number, in which case it is treated as a number (usually an integer).
422
+ </p>
423
+ <p lang="ja">
424
+ 値(value)は、数字として解釈できるもの以外は、すべて文字列として扱われます。数字は数値(たいていは整数)として扱われます。
425
+ </p>
426
+
427
+ <h2 id="tips">
428
+ <span lang="en">Troubleshooting and Tips</span>
429
+ <span lang="ja">問題解決とヒント</span>
430
+ </h2>
431
+
432
+ <h3>
433
+ <span lang="en">wrong argument type nil (expected Module) (TypeError)</span>
434
+ <span lang="ja">wrong argument type nil (expected Module) (TypeError)</span>
435
+ </h3>
436
+ <p lang="en">
437
+ Sometimes DocDiff fails to auto-recognize encoding and/or end-of-line
438
+ character. You may get an error like this.
439
+ </p>
440
+ <p lang="ja">
441
+ DocDiffがたまにエンコーディングや行末文字の自動判定に失敗して、次のようなエラーを出力することがあります。
442
+ </p>
443
+ <blockquote><p><samp>
444
+ charstring.rb:47:in `extend': wrong argument type nil (expected Module) (TypeError)<br />
445
+ </samp></p></blockquote>
446
+ <p lang="en">
447
+ In such a case, try explicitly specifying encoding and end-of-line character (e.g. <kbd>docdiff --utf8 --crlf</kbd>).
448
+ </p>
449
+ <p lang="ja">
450
+ このような場合は、エンコーディングや行末文字を明示的に指定してみてください(e.g. <kbd>docdiff --utf8 --crlf</kbd>)。
451
+ </p>
452
+
453
+ <h3>
454
+ <span lang="en">Inappropriate Insertion / Deletion</span>
455
+ <span lang="ja">不適切な挿入と削除</span>
456
+ </h3>
457
+ <p lang="en">
458
+ When comparing space-separated texts (such as English or program source code), the word at the end of a line is sometimes unnecessarily deleted and re-inserted. This is due to a limitation of DocDiff's word splitter, which splits strings into words as follows.
459
+ </p>
460
+ <p lang="ja">
461
+ スペースで区切られたテキスト(英文やプログラムのソースコードなど)を比較しているときに、行末にある単語が、特に必要もないのにいったん削除されてからまた挿入されることがあります。これはDocDiffの単語分割機能に制限があるせいで起きます。テキストは次のように単語に分割されます。
462
+ </p>
463
+ <p>text 1:</p>
464
+ <blockquote><p><samp>
465
+ foo bar<br />
466
+ ("foo bar" =&gt; ["foo ", "bar"])
467
+ </samp></p></blockquote>
468
+ <p>text 2:</p>
469
+ <blockquote><p><samp>
470
+ foo<br />
471
+ bar<br />
472
+ ("foo\nbar" =&gt; ["foo", "\n", "bar"])
473
+ </samp></p></blockquote>
474
+ <p>comparison result:</p>
475
+ <blockquote><p><samp>
476
+ <del>foo </del><ins>foo</ins><ins><br />
477
+ </ins>bar<br />
478
+ ("&lt;del&gt;foo &lt;/del&gt;&lt;ins&gt;foo&lt;/ins&gt;&lt;ins&gt;\n&lt;/ins&gt;bar")
479
+ </samp></p></blockquote>
480
+ <p lang="en">
481
+ Foo is (unnecessarily) deleted and inserted at the same time.
482
+ </p>
483
+ <p lang="ja">
484
+ Fooは(必要もないのに)削除されると同時に挿入されています。
485
+ </p>
486
+ <p lang="en">
487
+ I would like to fix this sometime, but it's not easy. If a single space were split off as a separate element (i.e. <samp>["foo", " ", "bar"]</samp>), the word order of the comparison result would be less natural. Suggestions are welcome.
488
+ </p>
489
+ <p lang="ja">
490
+ 作者はこの問題をいつか解決したいと思っていますが、簡単ではなさそうです。もし空白を1つの要素として分割したなら(i.e. <samp>["foo", " ", "bar"]</samp>)、比較した結果出力される単語の並びが今よりも不自然になってしまいます。良い案があったら教えてください。
491
+ </p>
492
+
493
+ <h3>
494
+ <span lang="en">Using DocDiff with Version Control Systems</span>
495
+ <span lang="ja">DocDiffをバージョン管理システムと組み合わせて使う</span>
496
+ </h3>
497
+ <p lang="en">
498
+ If you want to use DocDiff as an external diff program from VCSs, the following may work.
499
+ </p>
500
+ <p lang="ja">
501
+ DocDiffをVCSの外部diffプログラムとして使いたければ、次のようにするとよいでしょう。
502
+ </p>
503
+ <dl>
504
+ <dt>Subversion</dt>
505
+ <dd>
506
+ <p>
507
+ <samp>% <kbd>svn diff --diff-cmd=docdiff --extensions "--ascii --lf --tty --digest"</kbd></samp>
508
+ </p>
509
+ </dd>
510
+ <dt>Git</dt>
511
+ <dd>
512
+ <p>
513
+ <samp>% <kbd>GIT_EXTERNAL_DIFF=~/bin/gitdocdiff.sh git diff</kbd></samp>
514
+ </p>
515
+ <p>
516
+ ~/bin/gitdocdiff.sh:
517
+ </p>
518
+ <pre>#!/bin/sh
519
+ docdiff --ascii --lf --tty --digest $2 $5</pre>
520
+ </dd>
521
+ </dl>
522
+ <p lang="en">
523
+ With zsh, you can use DocDiff or another utility to compare arbitrary sources. In the following example, we compare a specific revision of foo.html in a repository with the one on a website.
524
+ </p>
525
+ <p lang="ja">
526
+ zshを使えば、いろいろな場所にある文書をDocDiffや他のユーティリティで自由に比較できます。次の例ではリポジトリ内の特定のリビジョンのfoo.htmlとウェブサイト上のfoo.htmlとを比較しています。
527
+ </p>
528
+ <blockquote><p>CVS:<br />
529
+ <samp>% <kbd>docdiff =(cvs -Q update -p -r 1.3 foo.html) =(curl --silent http://www.example.org/foo.html)</kbd></samp>
530
+ </p></blockquote>
531
+ <blockquote><p>Subversion:<br />
532
+ <samp>% <kbd>docdiff =(svn cat -r3 http://svn.example.org/repos/foo.html) =(curl --silent http://www.example.org/foo.html)</kbd></samp>
533
+ </p></blockquote>
534
+ <h3>
535
+ <span lang="en">Comparing Non-plain Text Files Such As HTML or Microsoft Word Documents</span>
536
+ <span lang="ja">HTMLやWord文書などのプレーンテキストではないファイルを比較する</span>
537
+ </h3>
538
+ <p lang="en">
539
+ You can compare files other than plain text, such as HTML and Microsoft Word documents, if you use an appropriate converter.
540
+ </p>
541
+ <p lang="ja">
542
+ 適切な変換ツールを使えば、HTMLやMicrosoft Word文書など、プレーンテキスト以外のファイルも比較できます。
543
+ </p>
544
+ <blockquote><p>
545
+ <span lang="en">Comparing the content of two HTML documents (without tags)</span>
546
+ <span lang="ja">HTML文書の内容(タグを除く)を比較</span>:<br />
547
+ <samp>% <kbd>docdiff =(w3m -dump -cols 10000 foo.html) =(w3m -dump -cols 10000 http://www.example.org/foo.html)</kbd></samp>
548
+ </p></blockquote>
549
+ <blockquote><p>
550
+ <span lang="en">Comparing the content of two Microsoft Word documents</span>
551
+ <span lang="ja">Microsoft Word文書の内容を比較</span>:<br />
552
+ <samp>% <kbd>docdiff =(wvWare foo.doc | w3m -T text/html -dump -cols 10000) =(wvWare bar.doc | w3m -T text/html -dump -cols 10000)</kbd></samp>
553
+ </p></blockquote>
554
+ <h3>
555
+ <span lang="en">Workaround for Latin-* (ISO-8859-*) encodings: Use ASCII</span>
556
+ <span lang="ja">Latin-* (ISO-8859-*) のための回避策: ASCIIを指定する</span>
557
+ </h3>
558
+ <p lang="en">
559
+ If you want to compare Latin-* (ISO-8859-*) texts, try using ASCII as their encoding. When ASCII is specified, DocDiff assumes single-byte characters.
560
+ </p>
561
+ <p lang="ja">
562
+ 文字コードがLatin-* (ISO-8859-*) のテキストを扱うときは、文字コードにASCIIを指定してみてください。ASCIIが指定されると、DocDiffは対象をシングルバイト文字のテキストとして扱います。
563
+ </p>
564
+ <blockquote><p>Comparing Latin-1 texts:<br />
565
+ <samp>% <kbd>docdiff --encoding=ASCII latin-1-old.txt latin-1-new.txt</kbd></samp>
566
+ </p></blockquote>
567
+
568
+ <hr />
569
+
570
+ <h2 id="license">
571
+ <span lang="en">License</span>
572
+ <span lang="ja">ライセンス</span>
573
+ </h2>
574
+ <p lang="en">
575
+ This software is distributed under the so-called modified BSD-style license (<a href="http://www.opensource.org/licenses/bsd-license.php">http://www.opensource.org/licenses/bsd-license.php</a> (without advertisement clause)). By contributing to this software, you agree that your contribution may be incorporated under the same license.
576
+ </p>
577
+ <p lang="ja">
578
+ このソフトウェアはいわゆる修正BSDスタイルライセンス(<a href="http://www.opensource.org/licenses/bsd-license.php">http://www.opensource.org/licenses/bsd-license.php</a>(広告条項なし))のもとで配布されています。このソフトウェアに貢献すると、あなたは貢献したものが同ライセンスのもとに取り込まれることに同意したとみなされます。
579
+ </p>
580
+ <p lang="en">
581
+ Copyright and conditions of use for the main portion of the source:
582
+ </p>
583
+ <p lang="ja">
584
+ ソースコードの主となる部分の著作権と使用条件は次のとおりです:
585
+ </p>
586
+
587
+ <blockquote>
588
+ <pre>
589
+ Copyright (C) Hisashi MORITA. All rights reserved.
590
+
591
+ Redistribution and use in source and binary forms, with or without
592
+ modification, are permitted provided that the following conditions
593
+ are met:
594
+ 1. Redistributions of source code must retain the above copyright
595
+ notice, this list of conditions and the following disclaimer.
596
+ 2. Redistributions in binary form must reproduce the above copyright
597
+ notice, this list of conditions and the following disclaimer in the
598
+ documentation and/or other materials provided with the distribution.
599
+ 3. Neither the name of the University nor the names of its contributors
600
+ may be used to endorse or promote products derived from this software
601
+ without specific prior written permission.
602
+
603
+ THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
604
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
605
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
606
+ ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
607
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
608
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
609
+ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
610
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
611
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
612
+ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
613
+ SUCH DAMAGE.
614
+ </pre>
615
+ </blockquote>
616
+
617
+ <p lang="en">
618
+ The diff library (<samp>docdiff/diff.rb</samp> and <samp>docdiff/diff/*</samp>) was originally a part of Ruby/CVS by Akira TANAKA. Ruby/CVS is licensed under the modified BSD-style license. See the following for details.
619
+ </p>
620
+ <p lang="ja">
621
+ diffライブラリ(<samp>docdiff/diff.rb</samp>および<samp>docdiff/diff/*</samp>)は、もともと田中哲さんによるRuby/CVSの一部分でした。Ruby/CVSは修正BSDスタイルライセンスのもとで配布されています。詳細は次を参照してください。
622
+ </p>
623
+ <ul>
624
+ <li><a href="http://raa.ruby-lang.org/list.rhtml?name=ruby-cvs">
625
+ http://raa.ruby-lang.org/list.rhtml?name=ruby-cvs</a></li>
626
+ <li><a href="http://cvs.m17n.org/~akr/ruby-cvs/">
627
+ http://cvs.m17n.org/~akr/ruby-cvs/</a></li>
628
+ </ul>
629
+
630
+ <h2 id="credits">
631
+ <span lang="en">Credits</span>
632
+ <span lang="ja">クレジット</span>
633
+ </h2>
634
+ <ul>
635
+ <li>Hisashi MORITA (primary author)</li>
636
+ </ul>
637
+
638
+ <h2 id="acknowledgments">
639
+ <span lang="en">Acknowledgments</span>
640
+ <span lang="ja">謝辞</span>
641
+ </h2>
642
+ <ul>
643
+ <li>Akira TANAKA (diff library author)</li>
644
+ <li>Shin'ichiro HARA (initial idea and algorithm suggestion)</li>
645
+ <li>Masatoshi SEKI (patch)</li>
646
+ <li>Akira YAMADA (patch, Debian package)</li>
647
+ <li>Kenshi MUTO (testing, bug report, Debian package)</li>
648
+ <li>Kazuhiro NISHIYAMA (bug report)</li>
649
+ <li>Hiroshi OHKUBO (bug report)</li>
650
+ <li>Shugo MAEDA (bug report)</li>
651
+ <li>Kazuhiko (patch)</li>
652
+ </ul>
653
+
654
+ <hr />
655
+
656
+ <h2 id="resources">
657
+ <span lang="en">Resources</span>
658
+ <span lang="ja">情報源</span>
659
+ </h2>
660
+
661
+ <h3>
662
+ <span lang="en">Format</span>
663
+ <span lang="ja">フォーマット</span>
664
+ </h3>
665
+ <ul>
666
+ <li>HTML/XHTML<br />
667
+ <a href="http://www.w3.org">http://www.w3.org</a></li>
668
+ <li>tty (Graphic rendition using VT100 / ANSI escape sequence)<br />
669
+ VT100: <a href="http://vt100.net/docs/tp83/appendixb.html">
670
+ http://vt100.net/docs/tp83/appendixb.html</a><br />
671
+ ANSI: <a href="http://www.tldp.org/HOWTO/Bash-Prompt-HOWTO/x329.html">
672
+ http://www.tldp.org/HOWTO/Bash-Prompt-HOWTO/x329.html</a></li>
673
+ <li>Manued (Manuscript Editing language: a proofreading method for text)<br />
674
+ <a href="http://www.archi.is.tohoku.ac.jp/~yamauchi/otherprojects/manued/index.shtml">
675
+ http://www.archi.is.tohoku.ac.jp/~yamauchi/otherprojects/manued/index.shtml</a></li>
676
+ </ul>
677
+
678
+ <h3>
679
+ <span lang="en">Similar Software</span>
680
+ <span lang="ja">同様の目的を持ったソフトウェア</span>
681
+ </h3>
682
+ <p lang="en">
683
+ There are several other programs that can compare text word by word and/or character by character.
684
+ </p>
685
+ <p lang="ja">
686
+ テキストを単語単位や文字単位で比較することができるソフトウェアは、ほかにもあります。
687
+ </p>
688
+ <ul>
689
+ <li>GNU wdiff (Seems to support single byte characters only.)<br />
690
+ <a href="http://www.gnu.org/directory/GNU/wdiff.html">
691
+ http://www.gnu.org/directory/GNU/wdiff.html</a></li>
692
+ <li>cdif by Kazumasa UTASHIRO (Supports several Japanese encodings.)<br />
693
+ <a href="http://srekcah.org/~utashiro/perl/scripts/cdif">
694
+ http://srekcah.org/~utashiro/perl/scripts/cdif</a></li>
695
+ <li>ediff for Emacsen<br />
696
+ <a href="http://www.xemacs.org/Documentation/packages/html/ediff.html">
697
+ http://www.xemacs.org/Documentation/packages/html/ediff.html</a></li>
698
+ <li>diff-detail for xyzzy, by Hiroshi OHKUBO<br />
699
+ <a href="http://ohkubo.s53.xrea.com/xyzzy/index.html#diff-detail">
700
+ http://ohkubo.s53.xrea.com/xyzzy/index.html#diff-detail</a></li>
701
+ <li>Manuediff (Outputs difference in Manued format.)<br />
702
+ <a href="http://hibiki.miyagi-ct.ac.jp/~suzuki/comp/export/manuediff.html">
703
+ http://hibiki.miyagi-ct.ac.jp/~suzuki/comp/export/manuediff.html</a></li>
704
+ <li>YASDiff (Yet Another Scheme powered diff) by Y. Fujisawa<br />
705
+ <a href="http://nnri.dip.jp/~yf/cgi-bin/yaswiki2.cgi?name=YASDiff&amp;parentid=0">
706
+ http://nnri.dip.jp/~yf/cgi-bin/yaswiki2.cgi?name=YASDiff&amp;parentid=0</a></li>
707
+ <li>WinMerge (GUI diff tool for Windows)<br />
708
+ <a href="http://winmerge.org/">http://winmerge.org/</a></li>
709
+ </ul>
710
+
711
+ <hr />
712
+ </body>
713
+ </html>