docdiff 0.5.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.travis.yml +5 -3
- data/Gemfile +1 -1
- data/Makefile +15 -19
- data/Rakefile +45 -10
- data/bin/docdiff +25 -13
- data/devutil/Rakefile +9 -0
- data/devutil/changelog.sh +40 -0
- data/docdiff.gemspec +4 -4
- data/docdiffwebui.cgi +1 -1
- data/langfilter.rb +1 -5
- data/lib/doc_diff.rb +5 -1
- data/lib/docdiff/charstring.rb +10 -285
- data/lib/docdiff/diff/contours.rb +2 -1
- data/lib/docdiff/diff/editscript.rb +2 -0
- data/lib/docdiff/diff/rcsdiff.rb +2 -0
- data/lib/docdiff/diff/shortestpath.rb +2 -0
- data/lib/docdiff/diff/speculative.rb +6 -3
- data/lib/docdiff/diff/subsequence.rb +2 -0
- data/lib/docdiff/diff/unidiff.rb +2 -1
- data/lib/docdiff/diff.rb +2 -0
- data/lib/docdiff/difference.rb +2 -0
- data/lib/docdiff/document.rb +2 -0
- data/lib/docdiff/encoding/en_ascii.rb +15 -40
- data/lib/docdiff/encoding/ja_eucjp.rb +15 -40
- data/lib/docdiff/encoding/ja_sjis.rb +15 -40
- data/lib/docdiff/encoding/ja_utf8.rb +15 -40
- data/lib/docdiff/version.rb +1 -1
- data/lib/docdiff/view.rb +16 -14
- data/lib/docdiff.rb +1 -1
- data/readme.html +41 -4
- data/readme.md +185 -0
- data/test/charstring_test.rb +16 -26
- data/test/diff_test.rb +2 -1
- data/test/difference_test.rb +2 -1
- data/test/docdiff_test.rb +12 -3
- data/test/document_test.rb +7 -6
- data/test/view_test.rb +3 -1
- metadata +23 -34
- data/devutil/JIS0208.TXT +0 -6952
- data/lib/viewdiff.rb +0 -375
- data/test/viewdiff_test.rb +0 -908
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
# Japanese Shift_JIS encoding module for CharString
|
|
2
2
|
# 2003- Hisashi MORITA
|
|
3
3
|
|
|
4
|
+
# frozen_string_literal: false
|
|
5
|
+
|
|
6
|
+
class DocDiff
|
|
4
7
|
module CharString
|
|
5
8
|
module Shift_JIS
|
|
6
9
|
|
|
@@ -16,50 +19,21 @@ module CharString
|
|
|
16
19
|
SPACE = "\x09\x0a\x0b\x0c\x0d\x20"
|
|
17
20
|
BLANK = "\x09\x20"
|
|
18
21
|
DIGIT = "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39"
|
|
19
|
-
|
|
20
|
-
"\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
|
|
21
|
-
"\x55\x56\x57\x58\x59\x5a\x61\x62\x63\x64" \
|
|
22
|
-
"\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e" \
|
|
23
|
-
"\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78" \
|
|
24
|
-
"\x79\x7a"
|
|
25
|
-
ALNUM = "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39" \
|
|
26
|
-
"\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
|
|
22
|
+
UPPER = "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
|
|
27
23
|
"\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
|
|
28
|
-
"\x55\x56\x57\x58\x59\x5a
|
|
29
|
-
|
|
30
|
-
"\x6f\x70\x71\x72\x73\x74
|
|
31
|
-
"\x79\x7a"
|
|
24
|
+
"\x55\x56\x57\x58\x59\x5a"
|
|
25
|
+
LOWER = "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a" \
|
|
26
|
+
"\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74" \
|
|
27
|
+
"\x75\x76\x77\x78\x79\x7a"
|
|
28
|
+
ALPHA = UPPER + LOWER
|
|
29
|
+
ALNUM = DIGIT + ALPHA
|
|
32
30
|
PUNCT = "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
|
|
33
31
|
"\x2b\x2c\x2d\x2e\x2f\x3a\x3b\x3c\x3d\x3e" \
|
|
34
32
|
"\x3f\x40\x5b\x5c\x5d\x5e\x5f\x60\x7b\x7c" \
|
|
35
33
|
"\x7d\x7e"
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
UPPER = "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
|
|
40
|
-
"\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
|
|
41
|
-
"\x55\x56\x57\x58\x59\x5a"
|
|
42
|
-
PRINT = "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29" \
|
|
43
|
-
"\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33" \
|
|
44
|
-
"\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d" \
|
|
45
|
-
"\x3e\x3f\x40\x41\x42\x43\x44\x45\x46\x47" \
|
|
46
|
-
"\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51" \
|
|
47
|
-
"\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b" \
|
|
48
|
-
"\x5c\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65" \
|
|
49
|
-
"\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
|
|
50
|
-
"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79" \
|
|
51
|
-
"\x7a\x7b\x7c\x7d\x7e"
|
|
52
|
-
GRAPH = "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
|
|
53
|
-
"\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\x34" \
|
|
54
|
-
"\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e" \
|
|
55
|
-
"\x3f\x40\x41\x42\x43\x44\x45\x46\x47\x48" \
|
|
56
|
-
"\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51\x52" \
|
|
57
|
-
"\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c" \
|
|
58
|
-
"\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65\x66" \
|
|
59
|
-
"\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70" \
|
|
60
|
-
"\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a" \
|
|
61
|
-
"\x7b\x7c\x7d\x7e"
|
|
62
|
-
XDIGIT = "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39" \
|
|
34
|
+
GRAPH = DIGIT + UPPER + LOWER + PUNCT
|
|
35
|
+
PRINT = "\x20" + GRAPH
|
|
36
|
+
XDIGIT = DIGIT +
|
|
63
37
|
"\x41\x42\x43\x44\x45\x46\x61\x62\x63\x64" \
|
|
64
38
|
"\x65\x66"
|
|
65
39
|
JA_SPACE = "\x81\x40"
|
|
@@ -257,4 +231,5 @@ module CharString
|
|
|
257
231
|
CharString.register_encoding(self)
|
|
258
232
|
|
|
259
233
|
end # module SJIS
|
|
260
|
-
end
|
|
234
|
+
end # module CharString
|
|
235
|
+
end # class DocDiff
|
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
# Japanese UTF-8 encoding module for CharString
|
|
3
3
|
# 2003- Hisashi MORITA
|
|
4
4
|
|
|
5
|
+
# frozen_string_literal: false
|
|
6
|
+
|
|
7
|
+
class DocDiff
|
|
5
8
|
module CharString
|
|
6
9
|
module UTF8
|
|
7
10
|
|
|
@@ -17,50 +20,21 @@ module CharString
|
|
|
17
20
|
SPACE = "\x09\x0a\x0b\x0c\x0d\x20"
|
|
18
21
|
BLANK = "\x09\x20"
|
|
19
22
|
DIGIT = "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39"
|
|
20
|
-
|
|
21
|
-
"\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
|
|
22
|
-
"\x55\x56\x57\x58\x59\x5a\x61\x62\x63\x64" \
|
|
23
|
-
"\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e" \
|
|
24
|
-
"\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78" \
|
|
25
|
-
"\x79\x7a"
|
|
26
|
-
ALNUM = "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39" \
|
|
27
|
-
"\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
|
|
23
|
+
UPPER = "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
|
|
28
24
|
"\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
|
|
29
|
-
"\x55\x56\x57\x58\x59\x5a
|
|
30
|
-
|
|
31
|
-
"\x6f\x70\x71\x72\x73\x74
|
|
32
|
-
"\x79\x7a"
|
|
25
|
+
"\x55\x56\x57\x58\x59\x5a"
|
|
26
|
+
LOWER = "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a" \
|
|
27
|
+
"\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74" \
|
|
28
|
+
"\x75\x76\x77\x78\x79\x7a"
|
|
29
|
+
ALPHA = UPPER + LOWER
|
|
30
|
+
ALNUM = DIGIT + ALPHA
|
|
33
31
|
PUNCT = "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
|
|
34
32
|
"\x2b\x2c\x2d\x2e\x2f\x3a\x3b\x3c\x3d\x3e" \
|
|
35
33
|
"\x3f\x40\x5b\x5c\x5d\x5e\x5f\x60\x7b\x7c" \
|
|
36
34
|
"\x7d\x7e"
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
UPPER = "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
|
|
41
|
-
"\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
|
|
42
|
-
"\x55\x56\x57\x58\x59\x5a"
|
|
43
|
-
PRINT = "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29" \
|
|
44
|
-
"\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33" \
|
|
45
|
-
"\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d" \
|
|
46
|
-
"\x3e\x3f\x40\x41\x42\x43\x44\x45\x46\x47" \
|
|
47
|
-
"\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51" \
|
|
48
|
-
"\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b" \
|
|
49
|
-
"\x5c\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65" \
|
|
50
|
-
"\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
|
|
51
|
-
"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79" \
|
|
52
|
-
"\x7a\x7b\x7c\x7d\x7e"
|
|
53
|
-
GRAPH = "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
|
|
54
|
-
"\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\x34" \
|
|
55
|
-
"\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e" \
|
|
56
|
-
"\x3f\x40\x41\x42\x43\x44\x45\x46\x47\x48" \
|
|
57
|
-
"\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51\x52" \
|
|
58
|
-
"\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c" \
|
|
59
|
-
"\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65\x66" \
|
|
60
|
-
"\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70" \
|
|
61
|
-
"\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a" \
|
|
62
|
-
"\x7b\x7c\x7d\x7e"
|
|
63
|
-
XDIGIT = "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39" \
|
|
35
|
+
GRAPH = DIGIT + UPPER + LOWER + PUNCT
|
|
36
|
+
PRINT = "\x20" + GRAPH
|
|
37
|
+
XDIGIT = DIGIT +
|
|
64
38
|
"\x41\x42\x43\x44\x45\x46\x61\x62\x63\x64" \
|
|
65
39
|
"\x65\x66"
|
|
66
40
|
JA_SPACE = "\xe3\x80\x80"
|
|
@@ -6971,4 +6945,5 @@ module CharString
|
|
|
6971
6945
|
CharString.register_encoding(self)
|
|
6972
6946
|
|
|
6973
6947
|
end # module UTF8
|
|
6974
|
-
end
|
|
6948
|
+
end # module CharString
|
|
6949
|
+
end # class DocDiff
|
data/lib/docdiff/version.rb
CHANGED
data/lib/docdiff/view.rb
CHANGED
|
@@ -21,6 +21,7 @@ class String
|
|
|
21
21
|
end
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
+
class DocDiff
|
|
24
25
|
class View
|
|
25
26
|
|
|
26
27
|
# EOL_CHARS_PAT = Regexp.new(/\r\n|\r(?!\n)|(?:\A|[^\r])\n/m)
|
|
@@ -84,25 +85,25 @@ class View
|
|
|
84
85
|
end
|
|
85
86
|
|
|
86
87
|
def encname_for_regexp(encname)
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
end
|
|
90
|
-
if ruby_m17n?
|
|
91
|
-
# in 1.9.x, encoding names are deprecated except for N (ASCII-8BIT (binary))
|
|
92
|
-
nil
|
|
93
|
-
else
|
|
94
|
-
# in 1.8.x, U|E|S|N are accepted
|
|
95
|
-
encname.sub(/US-ASCII/i, 'none')
|
|
96
|
-
end
|
|
88
|
+
# in 1.9.x, encoding names are deprecated except for N (ASCII-8BIT (binary))
|
|
89
|
+
nil
|
|
97
90
|
end
|
|
98
91
|
|
|
99
92
|
CONTEXT_PRE_LENGTH = 32
|
|
100
93
|
CONTEXT_POST_LENGTH = 32
|
|
101
94
|
def apply_style_digest(tags, headfoot = true)
|
|
102
|
-
cxt_pre_pat
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
95
|
+
cxt_pre_pat =
|
|
96
|
+
if RUBY_VERSION >= "2.3.1"
|
|
97
|
+
Regexp.new('.{0,'+"#{CONTEXT_PRE_LENGTH}"+'}\Z', Regexp::MULTILINE)
|
|
98
|
+
else
|
|
99
|
+
Regexp.new('.{0,'+"#{CONTEXT_PRE_LENGTH}"+'}\Z', Regexp::MULTILINE, encname_for_regexp(@encoding))
|
|
100
|
+
end
|
|
101
|
+
cxt_post_pat =
|
|
102
|
+
if RUBY_VERSION >= "2.3.1"
|
|
103
|
+
Regexp.new('\A.{0,'+"#{CONTEXT_POST_LENGTH}"+'}', Regexp::MULTILINE)
|
|
104
|
+
else
|
|
105
|
+
Regexp.new('\A.{0,'+"#{CONTEXT_POST_LENGTH}"+'}', Regexp::MULTILINE, encname_for_regexp(@encoding))
|
|
106
|
+
end
|
|
106
107
|
display = (tags and tags[:display]) || 'inline'
|
|
107
108
|
result = []
|
|
108
109
|
d1l = doc1_line_number = 1
|
|
@@ -474,3 +475,4 @@ class View
|
|
|
474
475
|
end
|
|
475
476
|
|
|
476
477
|
end
|
|
478
|
+
end # class DocDiff
|
data/lib/docdiff.rb
CHANGED
data/readme.html
CHANGED
|
@@ -55,7 +55,7 @@
|
|
|
55
55
|
<body>
|
|
56
56
|
<h1>DocDiff Readme</h1>
|
|
57
57
|
<p>
|
|
58
|
-
2000
|
|
58
|
+
(C) 2000 Hisashi MORITA
|
|
59
59
|
</p>
|
|
60
60
|
|
|
61
61
|
<hr />
|
|
@@ -100,11 +100,42 @@
|
|
|
100
100
|
<span lang="ja">ニュース</span>
|
|
101
101
|
</h2>
|
|
102
102
|
<ul>
|
|
103
|
-
<!--
|
|
104
|
-
<li>
|
|
103
|
+
<!--
|
|
104
|
+
<li>x.y.z (xxxx-xx-xx)<ul>
|
|
105
105
|
<li></li>
|
|
106
106
|
</ul></li>
|
|
107
107
|
-->
|
|
108
|
+
<li>0.6.2 (2025-11-28)<ul>
|
|
109
|
+
<li>User-visible changes:<ul>
|
|
110
|
+
<li>Add support for stdin (-) in command line arguments (thanks to tamo)</li>
|
|
111
|
+
<li>Add --config-file option (thanks to tamo)</li>
|
|
112
|
+
<li>Fix incompatibility with Ruby 3.4 Regexp (thanks to yoshuki)</li>
|
|
113
|
+
<li>Resolve frozen literal warnings introduced by Ruby 3.4.</li>
|
|
114
|
+
</ul></li>
|
|
115
|
+
<li>Developer-related changes:<ul>
|
|
116
|
+
<li>Update email address in .gemspec.</li>
|
|
117
|
+
</ul></li>
|
|
118
|
+
</ul></li>
|
|
119
|
+
<li>0.6.1 (2021-06-07)<ul>
|
|
120
|
+
<li>Update information in .gemspec.</li>
|
|
121
|
+
</ul></li>
|
|
122
|
+
<li>0.6.0 (2020-07-10)<ul>
|
|
123
|
+
<li>User-visible changes:<ul>
|
|
124
|
+
<li>Drop support for Ruby 1.8 (thanks to takahashim).</li>
|
|
125
|
+
<li>Fix various encoding problems (thanks to takahashim).</li>
|
|
126
|
+
<li>Add CP932 (Windows-31J) support through a new option <code>--cp932</code> (thanks to emasaka).</li>
|
|
127
|
+
<li>Introduce readme.md, which will obsolete readme.html eventually (thanks to takahashim).</li>
|
|
128
|
+
</ul></li>
|
|
129
|
+
<li>Developer-related changes:<ul>
|
|
130
|
+
<li>Use Mutex#synchronize instead of Thread.exclusive (thanks to hsbt).</li>
|
|
131
|
+
<li>Remove JIS0208.TXT to comply with its terms of use (thanks to kmuto).</li>
|
|
132
|
+
<li>Introduce top-level class DocDiff to avoid name conflict (thanks to hibariya).</li>
|
|
133
|
+
</ul></li>
|
|
134
|
+
</ul></li>
|
|
135
|
+
<li>0.5.0 (2011-08-12)<ul>
|
|
136
|
+
<li>Gemify. Now you can download docdiff via rubygems.org.</li>
|
|
137
|
+
<li>Fix failing test on ruby1.9.2-p290.</li>
|
|
138
|
+
</ul></li>
|
|
108
139
|
<li>0.4.0 (2011-02-23)<ul>
|
|
109
140
|
<li>Compatible with Ruby 1.9 (thanks to Kazuhiko).</li>
|
|
110
141
|
</ul></li>
|
|
@@ -215,7 +246,6 @@
|
|
|
215
246
|
<li>Better auto-recognition of encodings and eols.</li>
|
|
216
247
|
<li>Make CSS and tty escape sequence customizable in config files.</li>
|
|
217
248
|
<li>Better multilingualization using Ruby 1.9 feature.</li>
|
|
218
|
-
<li>Write "DocPatch".</li>
|
|
219
249
|
</ul>
|
|
220
250
|
|
|
221
251
|
<hr />
|
|
@@ -649,6 +679,13 @@ SUCH DAMAGE.
|
|
|
649
679
|
<li>Hiroshi OHKUBO (bug report)</li>
|
|
650
680
|
<li>Shugo MAEDA (bug report)</li>
|
|
651
681
|
<li>Kazuhiko (patch)</li>
|
|
682
|
+
<li>Shintaro Kakutani (patches)</li>
|
|
683
|
+
<li>Masayoshi Takahashi (patches)</li>
|
|
684
|
+
<li>Masakazu Takahashi (patch)</li>
|
|
685
|
+
<li>Hibariya (bug report)</li>
|
|
686
|
+
<li>Hiroshi SHIBATA (patch)</li>
|
|
687
|
+
<li>Tamotsu Takahashi (patches)</li>
|
|
688
|
+
<li>MIKAMI Yoshiyuki (patch)</li>
|
|
652
689
|
</ul>
|
|
653
690
|
|
|
654
691
|
<hr />
|
data/readme.md
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# DocDiff
|
|
2
|
+
|
|
3
|
+
(C) 2000 Hisashi MORITA
|
|
4
|
+
|
|
5
|
+
## Todo
|
|
6
|
+
|
|
7
|
+
* Incorporate ignore space patch.
|
|
8
|
+
* Better auto-recognition of encodings and eols.
|
|
9
|
+
* Make CSS and tty escape sequence customizable in config files.
|
|
10
|
+
* Better multilingualization using Ruby 1.9 feature.
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
## Description
|
|
14
|
+
|
|
15
|
+
Compares two text files by word, by character, or by line
|
|
16
|
+
|
|
17
|
+
## Summary
|
|
18
|
+
|
|
19
|
+
DocDiff compares two text files and shows the difference. It can compare files word by word, character by character, or line by line. It has several output formats such as HTML, tty, Manued, or user-defined markup.
|
|
20
|
+
|
|
21
|
+
It supports several encodings and end-of-line characters, including ASCII (and other single byte encodings such as ISO-8859-*), UTF-8, EUC-JP, Shift_JIS, CR, LF, and CRLF.
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
## Requirement
|
|
25
|
+
|
|
26
|
+
* Ruby (http://www.ruby-lang.org)
|
|
27
|
+
(Note that you may need additional Ruby libraries such as iconv, if your OS's Ruby package does not include them.)
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
Note that you need appropriate permissions for proper installation (you may need root/administrator privileges).
|
|
32
|
+
|
|
33
|
+
* Place the `docdiff/` directory and its contents in the Ruby library directory, so that the Ruby interpreter can load them.
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
# cp -r docdiff /usr/lib/ruby/1.9.1
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
* Place `docdiff.rb` in command binary directory.
|
|
40
|
+
|
|
41
|
+
```
|
|
42
|
+
# cp docdiff.rb /usr/bin/
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
* (Optional) You may want to rename it to `docdiff`.
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
# mv /usr/bin/docdiff.rb /usr/bin/docdiff
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
* (Optional) When invoked as `chardiff` or `worddiff`, docdiff runs with resolution set to `char` or `word`, respectively.
|
|
52
|
+
|
|
53
|
+
```
|
|
54
|
+
# ln -s /usr/bin/docdiff.rb /usr/bin/chardiff.rb
|
|
55
|
+
# ln -s /usr/bin/docdiff.rb /usr/bin/worddiff.rb
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
* Set appropriate permission.
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
# chmod +x /usr/bin/docdiff.rb
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
* (Optional) If you want site-wide configuration file, place `docdiff.conf.example` as `/etc/docdiff/docdiff.conf` and edit it.
|
|
65
|
+
|
|
66
|
+
```
|
|
67
|
+
# cp docdiff.conf.example /etc/docdiff/docdiff.conf
|
|
68
|
+
# $EDITOR /etc/docdiff/docdiff.conf
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
* (Optional) If you want per-user configuration file, place `docdiff.conf.example` as `~/etc/docdiff/docdiff.conf` and edit it.
|
|
72
|
+
|
|
73
|
+
```
|
|
74
|
+
% cp docdiff.conf.example ~/etc/docdiff/docdiff.conf
|
|
75
|
+
% $EDITOR ~/etc/docdiff/docdiff.conf
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Usage
|
|
79
|
+
|
|
80
|
+
### Synopsis
|
|
81
|
+
|
|
82
|
+
% docdiff [options] oldfile newfile
|
|
83
|
+
|
|
84
|
+
e.g.
|
|
85
|
+
|
|
86
|
+
% docdiff old.txt new.txt > diff.html
|
|
87
|
+
|
|
88
|
+
See the help message for details (`docdiff --help`).
|
|
89
|
+
|
|
90
|
+
## License
|
|
91
|
+
|
|
92
|
+
This software is distributed under the so-called modified BSD-style license (http://www.opensource.org/licenses/bsd-license.php (without the advertising clause)). By contributing to this software, you agree that your contribution may be incorporated under the same license.
|
|
93
|
+
|
|
94
|
+
Copyright and condition of use of main portion of the source:
|
|
95
|
+
|
|
96
|
+
```
|
|
97
|
+
Copyright (C) Hisashi MORITA. All rights reserved.
|
|
98
|
+
|
|
99
|
+
Redistribution and use in source and binary forms, with or without
|
|
100
|
+
modification, are permitted provided that the following conditions
|
|
101
|
+
are met:
|
|
102
|
+
1. Redistributions of source code must retain the above copyright
|
|
103
|
+
notice, this list of conditions and the following disclaimer.
|
|
104
|
+
2. Redistributions in binary form must reproduce the above copyright
|
|
105
|
+
notice, this list of conditions and the following disclaimer in the
|
|
106
|
+
documentation and/or other materials provided with the distribution.
|
|
107
|
+
3. Neither the name of the University nor the names of its contributors
|
|
108
|
+
may be used to endorse or promote products derived from this software
|
|
109
|
+
without specific prior written permission.
|
|
110
|
+
|
|
111
|
+
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
112
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
113
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
114
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
115
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
116
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
117
|
+
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
118
|
+
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
119
|
+
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
120
|
+
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
121
|
+
SUCH DAMAGE.
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
diff library (`docdiff/diff.rb` and `docdiff/diff/*`) was originally a part of Ruby/CVS by Akira TANAKA.
|
|
125
|
+
Ruby/CVS is licensed under modified BSD style license.
|
|
126
|
+
See the following for detail.
|
|
127
|
+
|
|
128
|
+
* http://raa.ruby-lang.org/list.rhtml?name=ruby-cvs
|
|
129
|
+
* http://cvs.m17n.org/~akr/ruby-cvs/
|
|
130
|
+
|
|
131
|
+
## Credits
|
|
132
|
+
|
|
133
|
+
* Hisashi MORITA (primary author)
|
|
134
|
+
|
|
135
|
+
## Acknowledgments
|
|
136
|
+
|
|
137
|
+
* Akira TANAKA (diff library author)
|
|
138
|
+
* Shin'ichiro HARA (initial idea and algorithm suggestion)
|
|
139
|
+
* Masatoshi SEKI (patch)
|
|
140
|
+
* Akira YAMADA (patch, Debian package)
|
|
141
|
+
* Kenshi MUTO (testing, bug report, Debian package)
|
|
142
|
+
* Kazuhiro NISHIYAMA (bug report)
|
|
143
|
+
* Hiroshi OHKUBO (bug report)
|
|
144
|
+
* Shugo MAEDA (bug report)
|
|
145
|
+
* Kazuhiko (patch)
|
|
146
|
+
* Shintaro Kakutani (patches)
|
|
147
|
+
* Masayoshi Takahashi (patches)
|
|
148
|
+
* Masakazu Takahashi (patch)
|
|
149
|
+
* Hibariya (bug report)
|
|
150
|
+
* Hiroshi SHIBATA (patch)
|
|
151
|
+
* Tamotsu Takahashi (patches)
|
|
152
|
+
* MIKAMI Yoshiyuki (patch)
|
|
153
|
+
|
|
154
|
+
Please excuse us: this list is far from complete and fails to acknowledge many
|
|
155
|
+
more who have helped us somehow. We really appreciate it.
|
|
156
|
+
|
|
157
|
+
## Resources
|
|
158
|
+
|
|
159
|
+
### Format
|
|
160
|
+
|
|
161
|
+
* HTML/XHTML http://www.w3.org
|
|
162
|
+
* tty (Graphic rendition using VT100 / ANSI escape sequence)
|
|
163
|
+
* VT100: http://vt100.net/docs/tp83/appendixb.html
|
|
164
|
+
* ANSI: http://www.tldp.org/HOWTO/Bash-Prompt-HOWTO/x329.html
|
|
165
|
+
* Manued (Manuscript Editing language: a proofreading method for text)
|
|
166
|
+
* http://www.archi.is.tohoku.ac.jp/~yamauchi/otherprojects/manued/index.shtml
|
|
167
|
+
|
|
168
|
+
### Similar Software
|
|
169
|
+
|
|
170
|
+
There are several other software tools that can compare text word by word and/or character by character.
|
|
171
|
+
|
|
172
|
+
* GNU wdiff (Seems to support single byte characters only.)
|
|
173
|
+
http://www.gnu.org/directory/GNU/wdiff.html
|
|
174
|
+
* cdif by Kazumasa UTASHIRO (Supports several Japanese encodings.)
|
|
175
|
+
http://srekcah.org/~utashiro/perl/scripts/cdif
|
|
176
|
+
* ediff for Emacsen
|
|
177
|
+
http://www.xemacs.org/Documentation/packages/html/ediff.html
|
|
178
|
+
* diff-detail for xyzzy, by Hiroshi OHKUBO
|
|
179
|
+
http://ohkubo.s53.xrea.com/xyzzy/index.html#diff-detail
|
|
180
|
+
* Manuediff (Outputs difference in Manued format.)
|
|
181
|
+
http://hibiki.miyagi-ct.ac.jp/~suzuki/comp/export/manuediff.html
|
|
182
|
+
* YASDiff (Yet Another Scheme powered diff) by Y. Fujisawa
|
|
183
|
+
http://nnri.dip.jp/~yf/cgi-bin/yaswiki2.cgi?name=YASDiff&parentid=0
|
|
184
|
+
* WinMerge (GUI diff tool for Windows)
|
|
185
|
+
http://winmerge.org/
|
data/test/charstring_test.rb
CHANGED
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
#!/usr/bin/ruby
|
|
2
2
|
# -*- coding: euc-jp; -*-
|
|
3
|
+
|
|
4
|
+
# frozen_string_literal: false
|
|
5
|
+
|
|
3
6
|
require 'test/unit'
|
|
4
7
|
require 'docdiff/charstring'
|
|
5
8
|
require 'nkf'
|
|
6
9
|
|
|
7
|
-
class
|
|
10
|
+
class TC_DocDiff_CharString < Test::Unit::TestCase
|
|
11
|
+
CharString = DocDiff::CharString
|
|
8
12
|
|
|
9
13
|
def setup()
|
|
10
14
|
#
|
|
@@ -856,10 +860,6 @@ class TC_CharString < Test::Unit::TestCase
|
|
|
856
860
|
# test module functions
|
|
857
861
|
|
|
858
862
|
def assert_guess_encoding(expected, str)
|
|
859
|
-
unless CharString.ruby_m17n?
|
|
860
|
-
assert_equal(expected, CharString.guess_encoding_using_pureruby(str))
|
|
861
|
-
assert_equal(expected, CharString.guess_encoding_using_iconv(str))
|
|
862
|
-
end
|
|
863
863
|
assert_equal(expected, CharString.guess_encoding(str))
|
|
864
864
|
end
|
|
865
865
|
|
|
@@ -874,33 +874,18 @@ class TC_CharString < Test::Unit::TestCase
|
|
|
874
874
|
# assert_equal(expected, CharString.guess_encoding(str))
|
|
875
875
|
# end
|
|
876
876
|
def test_guess_encoding_unknown()
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
expected = "ASCII-8BIT"
|
|
880
|
-
else
|
|
881
|
-
str = "\xff\xff\xff\xff" # "\xDE\xAD\xBE\xEF"
|
|
882
|
-
expected = "UNKNOWN"
|
|
883
|
-
end
|
|
877
|
+
str = "".encode("BINARY") # cannot put invalid string literal
|
|
878
|
+
expected = "ASCII-8BIT"
|
|
884
879
|
assert_guess_encoding(expected, str)
|
|
885
880
|
end
|
|
886
881
|
def test_guess_encoding_ascii_1()
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
expected = "US-ASCII"
|
|
890
|
-
else
|
|
891
|
-
str = "ASCII string"
|
|
892
|
-
expected = "US-ASCII"
|
|
893
|
-
end
|
|
882
|
+
str = "ASCII string".encode("US-ASCII")
|
|
883
|
+
expected = "US-ASCII"
|
|
894
884
|
assert_guess_encoding(expected, str)
|
|
895
885
|
end
|
|
896
886
|
def test_guess_encoding_ascii_2()
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
expected = "US-ASCII"
|
|
900
|
-
else
|
|
901
|
-
str = "abc\ndef\n"
|
|
902
|
-
expected = "US-ASCII"
|
|
903
|
-
end
|
|
887
|
+
str = "abc\ndef\n".encode("US-ASCII")
|
|
888
|
+
expected = "US-ASCII"
|
|
904
889
|
assert_guess_encoding(expected, str)
|
|
905
890
|
end
|
|
906
891
|
# CharString.guess_encoding mistakes JIS for ASCII sometimes, due to Iconv.
|
|
@@ -939,6 +924,11 @@ class TC_CharString < Test::Unit::TestCase
|
|
|
939
924
|
expected = "Shift_JIS"
|
|
940
925
|
assert_guess_encoding(expected, str)
|
|
941
926
|
end
|
|
927
|
+
def test_guess_encoding_cp932_1()
|
|
928
|
+
str = NKF.nkf('--oc=CP932', "\\u2460") # CIRCLED DIGIT ONE
|
|
929
|
+
expected = "Windows-31J" # CP932 == Windows-31J in Ruby 1.9+
|
|
930
|
+
assert_guess_encoding(expected, str)
|
|
931
|
+
end
|
|
942
932
|
def test_guess_encoding_utf8_1()
|
|
943
933
|
str = NKF.nkf("-E -w", "���ܸ��Latin��ʸ��")
|
|
944
934
|
expected = "UTF-8"
|
data/test/diff_test.rb
CHANGED
data/test/difference_test.rb
CHANGED
data/test/docdiff_test.rb
CHANGED
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
#!/usr/bin/ruby
|
|
2
2
|
# -*- coding: us-ascii; -*-
|
|
3
|
+
|
|
4
|
+
# frozen_string_literal: false
|
|
5
|
+
|
|
3
6
|
require 'test/unit'
|
|
4
7
|
require 'docdiff'
|
|
5
8
|
require 'nkf'
|
|
6
9
|
|
|
7
|
-
class
|
|
10
|
+
class TC_DocDiff < Test::Unit::TestCase
|
|
11
|
+
Document = DocDiff::Document
|
|
8
12
|
|
|
9
13
|
def setup()
|
|
10
14
|
#
|
|
@@ -125,7 +129,6 @@ class TC_Document < Test::Unit::TestCase
|
|
|
125
129
|
"",
|
|
126
130
|
nil].join
|
|
127
131
|
expected = {:foo1=>true, :foo2=>"bar baz", :foo3=>123, :foo4=>false}
|
|
128
|
-
docdiff = DocDiff.new
|
|
129
132
|
assert_equal(expected,
|
|
130
133
|
DocDiff.parse_config_file_content(content))
|
|
131
134
|
end
|
|
@@ -184,7 +187,13 @@ class TC_Document < Test::Unit::TestCase
|
|
|
184
187
|
expected = "<=>foo </=><=>b</=><!->a</!-><!+>ee</!+><=>r</=><=>\n</=><=>baz</=>"
|
|
185
188
|
assert_equal(expected, docdiff.run(doc1, doc2, {:resolution => "char", :format => "user", :digest => false}))
|
|
186
189
|
end
|
|
187
|
-
|
|
190
|
+
def test_cli()
|
|
191
|
+
expected = "Hello, my name is [-Watanabe.-]{+matz.+}\n"
|
|
192
|
+
cmd = "ruby -I lib bin/docdiff --wdiff" +
|
|
193
|
+
" sample/01.en.ascii.lf sample/02.en.ascii.lf"
|
|
194
|
+
actual = `#{cmd}`.scan(/^.*?$\n/m).first
|
|
195
|
+
assert_equal(expected, actual)
|
|
196
|
+
end
|
|
188
197
|
|
|
189
198
|
def teardown()
|
|
190
199
|
#
|
data/test/document_test.rb
CHANGED
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
#!/usr/bin/ruby
|
|
2
2
|
# -*- coding: euc-jp; -*-
|
|
3
|
+
|
|
4
|
+
# frozen_string_literal: false
|
|
5
|
+
|
|
3
6
|
require 'test/unit'
|
|
4
7
|
require 'docdiff/document'
|
|
5
8
|
require 'nkf'
|
|
6
9
|
|
|
7
|
-
class
|
|
10
|
+
class TC_DocDiff_Document < Test::Unit::TestCase
|
|
11
|
+
Document = DocDiff::Document
|
|
12
|
+
CharString = DocDiff::CharString
|
|
8
13
|
|
|
9
14
|
def setup()
|
|
10
15
|
#
|
|
@@ -18,11 +23,7 @@ class TC_Document < Test::Unit::TestCase
|
|
|
18
23
|
assert_equal(expected, doc.encoding)
|
|
19
24
|
end
|
|
20
25
|
def test_encoding_auto()
|
|
21
|
-
doc =
|
|
22
|
-
Document.new("Foo bar.\nBaz quux.".encode("US-ASCII"))
|
|
23
|
-
else
|
|
24
|
-
Document.new("Foo bar.\nBaz quux.")
|
|
25
|
-
end
|
|
26
|
+
doc = Document.new("Foo bar.\nBaz quux.".encode("US-ASCII"))
|
|
26
27
|
expected = 'US-ASCII'
|
|
27
28
|
assert_equal(expected, doc.encoding)
|
|
28
29
|
end
|
data/test/view_test.rb
CHANGED