docdiff 0.6.7 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -36
- data/README_ja.md +70 -37
- data/doc/example/docdiff.conf.example +3 -0
- data/doc/img/screenshot-html.png +0 -0
- data/doc/img/screenshot-tty-char.png +0 -0
- data/doc/img/screenshot-tty-digest-block.png +0 -0
- data/doc/img/screenshot-tty-digest-license-block.png +0 -0
- data/doc/img/screenshot-tty-digest-license.png +0 -0
- data/doc/img/screenshot-tty-digest.png +0 -0
- data/doc/img/screenshot-tty-en-ja.png +0 -0
- data/doc/img/screenshot-tty-manued.png +0 -0
- data/doc/img/screenshot-tty-wdiff.png +0 -0
- data/doc/img/screenshot-tty-word-char.png +0 -0
- data/doc/man/docdiff.adoc +3 -3
- data/doc/news.md +11 -0
- data/docdiff.gemspec +1 -1
- data/lib/doc_diff.rb +5 -5
- data/lib/docdiff/charstring.rb +36 -40
- data/lib/docdiff/cli.rb +23 -10
- data/lib/docdiff/document.rb +44 -44
- data/lib/docdiff/encoding/en_ascii.rb +4 -4
- data/lib/docdiff/version.rb +1 -1
- data/lib/docdiff/view.rb +27 -13
- data/test/charstring_test.rb +221 -221
- data/test/cli_test.rb +12 -12
- data/test/document_test.rb +223 -223
- data/test/fixture/humpty_dumpty_01_en.txt +4 -0
- data/test/fixture/humpty_dumpty_01_ja.txt +4 -0
- data/test/fixture/{humpty_dumpty01_ascii_lf.txt → humpty_dumpty_02_en.txt} +2 -2
- data/test/fixture/humpty_dumpty_02_ja.txt +4 -0
- data/test/view_test.rb +38 -31
- metadata +26 -14
- data/test/fixture/humpty_dumpty02_ascii_lf.txt +0 -4
- /data/doc/img/{screenshot-format-html-digest-firefox.png → old/screenshot-format-html-digest-firefox.png} +0 -0
- /data/doc/img/{screenshot-format-html-firefox.png → old/screenshot-format-html-firefox.png} +0 -0
- /data/doc/img/{screenshot-format-tty-cmdexe-en.png → old/screenshot-format-tty-cmdexe-en.png} +0 -0
- /data/doc/img/{screenshot-format-tty-cmdexe-ja.png → old/screenshot-format-tty-cmdexe-ja.png} +0 -0
- /data/doc/img/{screenshot-format-tty-rxvtunicode-en.png → old/screenshot-format-tty-rxvtunicode-en.png} +0 -0
- /data/doc/img/{screenshot-format-tty-rxvtunicode-ja.png → old/screenshot-format-tty-rxvtunicode-ja.png} +0 -0
- /data/doc/img/{screenshot-format-tty-xterm-en.png → old/screenshot-format-tty-xterm-en.png} +0 -0
- /data/doc/img/{screenshot-format-tty-xterm-ja.png → old/screenshot-format-tty-xterm-ja.png} +0 -0
- /data/doc/img/{screenshot-resolution-linewordchar-xterm.png → old/screenshot-resolution-linewordchar-xterm.png} +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e84a8c85103e65839c20b2b36b98c2d2991f1f21135a8742da68d130c6dd4290
|
|
4
|
+
data.tar.gz: 76fde0a2c869bf96b5688d4917e3aa9cadffb44c6b67bcc1c9eb2939a44aecc3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: cdf66ad98c6047d4a6a858762d79ed1569f21e65401ad2dcfc3a791828cd0407fc5c9411efb567796ddf0f3d45e38bdd68a6739060c7612b9290b111cbced1bd
|
|
7
|
+
data.tar.gz: 7848047f1904a90934b4bffd4b7539b14380c995af379a3874af1d92bf0ffb145fd4e976567e2fab6a6f167816698415c5c1094e824923a2c236b1d747c27fe9
|
data/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# DocDiff
|
|
2
2
|
|
|
3
|
-
* English
|
|
3
|
+
* English / [Japanese](README_ja.md)
|
|
4
4
|
|
|
5
5
|
(C) 2000 Hisashi MORITA
|
|
6
6
|
|
|
@@ -10,42 +10,21 @@ Compares two text files by word, by character, or by line
|
|
|
10
10
|
|
|
11
11
|
## Screenshots
|
|
12
12
|
|
|
13
|
-
<
|
|
13
|
+
<p>TTY output<br />
|
|
14
|
+
<img src="doc/img/screenshot-tty-en-ja.png" alt="TTY output"/></p>
|
|
14
15
|
|
|
15
16
|
<p>HTML output<br />
|
|
16
|
-
<img src="doc/img/screenshot-
|
|
17
|
+
<img src="doc/img/screenshot-html.png" alt="HTML output"/></p>
|
|
17
18
|
|
|
18
|
-
<p>
|
|
19
|
-
<img src="doc/img/screenshot-
|
|
19
|
+
<p>Comparison by character<br />
|
|
20
|
+
<img src="doc/img/screenshot-tty-char.png" alt="Comparison by character"/></p>
|
|
20
21
|
|
|
21
|
-
<p>
|
|
22
|
-
<img src="doc/img/screenshot-
|
|
23
|
-
|
|
24
|
-
<p>tty output (comparing Japanese text)<br />
|
|
25
|
-
<img src="doc/img/screenshot-format-tty-rxvtunicode-ja.png" alt="tty output (comparing Japanese text)" /></p>
|
|
26
|
-
|
|
27
|
-
<p>tty output<br />
|
|
28
|
-
<img src="doc/img/screenshot-format-tty-xterm-en.png" alt="tty output" /></p>
|
|
29
|
-
|
|
30
|
-
<p>tty output (comparing Japanese text)<br />
|
|
31
|
-
<img src="doc/img/screenshot-format-tty-xterm-ja.png" alt="tty output (comparing Japanese text)" /></p>
|
|
32
|
-
|
|
33
|
-
</div>
|
|
34
|
-
|
|
35
|
-
<p>Comparing English text (codepage 437) on Windows (Cygwin)<br />
|
|
36
|
-
<img src="doc/img/screenshot-format-tty-cmdexe-en.png" alt="Comparing English text (codepage 437) on Windows (Cygwin)" /></p>
|
|
37
|
-
|
|
38
|
-
<p>Comparing Japanese text (codepage 932) on Windows (Cygwin)<br />
|
|
39
|
-
<img src="doc/img/screenshot-format-tty-cmdexe-ja.png" alt="Comparing Japanese text (codepage 932) on Windows (Cygwin)" /></p>
|
|
40
|
-
|
|
41
|
-
<p>You can compare text files by line, word, or character (format: tty)<br/>
|
|
42
|
-
<img src="doc/img/screenshot-resolution-linewordchar-xterm.png" alt="You can compare text files by line, word, or character (format: tty)" /></p>
|
|
43
|
-
|
|
44
|
-
(Screenshots as of version 0.3.2.)
|
|
22
|
+
<p>Digest mode (useful for viewing scattered changes within a long text)<br />
|
|
23
|
+
<img src="doc/img/screenshot-tty-digest-license.png" alt="Digest mode"/></p>
|
|
45
24
|
|
|
46
25
|
## Summary
|
|
47
26
|
|
|
48
|
-
DocDiff compares two text files and shows the difference. It can compare files word by word, character by character, or line by line. It has several output formats such as
|
|
27
|
+
DocDiff compares two text files and shows the difference. It can compare files word by word, character by character, or line by line. It has several output formats such as TTY, HTML, Manued, or user-defined markup.
|
|
49
28
|
|
|
50
29
|
It supports several encodings and end-of-line characters, including ASCII (and other single byte encodings such as ISO-8859-*), UTF-8, EUC-JP, Shift_JIS (Windows-31J), CR, LF, and CRLF.
|
|
51
30
|
|
|
@@ -54,17 +33,71 @@ It supports several encodings and end-of-line characters, including ASCII (and o
|
|
|
54
33
|
### Synopsis
|
|
55
34
|
|
|
56
35
|
```
|
|
57
|
-
$ docdiff [options]
|
|
36
|
+
$ docdiff [options] file1 file2
|
|
58
37
|
```
|
|
59
38
|
|
|
60
|
-
|
|
39
|
+
Type `docdiff --help` to show command line options.
|
|
40
|
+
|
|
41
|
+
### Options
|
|
61
42
|
|
|
62
43
|
```
|
|
63
|
-
|
|
44
|
+
docdiff [options] file1 file2
|
|
45
|
+
--resolution=RESOLUTION
|
|
46
|
+
specify resolution (granularity)
|
|
47
|
+
line|word|char (default: word)
|
|
48
|
+
--line same as --resolution=line
|
|
49
|
+
--word same as --resolution=word
|
|
50
|
+
--char same as --resolution=char
|
|
51
|
+
--encoding=ENCODING
|
|
52
|
+
specify character encoding
|
|
53
|
+
ASCII|EUC-JP|Shift_JIS|CP932|UTF-8|auto (default: auto)
|
|
54
|
+
(try ASCII for single byte encodings such as ISO-8859)
|
|
55
|
+
--ascii same as --encoding=ASCII
|
|
56
|
+
--iso8859 same as --encoding=ASCII
|
|
57
|
+
--iso8859x same as --encoding=ASCII (deprecated)
|
|
58
|
+
--eucjp same as --encoding=EUC-JP
|
|
59
|
+
--sjis same as --encoding=Shift_JIS
|
|
60
|
+
--cp932 same as --encoding=CP932
|
|
61
|
+
--utf8 same as --encoding=UTF-8
|
|
62
|
+
--eol=EOL specify end-of-line character
|
|
63
|
+
CR|LF|CRLF|auto (default: auto)
|
|
64
|
+
--cr same as --eol=CR
|
|
65
|
+
--lf same as --eol=LF
|
|
66
|
+
--crlf same as --eol=CRLF
|
|
67
|
+
--format=FORMAT specify output format
|
|
68
|
+
tty|manued|html|wdiff|stat|user (default: tty)
|
|
69
|
+
(stat is deprecated)
|
|
70
|
+
(user tags can be defined in configuration file)
|
|
71
|
+
--tty same as --format=tty
|
|
72
|
+
--manued same as --format=manued
|
|
73
|
+
--html same as --format=html
|
|
74
|
+
--wdiff same as --format=wdiff
|
|
75
|
+
--stat same as --format=stat (not implemented) (deprecated)
|
|
76
|
+
-L, --label LABEL use label instead of file name
|
|
77
|
+
(not implemented; exists for compatibility with diff)
|
|
78
|
+
--digest digest output, do not show all
|
|
79
|
+
--summary same as --digest
|
|
80
|
+
--display=DISPLAY
|
|
81
|
+
specify presentation type (effective only with digest)
|
|
82
|
+
inline|block|multi (default: inline)
|
|
83
|
+
(experimental feature) (multi is deprecated)
|
|
84
|
+
--cache use file cache (not implemented) (deprecated)
|
|
85
|
+
--pager=PAGER specify pager
|
|
86
|
+
(falls back to $DOCDIFF_PAGER, $PAGER, or none)
|
|
87
|
+
--no-pager do not use pager
|
|
88
|
+
--config-file=FILE
|
|
89
|
+
specify configuration file to read
|
|
90
|
+
--no-config-file do not read configuration files
|
|
91
|
+
--verbose run verbosely (not well-supported) (deprecated)
|
|
92
|
+
--help show help message
|
|
93
|
+
--version show version
|
|
94
|
+
--license show license (deprecated)
|
|
95
|
+
--author show author(s) (deprecated)
|
|
96
|
+
When invoked as worddiff or chardiff, resolution will be set accordingly.
|
|
97
|
+
Configuration files: /etc/docdiff/docdiff.conf, ~/.config/docdiff/docdiff.conf,
|
|
98
|
+
or ~/etc/docdiff/docdiff.conf (deprecated)
|
|
64
99
|
```
|
|
65
100
|
|
|
66
|
-
See the help message for detail (`docdiff --help`).
|
|
67
|
-
|
|
68
101
|
### Example
|
|
69
102
|
|
|
70
103
|
<pre>
|
|
@@ -333,7 +366,7 @@ Excuse us this list is far from complete and fails to acknowledge many more who
|
|
|
333
366
|
### Formats
|
|
334
367
|
|
|
335
368
|
* [HTML/XHTML](https://www.w3.org/)
|
|
336
|
-
*
|
|
369
|
+
* TTY (Graphic rendition using VT100 / ANSI escape sequences)
|
|
337
370
|
- [VT100](https://vt100.net/docs/tp83/appendixb.html)
|
|
338
371
|
- [ANSI](https://tldp.org/HOWTO/Bash-Prompt-HOWTO/x329.html)
|
|
339
372
|
* [Manued](https://sundayresearch.eu/hitoshi/otherprojects/manued/) ([in Japanese](https://sundayresearch.eu/hitoshi/otherprojects/manued/index-j.html)) (Manuscript Editing Language: a proofreading method for text)
|
data/README_ja.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# DocDiff
|
|
2
2
|
|
|
3
|
-
* [English](README.md)
|
|
3
|
+
* [English](README.md) / Japanese
|
|
4
4
|
|
|
5
5
|
(C) 2000 Hisashi MORITA
|
|
6
6
|
|
|
@@ -10,42 +10,21 @@
|
|
|
10
10
|
|
|
11
11
|
## スクリーンショット
|
|
12
12
|
|
|
13
|
-
<
|
|
13
|
+
<p>TTY出力<br />
|
|
14
|
+
<img src="doc/img/screenshot-tty-en-ja.png" alt="TTY output"/></p>
|
|
14
15
|
|
|
15
16
|
<p>HTML出力<br />
|
|
16
|
-
<img src="doc/img/screenshot-
|
|
17
|
+
<img src="doc/img/screenshot-html.png" alt="HTML output"/></p>
|
|
17
18
|
|
|
18
|
-
<p
|
|
19
|
-
<img src="doc/img/screenshot-
|
|
19
|
+
<p>文字ごとの比較<br />
|
|
20
|
+
<img src="doc/img/screenshot-tty-char.png" alt="Comparison by character"/></p>
|
|
20
21
|
|
|
21
|
-
<p
|
|
22
|
-
<img src="doc/img/screenshot-
|
|
23
|
-
|
|
24
|
-
<p>tty出力(日本語のテキストを比較)<br />
|
|
25
|
-
<img src="doc/img/screenshot-format-tty-rxvtunicode-ja.png" alt="tty output (comparing Japanese text)" /></p>
|
|
26
|
-
|
|
27
|
-
<p>tty出力<br />
|
|
28
|
-
<img src="doc/img/screenshot-format-tty-xterm-en.png" alt="tty output" /></p>
|
|
29
|
-
|
|
30
|
-
<p>tty出力(日本語のテキストを比較)<br />
|
|
31
|
-
<img src="doc/img/screenshot-format-tty-xterm-ja.png" alt="tty output (comparing Japanese text)" /></p>
|
|
32
|
-
|
|
33
|
-
</div>
|
|
34
|
-
|
|
35
|
-
<p>英語のテキスト(コードページ437)を比較(Windows上のCygwin環境)<br />
|
|
36
|
-
<img src="doc/img/screenshot-format-tty-cmdexe-en.png" alt="Comparing English text (codepage 437) on Windows (Cygwin)" /></p>
|
|
37
|
-
|
|
38
|
-
<p>日本語のテキスト(コードページ937)を比較(Windows上のCygwin環境)<br />
|
|
39
|
-
<img src="doc/img/screenshot-format-tty-cmdexe-ja.png" alt="Comparing Japanese text (codepage 932) on Windows (Cygwin)" /></p>
|
|
40
|
-
|
|
41
|
-
<p>行ごと、単語ごと、文字ごとの比較が可能(フォーマットはtty)<br/>
|
|
42
|
-
<img src="doc/img/screenshot-resolution-linewordchar-xterm.png" alt="You can compare text files by line, word, or character (format: tty)" /></p>
|
|
43
|
-
|
|
44
|
-
(バージョン0.3.2時点のスクリーンショットです。)
|
|
22
|
+
<p>ダイジェストモード(長いテキスト中に散らばった変更点を確認するのに便利)<br />
|
|
23
|
+
<img src="doc/img/screenshot-tty-digest-license.png" alt="Digest mode"/></p>
|
|
45
24
|
|
|
46
25
|
## 概要
|
|
47
26
|
|
|
48
|
-
DocDiffは2つのテキストファイルを比較してその違いを表示します。単語ごと、文字ごと、そして行ごとにファイルを比較できます。結果を出力する形式は、HTML、
|
|
27
|
+
DocDiffは2つのテキストファイルを比較してその違いを表示します。単語ごと、文字ごと、そして行ごとにファイルを比較できます。結果を出力する形式は、TTY(文字端末向けのエスケープシーケンス)、HTML、Manued(真鵺道という校正用のマークアップ形式)などが用意されており、ユーザ定義のタグを使うこともできます。
|
|
49
28
|
|
|
50
29
|
次のエンコーディング(文字コード)と行末コード(改行文字)をサポートしています: ASCII(およびISO-8859-*などのシングルバイトエンコーディング)、UTF-8、EUC-JP、Shift_JIS(Windows-31J)、そしてCR、LF、CRLF。
|
|
51
30
|
|
|
@@ -54,18 +33,72 @@ DocDiffは2つのテキストファイルを比較してその違いを表示し
|
|
|
54
33
|
### 概要
|
|
55
34
|
|
|
56
35
|
```
|
|
57
|
-
$ docdiff [options]
|
|
36
|
+
$ docdiff [options] file1 file2
|
|
58
37
|
```
|
|
59
38
|
|
|
60
|
-
|
|
39
|
+
コマンドラインオプションについては、`docdiff --help`で出力されるヘルプメッセージを参照してください。
|
|
40
|
+
|
|
41
|
+
### コマンドラインオプション
|
|
61
42
|
|
|
62
43
|
```
|
|
63
|
-
|
|
44
|
+
docdiff [options] file1 file2
|
|
45
|
+
--resolution=RESOLUTION
|
|
46
|
+
specify resolution (granularity)
|
|
47
|
+
line|word|char (default: word)
|
|
48
|
+
--line same as --resolution=line
|
|
49
|
+
--word same as --resolution=word
|
|
50
|
+
--char same as --resolution=char
|
|
51
|
+
--encoding=ENCODING
|
|
52
|
+
specify character encoding
|
|
53
|
+
ASCII|EUC-JP|Shift_JIS|CP932|UTF-8|auto (default: auto)
|
|
54
|
+
(try ASCII for single byte encodings such as ISO-8859)
|
|
55
|
+
--ascii same as --encoding=ASCII
|
|
56
|
+
--iso8859 same as --encoding=ASCII
|
|
57
|
+
--iso8859x same as --encoding=ASCII (deprecated)
|
|
58
|
+
--eucjp same as --encoding=EUC-JP
|
|
59
|
+
--sjis same as --encoding=Shift_JIS
|
|
60
|
+
--cp932 same as --encoding=CP932
|
|
61
|
+
--utf8 same as --encoding=UTF-8
|
|
62
|
+
--eol=EOL specify end-of-line character
|
|
63
|
+
CR|LF|CRLF|auto (default: auto)
|
|
64
|
+
--cr same as --eol=CR
|
|
65
|
+
--lf same as --eol=LF
|
|
66
|
+
--crlf same as --eol=CRLF
|
|
67
|
+
--format=FORMAT specify output format
|
|
68
|
+
tty|manued|html|wdiff|stat|user (default: tty)
|
|
69
|
+
(stat is deprecated)
|
|
70
|
+
(user tags can be defined in configuration file)
|
|
71
|
+
--tty same as --format=tty
|
|
72
|
+
--manued same as --format=manued
|
|
73
|
+
--html same as --format=html
|
|
74
|
+
--wdiff same as --format=wdiff
|
|
75
|
+
--stat same as --format=stat (not implemented) (deprecated)
|
|
76
|
+
-L, --label LABEL use label instead of file name
|
|
77
|
+
(not implemented; exists for compatibility with diff)
|
|
78
|
+
--digest digest output, do not show all
|
|
79
|
+
--summary same as --digest
|
|
80
|
+
--display=DISPLAY
|
|
81
|
+
specify presentation type (effective only with digest)
|
|
82
|
+
inline|block|multi (default: inline)
|
|
83
|
+
(experimental feature) (multi is deprecated)
|
|
84
|
+
--cache use file cache (not implemented) (deprecated)
|
|
85
|
+
--pager=PAGER specify pager
|
|
86
|
+
(falls back to $DOCDIFF_PAGER, $PAGER, or none)
|
|
87
|
+
--no-pager do not use pager
|
|
88
|
+
--config-file=FILE
|
|
89
|
+
specify configuration file to read
|
|
90
|
+
--no-config-file do not read configuration files
|
|
91
|
+
--verbose run verbosely (not well-supported) (deprecated)
|
|
92
|
+
--help show help message
|
|
93
|
+
--version show version
|
|
94
|
+
--license show license (deprecated)
|
|
95
|
+
--author show author(s) (deprecated)
|
|
96
|
+
When invoked as worddiff or chardiff, resolution will be set accordingly.
|
|
97
|
+
Configuration files: /etc/docdiff/docdiff.conf, ~/.config/docdiff/docdiff.conf,
|
|
98
|
+
or ~/etc/docdiff/docdiff.conf (deprecated)
|
|
64
99
|
```
|
|
65
100
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
### 例
|
|
101
|
+
### 実行例
|
|
69
102
|
|
|
70
103
|
<pre>
|
|
71
104
|
$ cat 01_ja_eucjp_lf.txt
|
|
@@ -333,7 +366,7 @@ Diffライブラリ(`lib/docdiff/diff.rb`および`lib/docdiff/diff/*`)は
|
|
|
333
366
|
### フォーマット
|
|
334
367
|
|
|
335
368
|
* [HTML/XHTML](https://www.w3.org/)
|
|
336
|
-
*
|
|
369
|
+
* TTY (Graphic rendition using VT100 / ANSI escape sequences)
|
|
337
370
|
- [VT100](https://vt100.net/docs/tp83/appendixb.html)
|
|
338
371
|
- [ANSI](https://tldp.org/HOWTO/Bash-Prompt-HOWTO/x329.html)
|
|
339
372
|
* [Manued](https://sundayresearch.eu/hitoshi/otherprojects/manued/) ([in Japanese](https://sundayresearch.eu/hitoshi/otherprojects/manued/index-j.html)) (Manuscript Editing Language: a proofreading method for text)
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
data/doc/man/docdiff.adoc
CHANGED
|
@@ -12,7 +12,7 @@ docdiff - character/word-oriented diff
|
|
|
12
12
|
|
|
13
13
|
Compares two text files by word, by character, or by line
|
|
14
14
|
|
|
15
|
-
DocDiff compares two text files and shows the difference. It can compare files word by word, character by character, or line by line. It has several output formats such as
|
|
15
|
+
DocDiff compares two text files and shows the difference. It can compare files word by word, character by character, or line by line. It has several output formats such as tty, HTML, Manued, or user-defined markup.
|
|
16
16
|
|
|
17
17
|
It supports several encodings and end-of-line characters, including ASCII (and other single byte encodings such as ISO-8859-*), UTF-8, EUC-JP, Shift_JIS (Windows-31J), CR, LF, and CRLF.
|
|
18
18
|
|
|
@@ -72,7 +72,7 @@ It supports several encodings and end-of-line characters, including ASCII (and o
|
|
|
72
72
|
|
|
73
73
|
*--format*=FORMAT::
|
|
74
74
|
specify output format
|
|
75
|
-
_tty_|_manued_|_html_|_wdiff_|_stat_|_user_ (default:
|
|
75
|
+
_tty_|_manued_|_html_|_wdiff_|_stat_|_user_ (default: _tty_) (_stat_ is deprecated)
|
|
76
76
|
(user tags can be defined in config file)
|
|
77
77
|
|
|
78
78
|
*--tty*::
|
|
@@ -107,7 +107,7 @@ It supports several encodings and end-of-line characters, including ASCII (and o
|
|
|
107
107
|
use file cache (not implemented) (deprecated)
|
|
108
108
|
|
|
109
109
|
*--pager*=PAGER::
|
|
110
|
-
specify pager (if available, $_DOCDIFF_PAGER_ is used by default)
|
|
110
|
+
specify pager (if available, $_DOCDIFF_PAGER_ or $_PAGER_ is used by default)
|
|
111
111
|
|
|
112
112
|
*--no-pager*::
|
|
113
113
|
do not use pager
|
data/doc/news.md
CHANGED
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
# News
|
|
2
2
|
|
|
3
|
+
### 0.7.0 (2026-04-05)
|
|
4
|
+
|
|
5
|
+
* User-visible changes:
|
|
6
|
+
- Default output format changed from html to tty. (breaking change)
|
|
7
|
+
- If you prefer the old behaviour, add `format = html` to your configuration file (e.g. `~/.config/docdiff/docdiff.conf`).
|
|
8
|
+
- Enabled automatic pager activation via `$PAGER`. (breaking change)
|
|
9
|
+
- Changed line number format in digest mode to `L1, L1` and `(L1), LL1-2` (previously `1,1` and `(1),1-2`). (somewhat breaking change)
|
|
10
|
+
- Updated screenshots in README.
|
|
11
|
+
* Developer-related changes:
|
|
12
|
+
- Renamed methods: `split_to_foo` to `to_foo`, `line` to `lines`, etc.
|
|
13
|
+
|
|
3
14
|
### 0.6.7 (2026-02-25)
|
|
4
15
|
|
|
5
16
|
* User-visible changes:
|
data/docdiff.gemspec
CHANGED
|
@@ -15,7 +15,7 @@ Gem::Specification.new do |s|
|
|
|
15
15
|
DocDiff compares two text files and shows the
|
|
16
16
|
difference. It can compare files word by word,
|
|
17
17
|
character by character, or line by line. It has
|
|
18
|
-
several output formats such as
|
|
18
|
+
several output formats such as tty, HTML, Manued,
|
|
19
19
|
or user-defined markup.
|
|
20
20
|
EOS
|
|
21
21
|
|
data/lib/doc_diff.rb
CHANGED
|
@@ -28,7 +28,7 @@ class DocDiff
|
|
|
28
28
|
resolution: "word",
|
|
29
29
|
encoding: "auto",
|
|
30
30
|
eol: "auto",
|
|
31
|
-
format: "
|
|
31
|
+
format: "tty",
|
|
32
32
|
cache: true,
|
|
33
33
|
digest: false,
|
|
34
34
|
pager: nil,
|
|
@@ -41,7 +41,7 @@ class DocDiff
|
|
|
41
41
|
attr_accessor :config
|
|
42
42
|
|
|
43
43
|
def compare_by_line(doc1, doc2)
|
|
44
|
-
Difference.new(doc1.
|
|
44
|
+
Difference.new(doc1.to_lines, doc2.to_lines)
|
|
45
45
|
end
|
|
46
46
|
|
|
47
47
|
def compare_by_line_word(doc1, doc2)
|
|
@@ -51,7 +51,7 @@ class DocDiff
|
|
|
51
51
|
if line.first == :change_elt
|
|
52
52
|
before_change = Document.new(line[1].join, doc1.encoding, doc1.eol)
|
|
53
53
|
after_change = Document.new(line[2].join, doc2.encoding, doc2.eol)
|
|
54
|
-
Difference.new(before_change.
|
|
54
|
+
Difference.new(before_change.to_words, after_change.to_words).each do |word|
|
|
55
55
|
words << word
|
|
56
56
|
end
|
|
57
57
|
else # :common_elt_elt, :del_elt, or :add_elt
|
|
@@ -69,7 +69,7 @@ class DocDiff
|
|
|
69
69
|
if line.first == :change_elt
|
|
70
70
|
before_change = Document.new(line[1].join, doc1.encoding, doc1.eol)
|
|
71
71
|
after_change = Document.new(line[2].join, doc2.encoding, doc2.eol)
|
|
72
|
-
Difference.new(before_change.
|
|
72
|
+
Difference.new(before_change.to_words, after_change.to_words).each do |word|
|
|
73
73
|
lines_and_words << word
|
|
74
74
|
end
|
|
75
75
|
else # :common_elt_elt, :del_elt, or :add_elt
|
|
@@ -81,7 +81,7 @@ class DocDiff
|
|
|
81
81
|
if line_or_word.first == :change_elt
|
|
82
82
|
before_change = Document.new(line_or_word[1].join, doc1.encoding, doc1.eol)
|
|
83
83
|
after_change = Document.new(line_or_word[2].join, doc2.encoding, doc2.eol)
|
|
84
|
-
Difference.new(before_change.
|
|
84
|
+
Difference.new(before_change.to_chars, after_change.to_chars).each do |char|
|
|
85
85
|
lines_words_and_chars << char
|
|
86
86
|
end
|
|
87
87
|
else # :common_elt_elt, :del_elt, or :add_elt
|
data/lib/docdiff/charstring.rb
CHANGED
|
@@ -97,36 +97,36 @@ class DocDiff
|
|
|
97
97
|
# Note that some languages (like Japanese) do not have 'word' or 'phrase',
|
|
98
98
|
# thus some of the following methods are not 'linguistically correct'.
|
|
99
99
|
|
|
100
|
-
def
|
|
101
|
-
|
|
100
|
+
def count_bytes
|
|
101
|
+
to_bytes.size
|
|
102
102
|
end
|
|
103
103
|
|
|
104
|
-
def
|
|
105
|
-
|
|
104
|
+
def count_chars # eol = 1 char
|
|
105
|
+
to_chars.size
|
|
106
106
|
end
|
|
107
107
|
|
|
108
|
-
def
|
|
109
|
-
|
|
108
|
+
def count_graph_chars
|
|
109
|
+
count_latin_graph_chars + count_ja_graph_chars
|
|
110
110
|
end
|
|
111
111
|
|
|
112
|
-
def
|
|
113
|
-
|
|
112
|
+
def count_blank_chars
|
|
113
|
+
count_latin_blank_chars + count_ja_blank_chars
|
|
114
114
|
end
|
|
115
115
|
|
|
116
|
-
def
|
|
117
|
-
|
|
116
|
+
def count_words
|
|
117
|
+
to_words.size
|
|
118
118
|
end
|
|
119
119
|
|
|
120
|
-
def
|
|
121
|
-
|
|
120
|
+
def count_valid_words
|
|
121
|
+
count_latin_valid_words + count_ja_valid_words
|
|
122
122
|
end
|
|
123
123
|
|
|
124
|
-
def
|
|
125
|
-
|
|
124
|
+
def count_lines # this is common to all encodings.
|
|
125
|
+
to_lines.size
|
|
126
126
|
end
|
|
127
127
|
|
|
128
|
-
def
|
|
129
|
-
|
|
128
|
+
def count_empty_lines
|
|
129
|
+
to_lines.count { |line| /^(?:#{eol_char})|^$/m.match(line) }
|
|
130
130
|
end
|
|
131
131
|
|
|
132
132
|
# for Ruby-1.9
|
|
@@ -146,11 +146,11 @@ class DocDiff
|
|
|
146
146
|
end
|
|
147
147
|
end
|
|
148
148
|
|
|
149
|
-
def
|
|
149
|
+
def to_bytes
|
|
150
150
|
encode("ASCII-8BIT").scan(/./nm)
|
|
151
151
|
end
|
|
152
152
|
|
|
153
|
-
def
|
|
153
|
+
def to_chars
|
|
154
154
|
re =
|
|
155
155
|
if eol_char # sometimes string has no end-of-line char
|
|
156
156
|
Regexp.new("(?:#{eol_char})|(?:.)", Regexp::MULTILINE)
|
|
@@ -160,52 +160,52 @@ class DocDiff
|
|
|
160
160
|
encode("UTF-8").scan(re).map { |e| e.encode(encoding) }
|
|
161
161
|
end
|
|
162
162
|
|
|
163
|
-
def
|
|
163
|
+
def count_latin_graph_chars
|
|
164
164
|
re = Regexp.new("[#{Encodings["UTF-8"]::GRAPH}]", Regexp::MULTILINE)
|
|
165
165
|
encode("UTF-8").scan(re).size
|
|
166
166
|
end
|
|
167
167
|
|
|
168
|
-
def
|
|
168
|
+
def count_ja_graph_chars
|
|
169
169
|
re = Regexp.new("[#{Encodings["UTF-8"]::JA_GRAPH}]", Regexp::MULTILINE)
|
|
170
170
|
encode("UTF-8").scan(re).size
|
|
171
171
|
end
|
|
172
172
|
|
|
173
|
-
def
|
|
173
|
+
def count_latin_blank_chars
|
|
174
174
|
re = Regexp.new("[#{Encodings["UTF-8"]::BLANK}]", Regexp::MULTILINE)
|
|
175
175
|
encode("UTF-8").scan(re).size
|
|
176
176
|
end
|
|
177
177
|
|
|
178
|
-
def
|
|
178
|
+
def count_ja_blank_chars
|
|
179
179
|
re = Regexp.new("[#{Encodings["UTF-8"]::JA_BLANK}]", Regexp::MULTILINE)
|
|
180
180
|
encode("UTF-8").scan(re).size
|
|
181
181
|
end
|
|
182
182
|
|
|
183
|
-
def
|
|
183
|
+
def to_words
|
|
184
184
|
re = Regexp.new(Encodings["UTF-8"]::WORD_REGEXP_SRC, Regexp::MULTILINE)
|
|
185
185
|
encode("UTF-8").scan(re).map { |e| e.encode(encoding) }
|
|
186
186
|
end
|
|
187
187
|
|
|
188
|
-
def
|
|
188
|
+
def count_latin_words
|
|
189
189
|
re = Regexp.new("[#{Encodings["UTF-8"]::PRINT}]", Regexp::MULTILINE)
|
|
190
|
-
|
|
190
|
+
to_words.count { |word| re.match(word.encode("UTF-8")) }
|
|
191
191
|
end
|
|
192
192
|
|
|
193
|
-
def
|
|
193
|
+
def count_ja_words
|
|
194
194
|
re = Regexp.new("[#{Encodings["UTF-8"]::JA_PRINT}]", Regexp::MULTILINE)
|
|
195
|
-
|
|
195
|
+
to_words.count { |word| re.match(word.encode("UTF-8")) }
|
|
196
196
|
end
|
|
197
197
|
|
|
198
|
-
def
|
|
198
|
+
def count_latin_valid_words
|
|
199
199
|
re = Regexp.new("[#{Encodings["UTF-8"]::ALNUM}]", Regexp::MULTILINE)
|
|
200
|
-
|
|
200
|
+
to_words.count { |word| re.match(word.encode("UTF-8")) }
|
|
201
201
|
end
|
|
202
202
|
|
|
203
|
-
def
|
|
203
|
+
def count_ja_valid_words
|
|
204
204
|
re = Regexp.new("[#{Encodings["UTF-8"]::JA_GRAPH}]", Regexp::MULTILINE)
|
|
205
|
-
|
|
205
|
+
to_words.count { |word| re.match(word.encode("UTF-8")) }
|
|
206
206
|
end
|
|
207
207
|
|
|
208
|
-
def
|
|
208
|
+
def to_lines
|
|
209
209
|
raise <<~EOS.chomp unless EOLChars[eol]
|
|
210
210
|
EOLChars[eol] is #{EOLChars[eol].inspect}: eol not specified or auto-detection failed.
|
|
211
211
|
EOS
|
|
@@ -219,16 +219,16 @@ class DocDiff
|
|
|
219
219
|
encode("UTF-8").scan(re).map { |e| e.encode(encoding) }
|
|
220
220
|
end
|
|
221
221
|
|
|
222
|
-
def
|
|
222
|
+
def count_graph_lines
|
|
223
223
|
graph = (Encodings["UTF-8"]::GRAPH + Encodings["UTF-8"]::JA_GRAPH).chars.uniq.join
|
|
224
224
|
re = Regexp.new("[#{Regexp.quote(graph)}]", Regexp::MULTILINE)
|
|
225
|
-
|
|
225
|
+
to_lines.count { |line| re.match(line.encode("UTF-8")) }
|
|
226
226
|
end
|
|
227
227
|
|
|
228
|
-
def
|
|
228
|
+
def count_blank_lines
|
|
229
229
|
blank = (Encodings["UTF-8"]::BLANK + Encodings["UTF-8"]::JA_BLANK).chars.uniq.join
|
|
230
230
|
re = Regexp.new("^[#{blank}]+(?:#{eol_char})?", Regexp::MULTILINE)
|
|
231
|
-
|
|
231
|
+
to_lines.count { |line| re.match(line.encode("UTF-8")) }
|
|
232
232
|
end
|
|
233
233
|
|
|
234
234
|
# load encoding modules
|
|
@@ -236,10 +236,6 @@ class DocDiff
|
|
|
236
236
|
require "docdiff/encoding/ja_eucjp"
|
|
237
237
|
require "docdiff/encoding/ja_sjis"
|
|
238
238
|
require "docdiff/encoding/ja_utf8"
|
|
239
|
-
alias_method :to_bytes, :split_to_byte
|
|
240
|
-
alias_method :to_chars, :split_to_char
|
|
241
|
-
alias_method :to_words, :split_to_word
|
|
242
|
-
alias_method :to_lines, :split_to_line
|
|
243
239
|
|
|
244
240
|
module CR
|
|
245
241
|
EOL = "CR"
|