docdiff 0.6.7 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/README.md +69 -36
  3. data/README_ja.md +70 -37
  4. data/doc/example/docdiff.conf.example +3 -0
  5. data/doc/img/screenshot-html.png +0 -0
  6. data/doc/img/screenshot-tty-char.png +0 -0
  7. data/doc/img/screenshot-tty-digest-block.png +0 -0
  8. data/doc/img/screenshot-tty-digest-license-block.png +0 -0
  9. data/doc/img/screenshot-tty-digest-license.png +0 -0
  10. data/doc/img/screenshot-tty-digest.png +0 -0
  11. data/doc/img/screenshot-tty-en-ja.png +0 -0
  12. data/doc/img/screenshot-tty-manued.png +0 -0
  13. data/doc/img/screenshot-tty-wdiff.png +0 -0
  14. data/doc/img/screenshot-tty-word-char.png +0 -0
  15. data/doc/man/docdiff.adoc +3 -3
  16. data/doc/news.md +11 -0
  17. data/docdiff.gemspec +1 -1
  18. data/lib/doc_diff.rb +5 -5
  19. data/lib/docdiff/charstring.rb +36 -40
  20. data/lib/docdiff/cli.rb +23 -10
  21. data/lib/docdiff/document.rb +44 -44
  22. data/lib/docdiff/encoding/en_ascii.rb +4 -4
  23. data/lib/docdiff/version.rb +1 -1
  24. data/lib/docdiff/view.rb +27 -13
  25. data/test/charstring_test.rb +221 -221
  26. data/test/cli_test.rb +12 -12
  27. data/test/document_test.rb +223 -223
  28. data/test/fixture/humpty_dumpty_01_en.txt +4 -0
  29. data/test/fixture/humpty_dumpty_01_ja.txt +4 -0
  30. data/test/fixture/{humpty_dumpty01_ascii_lf.txt → humpty_dumpty_02_en.txt} +2 -2
  31. data/test/fixture/humpty_dumpty_02_ja.txt +4 -0
  32. data/test/view_test.rb +38 -31
  33. metadata +26 -14
  34. data/test/fixture/humpty_dumpty02_ascii_lf.txt +0 -4
  35. /data/doc/img/{screenshot-format-html-digest-firefox.png → old/screenshot-format-html-digest-firefox.png} +0 -0
  36. /data/doc/img/{screenshot-format-html-firefox.png → old/screenshot-format-html-firefox.png} +0 -0
  37. /data/doc/img/{screenshot-format-tty-cmdexe-en.png → old/screenshot-format-tty-cmdexe-en.png} +0 -0
  38. /data/doc/img/{screenshot-format-tty-cmdexe-ja.png → old/screenshot-format-tty-cmdexe-ja.png} +0 -0
  39. /data/doc/img/{screenshot-format-tty-rxvtunicode-en.png → old/screenshot-format-tty-rxvtunicode-en.png} +0 -0
  40. /data/doc/img/{screenshot-format-tty-rxvtunicode-ja.png → old/screenshot-format-tty-rxvtunicode-ja.png} +0 -0
  41. /data/doc/img/{screenshot-format-tty-xterm-en.png → old/screenshot-format-tty-xterm-en.png} +0 -0
  42. /data/doc/img/{screenshot-format-tty-xterm-ja.png → old/screenshot-format-tty-xterm-ja.png} +0 -0
  43. /data/doc/img/{screenshot-resolution-linewordchar-xterm.png → old/screenshot-resolution-linewordchar-xterm.png} +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a0780c057df7a736c77e7e4b3542a18a1ce9335140c1ef141885b1acb06d1b12
4
- data.tar.gz: f479c7a739f14c5c3b78d0d5078515982ceb3f61a40c23ec96f5803b4865e837
3
+ metadata.gz: e84a8c85103e65839c20b2b36b98c2d2991f1f21135a8742da68d130c6dd4290
4
+ data.tar.gz: 76fde0a2c869bf96b5688d4917e3aa9cadffb44c6b67bcc1c9eb2939a44aecc3
5
5
  SHA512:
6
- metadata.gz: 6dfa6dd69a1d3e5f4cdb7024c66a628090a9cbce23c7486471e1cd0f3cf242edb6485628218205b0ab2f5e8a8357b551f97a8c12db6c10aa25e844f406f8e19c
7
- data.tar.gz: cefeb857d6939c609a13d620a16c2db8db43d1c86a140ebb65a4d2c73a20be3023be0333821d13fc1b42c0a63f550229d2e8890f97fdfae5cb3decc94d0ae11c
6
+ metadata.gz: cdf66ad98c6047d4a6a858762d79ed1569f21e65401ad2dcfc3a791828cd0407fc5c9411efb567796ddf0f3d45e38bdd68a6739060c7612b9290b111cbced1bd
7
+ data.tar.gz: 7848047f1904a90934b4bffd4b7539b14380c995af379a3874af1d92bf0ffb145fd4e976567e2fab6a6f167816698415c5c1094e824923a2c236b1d747c27fe9
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # DocDiff
2
2
 
3
- * English | [Japanese](README_ja.md)
3
+ * English / [Japanese](README_ja.md)
4
4
 
5
5
  (C) 2000 Hisashi MORITA
6
6
 
@@ -10,42 +10,21 @@ Compares two text files by word, by character, or by line
10
10
 
11
11
  ## Screenshots
12
12
 
13
- <div style="display: grid; grid-template-columns: 1fr 1fr;">
13
+ <p>TTY output<br />
14
+ <img src="doc/img/screenshot-tty-en-ja.png" alt="TTY output"/></p>
14
15
 
15
16
  <p>HTML output<br />
16
- <img src="doc/img/screenshot-format-html-firefox.png" alt="HTML output"/></p>
17
+ <img src="doc/img/screenshot-html.png" alt="HTML output"/></p>
17
18
 
18
- <p>HTML output (digest)<br />
19
- <img src="doc/img/screenshot-format-html-digest-firefox.png" alt="HTML output (digest)" /></p>
19
+ <p>Comparison by character<br />
20
+ <img src="doc/img/screenshot-tty-char.png" alt="Comparison by character"/></p>
20
21
 
21
- <p>tty output<br />
22
- <img src="doc/img/screenshot-format-tty-rxvtunicode-en.png" alt="tty output" /></p>
23
-
24
- <p>tty output (comparing Japanese text)<br />
25
- <img src="doc/img/screenshot-format-tty-rxvtunicode-ja.png" alt="tty output (comparing Japanese text)" /></p>
26
-
27
- <p>tty output<br />
28
- <img src="doc/img/screenshot-format-tty-xterm-en.png" alt="tty output" /></p>
29
-
30
- <p>tty output (comparing Japanese text)<br />
31
- <img src="doc/img/screenshot-format-tty-xterm-ja.png" alt="tty output (comparing Japanese text)" /></p>
32
-
33
- </div>
34
-
35
- <p>Comparing English text (codepage 437) on Windows (Cygwin)<br />
36
- <img src="doc/img/screenshot-format-tty-cmdexe-en.png" alt="Comparing English text (codepage 437) on Windows (Cygwin)" /></p>
37
-
38
- <p>Comparing Japanese text (codepage 932) on Windows (Cygwin)<br />
39
- <img src="doc/img/screenshot-format-tty-cmdexe-ja.png" alt="Comparing Japanese text (codepage 932) on Windows (Cygwin)" /></p>
40
-
41
- <p>You can compare text files by line, word, or character (format: tty)<br/>
42
- <img src="doc/img/screenshot-resolution-linewordchar-xterm.png" alt="You can compare text files by line, word, or character (format: tty)" /></p>
43
-
44
- (Screenshots as of version 0.3.2.)
22
+ <p>Digest mode (useful for viewing scattered changes within a long text)<br />
23
+ <img src="doc/img/screenshot-tty-digest-license.png" alt="Digest mode"/></p>
45
24
 
46
25
  ## Summary
47
26
 
48
- DocDiff compares two text files and shows the difference. It can compare files word by word, character by character, or line by line. It has several output formats such as HTML, tty, Manued, or user-defined markup.
27
+ DocDiff compares two text files and shows the difference. It can compare files word by word, character by character, or line by line. It has several output formats such as TTY, HTML, Manued, or user-defined markup.
49
28
 
50
29
  It supports several encodings and end-of-line characters, including ASCII (and other single byte encodings such as ISO-8859-*), UTF-8, EUC-JP, Shift_JIS (Windows-31J), CR, LF, and CRLF.
51
30
 
@@ -54,17 +33,71 @@ It supports several encodings and end-of-line characters, including ASCII (and o
54
33
  ### Synopsis
55
34
 
56
35
  ```
57
- $ docdiff [options] oldfile newfile
36
+ $ docdiff [options] file1 file2
58
37
  ```
59
38
 
60
- e.g.
39
+ Type `docdiff --help` to show command line options.
40
+
41
+ ### Options
61
42
 
62
43
  ```
63
- $ docdiff old.txt new.txt > diff.html
44
+ docdiff [options] file1 file2
45
+ --resolution=RESOLUTION
46
+ specify resolution (granularity)
47
+ line|word|char (default: word)
48
+ --line same as --resolution=line
49
+ --word same as --resolution=word
50
+ --char same as --resolution=char
51
+ --encoding=ENCODING
52
+ specify character encoding
53
+ ASCII|EUC-JP|Shift_JIS|CP932|UTF-8|auto (default: auto)
54
+ (try ASCII for single byte encodings such as ISO-8859)
55
+ --ascii same as --encoding=ASCII
56
+ --iso8859 same as --encoding=ASCII
57
+ --iso8859x same as --encoding=ASCII (deprecated)
58
+ --eucjp same as --encoding=EUC-JP
59
+ --sjis same as --encoding=Shift_JIS
60
+ --cp932 same as --encoding=CP932
61
+ --utf8 same as --encoding=UTF-8
62
+ --eol=EOL specify end-of-line character
63
+ CR|LF|CRLF|auto (default: auto)
64
+ --cr same as --eol=CR
65
+ --lf same as --eol=LF
66
+ --crlf same as --eol=CRLF
67
+ --format=FORMAT specify output format
68
+ tty|manued|html|wdiff|stat|user (default: tty)
69
+ (stat is deprecated)
70
+ (user tags can be defined in configuration file)
71
+ --tty same as --format=tty
72
+ --manued same as --format=manued
73
+ --html same as --format=html
74
+ --wdiff same as --format=wdiff
75
+ --stat same as --format=stat (not implemented) (deprecated)
76
+ -L, --label LABEL use label instead of file name
77
+ (not implemented; exists for compatibility with diff)
78
+ --digest digest output, do not show all
79
+ --summary same as --digest
80
+ --display=DISPLAY
81
+ specify presentation type (effective only with digest)
82
+ inline|block|multi (default: inline)
83
+ (experimental feature) (multi is deprecated)
84
+ --cache use file cache (not implemented) (deprecated)
85
+ --pager=PAGER specify pager
86
+ (falls back to $DOCDIFF_PAGER, $PAGER, or none)
87
+ --no-pager do not use pager
88
+ --config-file=FILE
89
+ specify configuration file to read
90
+ --no-config-file do not read configuration files
91
+ --verbose run verbosely (not well-supported) (deprecated)
92
+ --help show help message
93
+ --version show version
94
+ --license show license (deprecated)
95
+ --author show author(s) (deprecated)
96
+ When invoked as worddiff or chardiff, resolution will be set accordingly.
97
+ Configuration files: /etc/docdiff/docdiff.conf, ~/.config/docdiff/docdiff.conf,
98
+ or ~/etc/docdiff/docdiff.conf (deprecated)
64
99
  ```
65
100
 
66
- See the help message for detail (`docdiff --help`).
67
-
68
101
  ### Example
69
102
 
70
103
  <pre>
@@ -333,7 +366,7 @@ Excuse us this list is far from complete and fails to acknowledge many more who
333
366
  ### Formats
334
367
 
335
368
  * [HTML/XHTML](https://www.w3.org/)
336
- * tty (Graphic rendition using VT100 / ANSI escape sequences)
369
+ * TTY (Graphic rendition using VT100 / ANSI escape sequences)
337
370
  - [VT100](https://vt100.net/docs/tp83/appendixb.html)
338
371
  - [ANSI](https://tldp.org/HOWTO/Bash-Prompt-HOWTO/x329.html)
339
372
  * [Manued](https://sundayresearch.eu/hitoshi/otherprojects/manued/) ([in Japanese](https://sundayresearch.eu/hitoshi/otherprojects/manued/index-j.html)) (Manuscript Editing Language: a proofreading method for text)
data/README_ja.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # DocDiff
2
2
 
3
- * [English](README.md) | Japanese
3
+ * [English](README.md) / Japanese
4
4
 
5
5
  (C) 2000 Hisashi MORITA
6
6
 
@@ -10,42 +10,21 @@
10
10
 
11
11
  ## スクリーンショット
12
12
 
13
- <div style="display: grid; grid-template-columns: 1fr 1fr;">
13
+ <p>TTY出力<br />
14
+ <img src="doc/img/screenshot-tty-en-ja.png" alt="TTY output"/></p>
14
15
 
15
16
  <p>HTML出力<br />
16
- <img src="doc/img/screenshot-format-html-firefox.png" alt="HTML output"/></p>
17
+ <img src="doc/img/screenshot-html.png" alt="HTML output"/></p>
17
18
 
18
- <p>HTML出力(ダイジェスト)<br />
19
- <img src="doc/img/screenshot-format-html-digest-firefox.png" alt="HTML output (digest)" /></p>
19
+ <p>文字ごとの比較<br />
20
+ <img src="doc/img/screenshot-tty-char.png" alt="Comparison by character"/></p>
20
21
 
21
- <p>tty出力<br />
22
- <img src="doc/img/screenshot-format-tty-rxvtunicode-en.png" alt="tty output" /></p>
23
-
24
- <p>tty出力(日本語のテキストを比較)<br />
25
- <img src="doc/img/screenshot-format-tty-rxvtunicode-ja.png" alt="tty output (comparing Japanese text)" /></p>
26
-
27
- <p>tty出力<br />
28
- <img src="doc/img/screenshot-format-tty-xterm-en.png" alt="tty output" /></p>
29
-
30
- <p>tty出力(日本語のテキストを比較)<br />
31
- <img src="doc/img/screenshot-format-tty-xterm-ja.png" alt="tty output (comparing Japanese text)" /></p>
32
-
33
- </div>
34
-
35
- <p>英語のテキスト(コードページ437)を比較(Windows上のCygwin環境)<br />
36
- <img src="doc/img/screenshot-format-tty-cmdexe-en.png" alt="Comparing English text (codepage 437) on Windows (Cygwin)" /></p>
37
-
38
- <p>日本語のテキスト(コードページ937)を比較(Windows上のCygwin環境)<br />
39
- <img src="doc/img/screenshot-format-tty-cmdexe-ja.png" alt="Comparing Japanese text (codepage 932) on Windows (Cygwin)" /></p>
40
-
41
- <p>行ごと、単語ごと、文字ごとの比較が可能(フォーマットはtty)<br/>
42
- <img src="doc/img/screenshot-resolution-linewordchar-xterm.png" alt="You can compare text files by line, word, or character (format: tty)" /></p>
43
-
44
- (バージョン0.3.2時点のスクリーンショットです。)
22
+ <p>ダイジェストモード(長いテキスト中に散らばった変更点を確認するのに便利)<br />
23
+ <img src="doc/img/screenshot-tty-digest-license.png" alt="Digest mode"/></p>
45
24
 
46
25
  ## 概要
47
26
 
48
- DocDiffは2つのテキストファイルを比較してその違いを表示します。単語ごと、文字ごと、そして行ごとにファイルを比較できます。結果を出力する形式は、HTML、tty(文字端末向けのエスケープシーケンス)、Manued(真鵺道という校正用のマークアップ形式)などが用意されており、ユーザ定義のタグを使うこともできます。
27
+ DocDiffは2つのテキストファイルを比較してその違いを表示します。単語ごと、文字ごと、そして行ごとにファイルを比較できます。結果を出力する形式は、TTY(文字端末向けのエスケープシーケンス)、HTML、Manued(真鵺道という校正用のマークアップ形式)などが用意されており、ユーザ定義のタグを使うこともできます。
49
28
 
50
29
  次のエンコーディング(文字コード)と行末コード(改行文字)をサポートしています: ASCII(およびISO-8859-*などのシングルバイトエンコーディング)、UTF-8、EUC-JP、Shift_JIS(Windows-31J)、そしてCR、LF、CRLF。
51
30
 
@@ -54,18 +33,72 @@ DocDiffは2つのテキストファイルを比較してその違いを表示し
54
33
  ### 概要
55
34
 
56
35
  ```
57
- $ docdiff [options] oldfile newfile
36
+ $ docdiff [options] file1 file2
58
37
  ```
59
38
 
60
- e.g.
39
+ コマンドラインオプションについては、`docdiff --help`で出力されるヘルプメッセージを参照してください。
40
+
41
+ ### コマンドラインオプション
61
42
 
62
43
  ```
63
- $ docdiff old.txt new.txt > diff.html
44
+ docdiff [options] file1 file2
45
+ --resolution=RESOLUTION
46
+ specify resolution (granularity)
47
+ line|word|char (default: word)
48
+ --line same as --resolution=line
49
+ --word same as --resolution=word
50
+ --char same as --resolution=char
51
+ --encoding=ENCODING
52
+ specify character encoding
53
+ ASCII|EUC-JP|Shift_JIS|CP932|UTF-8|auto (default: auto)
54
+ (try ASCII for single byte encodings such as ISO-8859)
55
+ --ascii same as --encoding=ASCII
56
+ --iso8859 same as --encoding=ASCII
57
+ --iso8859x same as --encoding=ASCII (deprecated)
58
+ --eucjp same as --encoding=EUC-JP
59
+ --sjis same as --encoding=Shift_JIS
60
+ --cp932 same as --encoding=CP932
61
+ --utf8 same as --encoding=UTF-8
62
+ --eol=EOL specify end-of-line character
63
+ CR|LF|CRLF|auto (default: auto)
64
+ --cr same as --eol=CR
65
+ --lf same as --eol=LF
66
+ --crlf same as --eol=CRLF
67
+ --format=FORMAT specify output format
68
+ tty|manued|html|wdiff|stat|user (default: tty)
69
+ (stat is deprecated)
70
+ (user tags can be defined in configuration file)
71
+ --tty same as --format=tty
72
+ --manued same as --format=manued
73
+ --html same as --format=html
74
+ --wdiff same as --format=wdiff
75
+ --stat same as --format=stat (not implemented) (deprecated)
76
+ -L, --label LABEL use label instead of file name
77
+ (not implemented; exists for compatibility with diff)
78
+ --digest digest output, do not show all
79
+ --summary same as --digest
80
+ --display=DISPLAY
81
+ specify presentation type (effective only with digest)
82
+ inline|block|multi (default: inline)
83
+ (experimental feature) (multi is deprecated)
84
+ --cache use file cache (not implemented) (deprecated)
85
+ --pager=PAGER specify pager
86
+ (falls back to $DOCDIFF_PAGER, $PAGER, or none)
87
+ --no-pager do not use pager
88
+ --config-file=FILE
89
+ specify configuration file to read
90
+ --no-config-file do not read configuration files
91
+ --verbose run verbosely (not well-supported) (deprecated)
92
+ --help show help message
93
+ --version show version
94
+ --license show license (deprecated)
95
+ --author show author(s) (deprecated)
96
+ When invoked as worddiff or chardiff, resolution will be set accordingly.
97
+ Configuration files: /etc/docdiff/docdiff.conf, ~/.config/docdiff/docdiff.conf,
98
+ or ~/etc/docdiff/docdiff.conf (deprecated)
64
99
  ```
65
100
 
66
- 詳しくはヘルプメッセージを参照してください(`docdiff --help`)。
67
-
68
- ### 例
101
+ ### 実行例
69
102
 
70
103
  <pre>
71
104
  $ cat 01_ja_eucjp_lf.txt
@@ -333,7 +366,7 @@ Diffライブラリ(`lib/docdiff/diff.rb`および`lib/docdiff/diff/*`)は
333
366
  ### フォーマット
334
367
 
335
368
  * [HTML/XHTML](https://www.w3.org/)
336
- * tty (Graphic rendition using VT100 / ANSI escape sequences)
369
+ * TTY (Graphic rendition using VT100 / ANSI escape sequences)
337
370
  - [VT100](https://vt100.net/docs/tp83/appendixb.html)
338
371
  - [ANSI](https://tldp.org/HOWTO/Bash-Prompt-HOWTO/x329.html)
339
372
  * [Manued](https://sundayresearch.eu/hitoshi/otherprojects/manued/) ([in Japanese](https://sundayresearch.eu/hitoshi/otherprojects/manued/index-j.html)) (Manuscript Editing Language: a proofreading method for text)
data/doc/example/docdiff.conf.example CHANGED
@@ -21,3 +21,6 @@
21
21
  # tag_change_before_end = '</!->'
22
22
  # tag_change_after_start = '<!+>'
23
23
  # tag_change_after_end = '</!+>'
24
+ #
25
+ ## set default output format to html
26
+ # format = html
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
data/doc/man/docdiff.adoc CHANGED
@@ -12,7 +12,7 @@ docdiff - character/word-oriented diff
12
12
 
13
13
  Compares two text files by word, by character, or by line
14
14
 
15
- DocDiff compares two text files and shows the difference. It can compare files word by word, character by character, or line by line. It has several output formats such as HTML, tty, Manued, or user-defined markup.
15
+ DocDiff compares two text files and shows the difference. It can compare files word by word, character by character, or line by line. It has several output formats such as tty, HTML, Manued, or user-defined markup.
16
16
 
17
17
  It supports several encodings and end-of-line characters, including ASCII (and other single byte encodings such as ISO-8859-*), UTF-8, EUC-JP, Shift_JIS (Windows-31J), CR, LF, and CRLF.
18
18
 
@@ -72,7 +72,7 @@ It supports several encodings and end-of-line characters, including ASCII (and o
72
72
 
73
73
  *--format*=FORMAT::
74
74
  specify output format
75
- _tty_|_manued_|_html_|_wdiff_|_stat_|_user_ (default: _html_) (_stat_ is deprecated)
75
+ _tty_|_manued_|_html_|_wdiff_|_stat_|_user_ (default: _tty_) (_stat_ is deprecated)
76
76
  (user tags can be defined in config file)
77
77
 
78
78
  *--tty*::
@@ -107,7 +107,7 @@ It supports several encodings and end-of-line characters, including ASCII (and o
107
107
  use file cache (not implemented) (deprecated)
108
108
 
109
109
  *--pager*=PAGER::
110
- specify pager (if available, $_DOCDIFF_PAGER_ is used by default)
110
+ specify pager (if available, $_DOCDIFF_PAGER_ or $_PAGER_ is used by default)
111
111
 
112
112
  *--no-pager*::
113
113
  do not use pager
data/doc/news.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # News
2
2
 
3
+ ### 0.7.0 (2026-04-05)
4
+
5
+ * User-visible changes:
6
+ - Default output format changed from html to tty. (breaking change)
7
+ - If you prefer the old behaviour, add `format = html` to your configuration file (e.g. `~/.config/docdiff/docdiff.conf`).
8
+ - Enabled automatic pager activation via `$PAGER`. (breaking change)
9
+ - Changed line number format in digest mode to `L1, L1` and `(L1), LL1-2` (previously `1,1` and `(1),1-2`). (somewhat breaking change)
10
+ - Updated screenshots in README.
11
+ * Developer-related changes:
12
+ - Renamed methods: `split_to_foo` to `to_foo`, `line` to `lines`, etc.
13
+
3
14
  ### 0.6.7 (2026-02-25)
4
15
 
5
16
  * User-visible changes:
data/docdiff.gemspec CHANGED
@@ -15,7 +15,7 @@ Gem::Specification.new do |s|
15
15
  DocDiff compares two text files and shows the
16
16
  difference. It can compare files word by word,
17
17
  character by character, or line by line. It has
18
- several output formats such as HTML, tty, Manued,
18
+ several output formats such as tty, HTML, Manued,
19
19
  or user-defined markup.
20
20
  EOS
21
21
 
data/lib/doc_diff.rb CHANGED
@@ -28,7 +28,7 @@ class DocDiff
28
28
  resolution: "word",
29
29
  encoding: "auto",
30
30
  eol: "auto",
31
- format: "html",
31
+ format: "tty",
32
32
  cache: true,
33
33
  digest: false,
34
34
  pager: nil,
@@ -41,7 +41,7 @@ class DocDiff
41
41
  attr_accessor :config
42
42
 
43
43
  def compare_by_line(doc1, doc2)
44
- Difference.new(doc1.split_to_line, doc2.split_to_line)
44
+ Difference.new(doc1.to_lines, doc2.to_lines)
45
45
  end
46
46
 
47
47
  def compare_by_line_word(doc1, doc2)
@@ -51,7 +51,7 @@ class DocDiff
51
51
  if line.first == :change_elt
52
52
  before_change = Document.new(line[1].join, doc1.encoding, doc1.eol)
53
53
  after_change = Document.new(line[2].join, doc2.encoding, doc2.eol)
54
- Difference.new(before_change.split_to_word, after_change.split_to_word).each do |word|
54
+ Difference.new(before_change.to_words, after_change.to_words).each do |word|
55
55
  words << word
56
56
  end
57
57
  else # :common_elt_elt, :del_elt, or :add_elt
@@ -69,7 +69,7 @@ class DocDiff
69
69
  if line.first == :change_elt
70
70
  before_change = Document.new(line[1].join, doc1.encoding, doc1.eol)
71
71
  after_change = Document.new(line[2].join, doc2.encoding, doc2.eol)
72
- Difference.new(before_change.split_to_word, after_change.split_to_word).each do |word|
72
+ Difference.new(before_change.to_words, after_change.to_words).each do |word|
73
73
  lines_and_words << word
74
74
  end
75
75
  else # :common_elt_elt, :del_elt, or :add_elt
@@ -81,7 +81,7 @@ class DocDiff
81
81
  if line_or_word.first == :change_elt
82
82
  before_change = Document.new(line_or_word[1].join, doc1.encoding, doc1.eol)
83
83
  after_change = Document.new(line_or_word[2].join, doc2.encoding, doc2.eol)
84
- Difference.new(before_change.split_to_char, after_change.split_to_char).each do |char|
84
+ Difference.new(before_change.to_chars, after_change.to_chars).each do |char|
85
85
  lines_words_and_chars << char
86
86
  end
87
87
  else # :common_elt_elt, :del_elt, or :add_elt
data/lib/docdiff/charstring.rb CHANGED
@@ -97,36 +97,36 @@ class DocDiff
97
97
  # Note that some languages (like Japanese) do not have 'word' or 'phrase',
98
98
  # thus some of the following methods are not 'linguistically correct'.
99
99
 
100
- def count_byte
101
- split_to_byte.size
100
+ def count_bytes
101
+ to_bytes.size
102
102
  end
103
103
 
104
- def count_char # eol = 1 char
105
- split_to_char.size
104
+ def count_chars # eol = 1 char
105
+ to_chars.size
106
106
  end
107
107
 
108
- def count_graph_char
109
- count_latin_graph_char + count_ja_graph_char
108
+ def count_graph_chars
109
+ count_latin_graph_chars + count_ja_graph_chars
110
110
  end
111
111
 
112
- def count_blank_char
113
- count_latin_blank_char + count_ja_blank_char
112
+ def count_blank_chars
113
+ count_latin_blank_chars + count_ja_blank_chars
114
114
  end
115
115
 
116
- def count_word
117
- split_to_word.size
116
+ def count_words
117
+ to_words.size
118
118
  end
119
119
 
120
- def count_valid_word
121
- count_latin_valid_word + count_ja_valid_word
120
+ def count_valid_words
121
+ count_latin_valid_words + count_ja_valid_words
122
122
  end
123
123
 
124
- def count_line # this is common to all encodings.
125
- split_to_line.size
124
+ def count_lines # this is common to all encodings.
125
+ to_lines.size
126
126
  end
127
127
 
128
- def count_empty_line
129
- split_to_line.count { |line| /^(?:#{eol_char})|^$/m.match(line) }
128
+ def count_empty_lines
129
+ to_lines.count { |line| /^(?:#{eol_char})|^$/m.match(line) }
130
130
  end
131
131
 
132
132
  # for Ruby-1.9
@@ -146,11 +146,11 @@ class DocDiff
146
146
  end
147
147
  end
148
148
 
149
- def split_to_byte
149
+ def to_bytes
150
150
  encode("ASCII-8BIT").scan(/./nm)
151
151
  end
152
152
 
153
- def split_to_char
153
+ def to_chars
154
154
  re =
155
155
  if eol_char # sometimes string has no end-of-line char
156
156
  Regexp.new("(?:#{eol_char})|(?:.)", Regexp::MULTILINE)
@@ -160,52 +160,52 @@ class DocDiff
160
160
  encode("UTF-8").scan(re).map { |e| e.encode(encoding) }
161
161
  end
162
162
 
163
- def count_latin_graph_char
163
+ def count_latin_graph_chars
164
164
  re = Regexp.new("[#{Encodings["UTF-8"]::GRAPH}]", Regexp::MULTILINE)
165
165
  encode("UTF-8").scan(re).size
166
166
  end
167
167
 
168
- def count_ja_graph_char
168
+ def count_ja_graph_chars
169
169
  re = Regexp.new("[#{Encodings["UTF-8"]::JA_GRAPH}]", Regexp::MULTILINE)
170
170
  encode("UTF-8").scan(re).size
171
171
  end
172
172
 
173
- def count_latin_blank_char
173
+ def count_latin_blank_chars
174
174
  re = Regexp.new("[#{Encodings["UTF-8"]::BLANK}]", Regexp::MULTILINE)
175
175
  encode("UTF-8").scan(re).size
176
176
  end
177
177
 
178
- def count_ja_blank_char
178
+ def count_ja_blank_chars
179
179
  re = Regexp.new("[#{Encodings["UTF-8"]::JA_BLANK}]", Regexp::MULTILINE)
180
180
  encode("UTF-8").scan(re).size
181
181
  end
182
182
 
183
- def split_to_word
183
+ def to_words
184
184
  re = Regexp.new(Encodings["UTF-8"]::WORD_REGEXP_SRC, Regexp::MULTILINE)
185
185
  encode("UTF-8").scan(re).map { |e| e.encode(encoding) }
186
186
  end
187
187
 
188
- def count_latin_word
188
+ def count_latin_words
189
189
  re = Regexp.new("[#{Encodings["UTF-8"]::PRINT}]", Regexp::MULTILINE)
190
- split_to_word.count { |word| re.match(word.encode("UTF-8")) }
190
+ to_words.count { |word| re.match(word.encode("UTF-8")) }
191
191
  end
192
192
 
193
- def count_ja_word
193
+ def count_ja_words
194
194
  re = Regexp.new("[#{Encodings["UTF-8"]::JA_PRINT}]", Regexp::MULTILINE)
195
- split_to_word.count { |word| re.match(word.encode("UTF-8")) }
195
+ to_words.count { |word| re.match(word.encode("UTF-8")) }
196
196
  end
197
197
 
198
- def count_latin_valid_word
198
+ def count_latin_valid_words
199
199
  re = Regexp.new("[#{Encodings["UTF-8"]::ALNUM}]", Regexp::MULTILINE)
200
- split_to_word.count { |word| re.match(word.encode("UTF-8")) }
200
+ to_words.count { |word| re.match(word.encode("UTF-8")) }
201
201
  end
202
202
 
203
- def count_ja_valid_word
203
+ def count_ja_valid_words
204
204
  re = Regexp.new("[#{Encodings["UTF-8"]::JA_GRAPH}]", Regexp::MULTILINE)
205
- split_to_word.count { |word| re.match(word.encode("UTF-8")) }
205
+ to_words.count { |word| re.match(word.encode("UTF-8")) }
206
206
  end
207
207
 
208
- def split_to_line
208
+ def to_lines
209
209
  raise <<~EOS.chomp unless EOLChars[eol]
210
210
  EOLChars[eol] is #{EOLChars[eol].inspect}: eol not specified or auto-detection failed.
211
211
  EOS
@@ -219,16 +219,16 @@ class DocDiff
219
219
  encode("UTF-8").scan(re).map { |e| e.encode(encoding) }
220
220
  end
221
221
 
222
- def count_graph_line
222
+ def count_graph_lines
223
223
  graph = (Encodings["UTF-8"]::GRAPH + Encodings["UTF-8"]::JA_GRAPH).chars.uniq.join
224
224
  re = Regexp.new("[#{Regexp.quote(graph)}]", Regexp::MULTILINE)
225
- split_to_line.count { |line| re.match(line.encode("UTF-8")) }
225
+ to_lines.count { |line| re.match(line.encode("UTF-8")) }
226
226
  end
227
227
 
228
- def count_blank_line
228
+ def count_blank_lines
229
229
  blank = (Encodings["UTF-8"]::BLANK + Encodings["UTF-8"]::JA_BLANK).chars.uniq.join
230
230
  re = Regexp.new("^[#{blank}]+(?:#{eol_char})?", Regexp::MULTILINE)
231
- split_to_line.count { |line| re.match(line.encode("UTF-8")) }
231
+ to_lines.count { |line| re.match(line.encode("UTF-8")) }
232
232
  end
233
233
 
234
234
  # load encoding modules
@@ -236,10 +236,6 @@ class DocDiff
236
236
  require "docdiff/encoding/ja_eucjp"
237
237
  require "docdiff/encoding/ja_sjis"
238
238
  require "docdiff/encoding/ja_utf8"
239
- alias_method :to_bytes, :split_to_byte
240
- alias_method :to_chars, :split_to_char
241
- alias_method :to_words, :split_to_word
242
- alias_method :to_lines, :split_to_line
243
239
 
244
240
  module CR
245
241
  EOL = "CR"