docdiff 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. data/.gitignore +6 -0
  2. data/.travis.yml +7 -0
  3. data/Gemfile +17 -0
  4. data/Guardfile +8 -0
  5. data/Makefile +108 -0
  6. data/Rakefile +17 -0
  7. data/bin/docdiff +179 -0
  8. data/devutil/JIS0208.TXT +6952 -0
  9. data/devutil/char_by_charclass.rb +23 -0
  10. data/devutil/charclass_by_char.rb +21 -0
  11. data/devutil/jis0208.rb +343 -0
  12. data/devutil/testjis0208.rb +38 -0
  13. data/docdiff.conf.example +22 -0
  14. data/docdiff.gemspec +23 -0
  15. data/docdiffwebui.cgi +176 -0
  16. data/docdiffwebui.html +123 -0
  17. data/img/docdiff-screenshot-format-html-digest-firefox.png +0 -0
  18. data/img/docdiff-screenshot-format-html-firefox.png +0 -0
  19. data/img/docdiff-screenshot-format-tty-cmdexe-en.png +0 -0
  20. data/img/docdiff-screenshot-format-tty-cmdexe-ja.png +0 -0
  21. data/img/docdiff-screenshot-format-tty-rxvtunicode-en.png +0 -0
  22. data/img/docdiff-screenshot-format-tty-rxvtunicode-ja.png +0 -0
  23. data/img/docdiff-screenshot-format-tty-xterm-en.png +0 -0
  24. data/img/docdiff-screenshot-format-tty-xterm-ja.png +0 -0
  25. data/img/docdiff-screenshot-resolution-linewordchar-xterm.png +0 -0
  26. data/index.html +181 -0
  27. data/langfilter.rb +14 -0
  28. data/lib/doc_diff.rb +170 -0
  29. data/lib/docdiff.rb +7 -0
  30. data/lib/docdiff/charstring.rb +579 -0
  31. data/lib/docdiff/diff.rb +217 -0
  32. data/lib/docdiff/diff/contours.rb +382 -0
  33. data/lib/docdiff/diff/editscript.rb +148 -0
  34. data/lib/docdiff/diff/rcsdiff.rb +107 -0
  35. data/lib/docdiff/diff/shortestpath.rb +93 -0
  36. data/lib/docdiff/diff/speculative.rb +40 -0
  37. data/lib/docdiff/diff/subsequence.rb +39 -0
  38. data/lib/docdiff/diff/unidiff.rb +124 -0
  39. data/lib/docdiff/difference.rb +92 -0
  40. data/lib/docdiff/document.rb +127 -0
  41. data/lib/docdiff/encoding/en_ascii.rb +97 -0
  42. data/lib/docdiff/encoding/ja_eucjp.rb +269 -0
  43. data/lib/docdiff/encoding/ja_sjis.rb +260 -0
  44. data/lib/docdiff/encoding/ja_utf8.rb +6974 -0
  45. data/lib/docdiff/version.rb +3 -0
  46. data/lib/docdiff/view.rb +476 -0
  47. data/lib/viewdiff.rb +375 -0
  48. data/readme.html +713 -0
  49. data/sample/01.en.ascii.cr +1 -0
  50. data/sample/01.en.ascii.crlf +2 -0
  51. data/sample/01.en.ascii.lf +2 -0
  52. data/sample/01.ja.eucjp.lf +2 -0
  53. data/sample/01.ja.sjis.cr +1 -0
  54. data/sample/01.ja.sjis.crlf +2 -0
  55. data/sample/01.ja.utf8.crlf +2 -0
  56. data/sample/02.en.ascii.cr +1 -0
  57. data/sample/02.en.ascii.crlf +2 -0
  58. data/sample/02.en.ascii.lf +2 -0
  59. data/sample/02.ja.eucjp.lf +2 -0
  60. data/sample/02.ja.sjis.cr +1 -0
  61. data/sample/02.ja.sjis.crlf +2 -0
  62. data/sample/02.ja.utf8.crlf +2 -0
  63. data/sample/humpty_dumpty01.ascii.lf +4 -0
  64. data/sample/humpty_dumpty02.ascii.lf +4 -0
  65. data/test/charstring_test.rb +1008 -0
  66. data/test/diff_test.rb +36 -0
  67. data/test/difference_test.rb +64 -0
  68. data/test/docdiff_test.rb +193 -0
  69. data/test/document_test.rb +626 -0
  70. data/test/test_helper.rb +7 -0
  71. data/test/view_test.rb +570 -0
  72. data/test/viewdiff_test.rb +908 -0
  73. metadata +129 -0
@@ -0,0 +1,36 @@
1
+ #!/usr/bin/ruby
2
+ require 'test/unit'
3
+ require "docdiff/diff"
4
+
5
+ class TC_Diff < Test::Unit::TestCase
6
+
7
+ def setup()
8
+ #
9
+ end
10
+
11
+ def test_new_ses()
12
+ a1 = [:a, :b, :c]
13
+ a2 = [:a, :x, :c]
14
+ expected = [[:common_elt_elt, [:a], [:a]],
15
+ [:del_elt, [:b], nil],
16
+ [:add_elt, nil, [:x]],
17
+ [:common_elt_elt, [:c], [:c]]]
18
+ actual = []
19
+ actual_speculative = []
20
+ actual_shortestpath = []
21
+ actual_contours = []
22
+ Diff.new(a1, a2).ses .each{|e| actual << e}
23
+ Diff.new(a1, a2).ses(:speculative ).each{|e| actual_speculative << e}
24
+ Diff.new(a1, a2).ses(:shortestpath).each{|e| actual_shortestpath << e}
25
+ Diff.new(a1, a2).ses(:contours ).each{|e| actual_contours << e}
26
+ assert_equal(expected, actual)
27
+ assert_equal(expected, actual_speculative)
28
+ assert_equal(expected, actual_shortestpath)
29
+ assert_equal(expected, actual_contours)
30
+ end
31
+
32
+ def teardown()
33
+ #
34
+ end
35
+
36
+ end
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/ruby
2
+ require 'test/unit'
3
+ require 'docdiff/difference'
4
+
5
+ class TC_Difference < Test::Unit::TestCase
6
+
7
+ def setup()
8
+ #
9
+ end
10
+
11
+ def test_new()
12
+ array1 = [:a, :b, :c]
13
+ array2 = [:a, :x, :c]
14
+ expected = [[:common_elt_elt, [:a], [:a]],
15
+ [:change_elt, [:b], [:x]],
16
+ [:common_elt_elt, [:c], [:c]]]
17
+ assert_equal(expected, Difference.new(array1, array2))
18
+ end
19
+
20
+ def test_raw_list()
21
+ array1 = [:a, :b, :c]
22
+ array2 = [:a, :x, :c]
23
+ expected = [[:common_elt_elt, [:a], [:a]],
24
+ [:del_elt, [:b], nil],
25
+ [:add_elt, nil, [:x]],
26
+ [:common_elt_elt, [:c], [:c]]]
27
+ assert_equal(expected, Difference.new(array1, array2).raw_list)
28
+ end
29
+
30
+ def test_former_only()
31
+ array1 = [:a, :b, :c]
32
+ array2 = [:a, :x, :c]
33
+ expected = [[:common_elt_elt, [:a], [:a]],
34
+ [:change_elt, [:b], nil],
35
+ [:common_elt_elt, [:c], [:c]]]
36
+ assert_equal(expected, Difference.new(array1, array2).former_only)
37
+ array1 = [:a, :b, :c]
38
+ array2 = [:a, :c, :d]
39
+ expected = [[:common_elt_elt, [:a], [:a]],
40
+ [:del_elt, [:b], nil],
41
+ [:common_elt_elt, [:c], [:c]]]
42
+ assert_equal(expected, Difference.new(array1, array2).former_only)
43
+ end
44
+
45
+ def test_latter_only()
46
+ array1 = [:a, :b, :c]
47
+ array2 = [:a, :x, :c]
48
+ expected = [[:common_elt_elt, [:a], [:a]],
49
+ [:change_elt, nil, [:x]],
50
+ [:common_elt_elt, [:c], [:c]]]
51
+ assert_equal(expected, Difference.new(array1, array2).latter_only)
52
+ array1 = [:a, :b, :c]
53
+ array2 = [:a, :c, :d]
54
+ expected = [[:common_elt_elt, [:a], [:a]],
55
+ [:common_elt_elt, [:c], [:c]],
56
+ [:add_elt, nil, [:d]]]
57
+ assert_equal(expected, Difference.new(array1, array2).latter_only)
58
+ end
59
+
60
+ def teardown()
61
+ #
62
+ end
63
+
64
+ end
@@ -0,0 +1,193 @@
1
+ #!/usr/bin/ruby
2
+ # -*- coding: us-ascii; -*-
3
+ require 'test/unit'
4
+ require 'docdiff'
5
+ require 'nkf'
6
+
7
+ class TC_Document < Test::Unit::TestCase
8
+
9
+ def setup()
10
+ #
11
+ end
12
+
13
+ def test_compare_by_line()
14
+ doc1 = Document.new("Foo bar.\nBaz quux.", 'US-ASCII', 'LF')
15
+ doc2 = Document.new("Foo.\nBaz quux.", 'US-ASCII', 'LF')
16
+ docdiff = DocDiff.new
17
+ expected = [[:change_elt, ["Foo bar.\n"], ["Foo.\n"]],
18
+ [:common_elt_elt, ['Baz quux.'], ['Baz quux.']]]
19
+ assert_equal(expected, docdiff.compare_by_line(doc1, doc2))
20
+ end
21
+ def test_compare_by_line_word()
22
+ doc1 = Document.new("a b c d\ne f", 'US-ASCII', 'LF')
23
+ doc2 = Document.new("a x c d\ne f", 'US-ASCII', 'LF')
24
+ docdiff = DocDiff.new
25
+ expected = [[:common_elt_elt, ["a "], ["a "]],
26
+ [:change_elt, ["b "], ["x "]],
27
+ [:common_elt_elt, ["c ", "d", "\n"], ["c ", "d", "\n"]],
28
+ [:common_elt_elt, ["e f"], ["e f"]]]
29
+ assert_equal(expected,
30
+ docdiff.compare_by_line_word(doc1, doc2))
31
+ end
32
+ def test_compare_by_line_word_char()
33
+ doc1 = Document.new("foo bar\nbaz", 'US-ASCII', 'LF')
34
+ doc2 = Document.new("foo beer\nbaz", 'US-ASCII', 'LF')
35
+ docdiff = DocDiff.new
36
+ expected = [[:common_elt_elt, ['foo '], ['foo ']],
37
+ [:common_elt_elt, ['b'], ['b']],
38
+ [:change_elt, ['a'], ['e', 'e']],
39
+ [:common_elt_elt, ['r'], ['r']],
40
+ [:common_elt_elt, ["\n"], ["\n"]],
41
+ [:common_elt_elt, ['baz'], ['baz']]]
42
+ assert_equal(expected,
43
+ docdiff.compare_by_line_word_char(doc1, doc2))
44
+ end
45
+
46
+ def test_run_line_html()
47
+ doc1 = Document.new("foo bar\nbaz", 'US-ASCII', 'LF')
48
+ doc2 = Document.new("foo beer\nbaz", 'US-ASCII', 'LF')
49
+ docdiff = DocDiff.new
50
+ expected = '<?xml version="1.0" encoding="US-ASCII"?>' + "\n" +
51
+ '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"' + "\n" +
52
+ '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + "\n" +
53
+ '<html><head>' + "\n" +
54
+ '<meta http-equiv="Content-Type" content="text/html; charset=US-ASCII" />' + "\n" +
55
+ '<title>Difference</title>' + "\n" +
56
+ '<style type="text/css">' + "\n" +
57
+ ' body {font-family: monospace;}' + "\n" +
58
+ ' span.del {background: hotpink; border: thin inset;}' + "\n" +
59
+ ' span.add {background: deepskyblue; font-weight: bolder; border: thin outset;}' + "\n" +
60
+ ' span.before-change {background: yellow; border: thin inset;}' + "\n" +
61
+ ' span.after-change {background: lime; font-weight: bolder; border: thin outset;}' + "\n" +
62
+ " li.entry .position {font-weight: bolder; margin-top: 0em; margin-bottom: 0em; padding-top: 0.5em; padding-bottom: 0em;}\n" +
63
+ " li.entry .body {margin-top: 0em; margin-bottom: 0em; padding-top: 0em; padding-bottom: 0.5em;}\n" +
64
+ " li.entry {border-top: thin solid gray;}\n" +
65
+ '</style>' + "\n" +
66
+ '</head><body><div>' + "\n" +
67
+ '<span class="before-change"><del>foo bar<br />' + "\n" + '</del></span>' +
68
+ '<span class="after-change"><ins>foo beer<br />' + "\n" + '</ins></span>' +
69
+ '<span class="common">baz' + "</span>" + "\n</div></body></html>" + "\n"
70
+ assert_equal(expected, docdiff.run(doc1, doc2, {:resolution => "line", :format => "html", :digest => false}))
71
+ end
72
+
73
+ def test_run_line_manued()
74
+ doc1 = Document.new("foo bar\nbaz", 'US-ASCII', 'LF')
75
+ doc2 = Document.new("foo beer\nbaz", 'US-ASCII', 'LF')
76
+ docdiff = DocDiff.new
77
+ expected = "defparentheses [ ]\n" +
78
+ "defdelete /\n" +
79
+ "defswap |\n" +
80
+ "defcomment ;\n" +
81
+ "defescape ~\n" +
82
+ "deforder newer-last\n" +
83
+ "defversion 0.9.5\n" +
84
+ "[foo bar\n/foo beer\n]baz"
85
+ assert_equal(expected, docdiff.run(doc1, doc2, {:resolution => "line", :format => "manued", :digest => false}))
86
+ end
87
+ def test_run_word_manued()
88
+ doc1 = Document.new("foo bar\nbaz", 'US-ASCII', 'LF')
89
+ doc2 = Document.new("foo beer\nbaz", 'US-ASCII', 'LF')
90
+ docdiff = DocDiff.new
91
+ expected = "defparentheses [ ]\n" +
92
+ "defdelete /\n" +
93
+ "defswap |\n" +
94
+ "defcomment ;\n" +
95
+ "defescape ~\n" +
96
+ "deforder newer-last\n" +
97
+ "defversion 0.9.5\n" +
98
+ "foo [bar/beer]\nbaz"
99
+ assert_equal(expected, docdiff.run(doc1, doc2, {:resolution => "word", :format => "manued", :digest => false}))
100
+ end
101
+ def test_run_char_manued()
102
+ doc1 = Document.new("foo bar\nbaz", 'US-ASCII', 'LF')
103
+ doc2 = Document.new("foo beer\nbaz", 'US-ASCII', 'LF')
104
+ docdiff = DocDiff.new
105
+ expected = "defparentheses [ ]\n" +
106
+ "defdelete /\n" +
107
+ "defswap |\n" +
108
+ "defcomment ;\n" +
109
+ "defescape ~\n" +
110
+ "deforder newer-last\n" +
111
+ "defversion 0.9.5\n" +
112
+ "foo b[a/ee]r\nbaz"
113
+ assert_equal(expected, docdiff.run(doc1, doc2, {:resolution => "char", :format => "manued", :digest => false}))
114
+ end
115
+
116
+ def test_parse_config_file_content()
117
+ content = ["# comment line\n",
118
+ " # comment line with leading space\n",
119
+ "foo1 = bar\n",
120
+ "foo2 = bar baz \n",
121
+ " foo3 = 123 # comment\n",
122
+ "foo4 = no \n",
123
+ "foo1 = tRue\n",
124
+ "\n",
125
+ "",
126
+ nil].join
127
+ expected = {:foo1=>true, :foo2=>"bar baz", :foo3=>123, :foo4=>false}
128
+ docdiff = DocDiff.new
129
+ assert_equal(expected,
130
+ DocDiff.parse_config_file_content(content))
131
+ end
132
+
133
+ def test_run_line_user()
134
+ doc1 = Document.new("foo bar\nbaz", 'US-ASCII', 'LF')
135
+ doc2 = Document.new("foo beer\nbaz", 'US-ASCII', 'LF')
136
+ config = {:tag_common_start => '<=>',
137
+ :tag_common_end => '</=>',
138
+ :tag_del_start => '<->',
139
+ :tag_del_end => '</->',
140
+ :tag_add_start => '<+>',
141
+ :tag_add_end => '</+>',
142
+ :tag_change_before_start => '<!->',
143
+ :tag_change_before_end => '</!->',
144
+ :tag_change_after_start => '<!+>',
145
+ :tag_change_after_end => '</!+>'}
146
+ docdiff = DocDiff.new
147
+ docdiff.config.update(config)
148
+ expected = "<!->foo bar\n</!-><!+>foo beer\n</!+><=>baz</=>"
149
+ assert_equal(expected, docdiff.run(doc1, doc2, {:resolution => "line", :format => "user", :digest => false}))
150
+ end
151
+ def test_run_word_user()
152
+ doc1 = Document.new("foo bar\nbaz", 'US-ASCII', 'LF')
153
+ doc2 = Document.new("foo beer\nbaz", 'US-ASCII', 'LF')
154
+ config = {:tag_common_start => '<=>',
155
+ :tag_common_end => '</=>',
156
+ :tag_del_start => '<->',
157
+ :tag_del_end => '</->',
158
+ :tag_add_start => '<+>',
159
+ :tag_add_end => '</+>',
160
+ :tag_change_before_start => '<!->',
161
+ :tag_change_before_end => '</!->',
162
+ :tag_change_after_start => '<!+>',
163
+ :tag_change_after_end => '</!+>'}
164
+ docdiff = DocDiff.new
165
+ docdiff.config.update(config)
166
+ expected = "<=>foo </=><!->bar</!-><!+>beer</!+><=>\n</=><=>baz</=>"
167
+ assert_equal(expected, docdiff.run(doc1, doc2, {:resolution => "word", :format => "user", :digest => false}))
168
+ end
169
+ def test_run_char_user()
170
+ doc1 = Document.new("foo bar\nbaz", 'US-ASCII', 'LF')
171
+ doc2 = Document.new("foo beer\nbaz", 'US-ASCII', 'LF')
172
+ config = {:tag_common_start => '<=>',
173
+ :tag_common_end => '</=>',
174
+ :tag_del_start => '<->',
175
+ :tag_del_end => '</->',
176
+ :tag_add_start => '<+>',
177
+ :tag_add_end => '</+>',
178
+ :tag_change_before_start => '<!->',
179
+ :tag_change_before_end => '</!->',
180
+ :tag_change_after_start => '<!+>',
181
+ :tag_change_after_end => '</!+>'}
182
+ docdiff = DocDiff.new
183
+ docdiff.config.update(config)
184
+ expected = "<=>foo </=><=>b</=><!->a</!-><!+>ee</!+><=>r</=><=>\n</=><=>baz</=>"
185
+ assert_equal(expected, docdiff.run(doc1, doc2, {:resolution => "char", :format => "user", :digest => false}))
186
+ end
187
+
188
+
189
+ def teardown()
190
+ #
191
+ end
192
+
193
+ end
@@ -0,0 +1,626 @@
1
+ #!/usr/bin/ruby
2
+ # -*- coding: euc-jp; -*-
3
+ require 'test/unit'
4
+ require 'docdiff/document'
5
+ require 'nkf'
6
+
7
+ class TC_Document < Test::Unit::TestCase
8
+
9
+ def setup()
10
+ #
11
+ end
12
+
13
+ def test_encoding()
14
+ doc = Document.new("Foo bar.\nBaz quux.")
15
+ doc.encoding = 'US-ASCII'
16
+ doc.eol = 'LF'
17
+ expected = 'US-ASCII'
18
+ assert_equal(expected, doc.encoding)
19
+ end
20
+ def test_encoding_auto()
21
+ doc = if CharString.ruby_m17n?
22
+ Document.new("Foo bar.\nBaz quux.".encode("US-ASCII"))
23
+ else
24
+ Document.new("Foo bar.\nBaz quux.")
25
+ end
26
+ expected = 'US-ASCII'
27
+ assert_equal(expected, doc.encoding)
28
+ end
29
+ def test_eol()
30
+ doc = Document.new("Foo bar.\nBaz quux.")
31
+ doc.encoding = 'US-ASCII'
32
+ doc.eol = 'LF'
33
+ expected = 'LF'
34
+ assert_equal(expected, doc.eol)
35
+ end
36
+ def test_eol_auto_lf()
37
+ doc = Document.new("Foo bar.\nBaz quux.")
38
+ expected = 'LF'
39
+ assert_equal(expected, doc.eol)
40
+ end
41
+ def test_eol_auto_none()
42
+ doc = Document.new("Foo bar.")
43
+ expected = "NONE"
44
+ assert_equal(expected, doc.eol)
45
+ end
46
+ def test_eol_char_lf()
47
+ doc = Document.new("Foo bar.\nBaz quux.")
48
+ # doc.encoding = "US-ASCII"
49
+ # doc.eol = "LF"
50
+ expected = "\n"
51
+ assert_equal(expected, doc.eol_char)
52
+ end
53
+ def test_split_by_line()
54
+ doc = Document.new("Hello, my name is Watanabe.\nI am just another Ruby porter.\n")
55
+ expected = ["Hello, my name is Watanabe.\n", "I am just another Ruby porter.\n"]
56
+ assert_equal(expected, doc.split_to_line)
57
+ end
58
+
59
+ # test eol split_to_line() method
60
+ def test_cr_split_to_line()
61
+ doc = Document.new("foo\rbar\r")
62
+ expected = ["foo\r", "bar\r"]
63
+ assert_equal(expected, doc.split_to_line)
64
+ end
65
+ def test_cr_split_to_line_chomped_lastline()
66
+ doc = Document.new("foo\rbar")
67
+ expected = ["foo\r", "bar"]
68
+ assert_equal(expected, doc.split_to_line)
69
+ end
70
+ def test_cr_split_to_line_empty_line()
71
+ doc = Document.new("foo\r\rbar\r")
72
+ expected = ["foo\r", "\r", "bar\r"]
73
+ assert_equal(expected, doc.split_to_line)
74
+ end
75
+ def test_lf_split_to_line()
76
+ doc = Document.new("foo\nbar\n")
77
+ expected = ["foo\n", "bar\n"]
78
+ assert_equal(expected, doc.split_to_line)
79
+ end
80
+ def test_lf_split_to_line_chomped_lastline()
81
+ doc = Document.new("foo\nbar")
82
+ expected = ["foo\n", "bar"]
83
+ assert_equal(expected, doc.split_to_line)
84
+ end
85
+ def test_lf_split_to_line_empty_line()
86
+ doc = Document.new("foo\n\nbar\n")
87
+ expected = ["foo\n", "\n", "bar\n"]
88
+ assert_equal(expected, doc.split_to_line)
89
+ end
90
+ def test_crlf_split_to_line()
91
+ doc = Document.new("foo\r\nbar\r\n")
92
+ expected = ["foo\r\n", "bar\r\n"]
93
+ assert_equal(expected, doc.split_to_line)
94
+ end
95
+ def test_crlf_split_to_line_chomped_lastline()
96
+ doc = Document.new("foo\r\nbar")
97
+ expected = ["foo\r\n", "bar"]
98
+ assert_equal(expected, doc.split_to_line)
99
+ end
100
+ def test_crlf_split_to_line_empty_line()
101
+ doc = Document.new("foo\r\n\r\nbar\r\n")
102
+ expected = ["foo\r\n", "\r\n", "bar\r\n"]
103
+ assert_equal(expected, doc.split_to_line)
104
+ end
105
+
106
+ # test ASCII module
107
+ def test_ascii_split_to_word()
108
+ doc = Document.new("foo bar")
109
+ expected = ["foo ", "bar"]
110
+ assert_equal(expected, doc.split_to_word)
111
+ end
112
+ def test_ascii_split_to_word_withsymbol()
113
+ doc = Document.new("foo (bar) baz-baz")
114
+ expected = ["foo ", "(bar) ", "baz-baz"]
115
+ assert_equal(expected, doc.split_to_word)
116
+ end
117
+ def test_ascii_split_to_word_withquote()
118
+ doc = Document.new("foo's 'foo' \"bar\" 'baz.'")
119
+ expected = ["foo's ", "'foo' ", "\"bar\" ", "'baz.'"]
120
+ assert_equal(expected, doc.split_to_word)
121
+ end
122
+ def test_ascii_split_to_word_withlongspace()
123
+ doc = Document.new(" foo bar")
124
+ expected = [" ", "foo ", " ", "bar"]
125
+ assert_equal(expected, doc.split_to_word)
126
+ end
127
+ def test_ascii_split_to_word_withdash()
128
+ doc = Document.new("foo -- bar, baz - quux")
129
+ expected = ["foo ", "-- ", "bar, ", "baz ", "- ", "quux"]
130
+ assert_equal(expected, doc.split_to_word)
131
+ end
132
+ def test_ascii_split_to_char()
133
+ doc = Document.new("foo bar")
134
+ expected = ["f","o","o"," ","b","a","r"]
135
+ assert_equal(expected, doc.split_to_char)
136
+ end
137
+ def test_ascii_split_to_char_with_eol_cr()
138
+ doc = Document.new("foo bar\r")
139
+ expected = ["f","o","o"," ","b","a","r","\r"]
140
+ assert_equal(expected, doc.split_to_char)
141
+ end
142
+ def test_ascii_split_to_char_with_eol_lf()
143
+ doc = Document.new("foo bar\n")
144
+ expected = ["f","o","o"," ","b","a","r","\n"]
145
+ assert_equal(expected, doc.split_to_char)
146
+ end
147
+ def test_ascii_split_to_char_with_eol_crlf()
148
+ doc = Document.new("foo bar\r\n")
149
+ expected = ["f","o","o"," ","b","a","r","\r\n"]
150
+ assert_equal(expected, doc.split_to_char)
151
+ end
152
+ def test_ascii_split_to_byte()
153
+ doc = Document.new("foo bar\r\n")
154
+ expected = ["f","o","o"," ","b","a","r","\r","\n"]
155
+ assert_equal(expected, doc.split_to_byte)
156
+ end
157
+ def test_ascii_count_byte()
158
+ doc = Document.new("foo bar\r\n")
159
+ expected = 9
160
+ assert_equal(expected, doc.count_byte)
161
+ end
162
+ def test_ascii_count_char()
163
+ doc = Document.new("foo bar\r\nbaz quux\r\n")
164
+ expected = 17
165
+ assert_equal(expected, doc.count_char)
166
+ end
167
+ def test_ascii_count_latin_graph_char()
168
+ doc = Document.new("foo bar\r\nbaz quux\r\n")
169
+ expected = 13
170
+ assert_equal(expected, doc.count_latin_graph_char)
171
+ end
172
+ def test_ascii_count_graph_char()
173
+ doc = Document.new("foo bar\r\nbaz quux\r\n")
174
+ expected = 13
175
+ assert_equal(expected, doc.count_graph_char)
176
+ end
177
+ def test_ascii_count_latin_blank_char()
178
+ doc = Document.new("foo bar\r\nbaz\tquux\r\n")
179
+ expected = 2
180
+ assert_equal(expected, doc.count_latin_blank_char)
181
+ end
182
+ def test_ascii_count_blank_char()
183
+ doc = Document.new("foo bar\r\nbaz\tquux\r\n")
184
+ expected = 2
185
+ assert_equal(expected, doc.count_blank_char)
186
+ end
187
+ def test_ascii_count_word()
188
+ doc = Document.new("foo bar \r\nbaz quux\r\n")
189
+ expected = 6
190
+ assert_equal(expected, doc.count_word)
191
+ end
192
+ def test_ascii_count_latin_word()
193
+ doc = Document.new("foo bar \r\nbaz quux\r\n")
194
+ expected = 5 # " " is also counted as a word
195
+ assert_equal(expected, doc.count_latin_word)
196
+ end
197
+ def test_ascii_count_latin_valid_word()
198
+ doc = Document.new("1 foo \r\n%%% ()\r\n")
199
+ expected = 2
200
+ assert_equal(expected, doc.count_latin_valid_word)
201
+ end
202
+ def test_ascii_count_line()
203
+ doc = Document.new("foo\r\nbar")
204
+ expected = 2
205
+ assert_equal(expected, doc.count_line)
206
+ end
207
+ def test_ascii_count_graph_line()
208
+ doc = Document.new("foo\r\n ")
209
+ expected = 1
210
+ assert_equal(expected, doc.count_graph_line)
211
+ end
212
+ def test_ascii_count_empty_line()
213
+ doc = Document.new("foo\r\n \r\n\t\r\n\r\n")
214
+ expected = 1
215
+ assert_equal(expected, doc.count_empty_line)
216
+ end
217
+ def test_ascii_count_blank_line()
218
+ doc = Document.new("\r\n \r\n\t\r\n ")
219
+ expected = 3
220
+ assert_equal(expected, doc.count_blank_line)
221
+ end
222
+
223
+ # test EUCJP module
224
+ def test_eucjp_split_to_word()
225
+ doc = Document.new(NKF.nkf("-e", "���ܸ��ʸ��foo bar"))
226
+ expected = ["���ܸ��","ʸ��","foo ","bar"].collect{|c| NKF.nkf("-e", c)}
227
+ assert_equal(expected, doc.split_to_word)
228
+ end
229
+ def test_eucjp_split_to_word_kanhira()
230
+ doc = Document.new(NKF.nkf("-e", "���ܸ��ʸ��"))
231
+ expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
232
+ assert_equal(expected, doc.split_to_word)
233
+ end
234
+ def test_eucjp_split_to_word_katahira()
235
+ doc = Document.new(NKF.nkf("-e", "�������ʤ�ʸ��"))
236
+ expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
237
+ assert_equal(expected, doc.split_to_word)
238
+ end
239
+ def test_eucjp_split_to_word_kataonbiki()
240
+ doc = Document.new(NKF.nkf("-e", "��ӡ�������"), "EUC-JP")
241
+ expected = ["��ӡ�", "����", "��"].collect{|c| NKF.nkf("-e", c)}
242
+ assert_equal(expected, doc.split_to_word)
243
+ end
244
+ def test_eucjp_split_to_word_hiraonbiki()
245
+ doc = Document.new(NKF.nkf("-e", "���ӡ���"), "EUC-JP")
246
+ expected = (["�", "��ӡ���"]).collect{|c| NKF.nkf("-e", c)}
247
+ assert_equal(expected, doc.split_to_word)
248
+ end
249
+ def test_eucjp_split_to_word_latinmix()
250
+ doc = Document.new(NKF.nkf("-e", "���ܸ��Latin��ʸ��"))
251
+ expected = ["���ܸ��", "Latin", "��", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
252
+ assert_equal(expected, doc.split_to_word)
253
+ end
254
+ def test_eucjp_split_to_char()
255
+ doc = Document.new(NKF.nkf("-e", "���ܸ�a b"))
256
+ expected = ["��","��","��","a"," ","b"].collect{|c|NKF.nkf("-e",c)}
257
+ assert_equal(expected, doc.split_to_char)
258
+ end
259
+ def test_eucjp_split_to_char_with_cr()
260
+ doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r"))
261
+ expected = ["��","��","��","a"," ","b","\r"].collect{|c|NKF.nkf("-e",c)}
262
+ assert_equal(expected, doc.split_to_char)
263
+ end
264
+ def test_eucjp_split_to_char_with_lf()
265
+ doc = Document.new(NKF.nkf("-e", "���ܸ�a b\n"))
266
+ expected = ["��","��","��","a"," ","b","\n"].collect{|c|NKF.nkf("-e",c)}
267
+ assert_equal(expected, doc.split_to_char)
268
+ end
269
+ def test_eucjp_split_to_char_with_crlf()
270
+ doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
271
+ expected = ["��","��","��","a"," ","b","\r\n"].collect{|c|NKF.nkf("-e",c)}
272
+ assert_equal(expected, doc.split_to_char)
273
+ end
274
+ def test_eucjp_count_char()
275
+ doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
276
+ expected = 7
277
+ assert_equal(expected, doc.count_char)
278
+ end
279
+ def test_eucjp_count_latin_graph_char()
280
+ doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
281
+ expected = 2
282
+ assert_equal(expected, doc.count_latin_graph_char)
283
+ end
284
+ def test_eucjp_count_ja_graph_char()
285
+ doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
286
+ expected = 3
287
+ assert_equal(expected, doc.count_ja_graph_char)
288
+ end
289
+ def test_eucjp_count_graph_char()
290
+ doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
291
+ expected = 5
292
+ assert_equal(expected, doc.count_graph_char)
293
+ end
294
+ def test_eucjp_count_latin_blank_char()
295
+ doc = Document.new(NKF.nkf("-e", "���ܸ�\ta b\r\n"))
296
+ expected = 2
297
+ assert_equal(expected, doc.count_latin_blank_char)
298
+ end
299
+ def test_eucjp_count_ja_blank_char()
300
+ doc = Document.new(NKF.nkf("-e", "���ܡ���\ta b\r\n"))
301
+ expected = 1
302
+ assert_equal(expected, doc.count_ja_blank_char)
303
+ end
304
+ def test_eucjp_count_blank_char()
305
+ doc = Document.new(NKF.nkf("-e", "���ܡ���\ta b\r\n"))
306
+ expected = 3
307
+ assert_equal(expected, doc.count_blank_char)
308
+ end
309
+ def test_eucjp_count_word()
310
+ doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
311
+ expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
312
+ assert_equal(expected, doc.count_word)
313
+ end
314
+ def test_eucjp_count_ja_word()
315
+ doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
316
+ expected = 3
317
+ assert_equal(expected, doc.count_ja_word)
318
+ end
319
+ def test_eucjp_count_latin_valid_word()
320
+ doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
321
+ expected = 2
322
+ assert_equal(expected, doc.count_latin_valid_word)
323
+ end
324
+ def test_eucjp_count_ja_valid_word()
325
+ doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
326
+ expected = 2
327
+ assert_equal(expected, doc.count_ja_valid_word)
328
+ end
329
+ def test_eucjp_count_valid_word()
330
+ doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
331
+ expected = 4
332
+ assert_equal(expected, doc.count_valid_word)
333
+ end
334
+ def test_eucjp_count_line()
335
+ doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
336
+ expected = 6
337
+ assert_equal(expected, doc.count_line)
338
+ end
339
+ def test_eucjp_count_graph_line()
340
+ doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
341
+ expected = 3
342
+ assert_equal(expected, doc.count_graph_line)
343
+ end
344
+ def test_eucjp_count_empty_line()
345
+ doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
346
+ expected = 1
347
+ assert_equal(expected, doc.count_empty_line)
348
+ end
349
+ def test_eucjp_count_blank_line()
350
+ doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
351
+ expected = 2
352
+ assert_equal(expected, doc.count_blank_line)
353
+ end
354
+
355
+ # test SJIS module
356
+ def test_sjis_split_to_word()
357
+ doc = Document.new(NKF.nkf("-s", "���ܸ��ʸ��foo bar"))
358
+ expected = ["���ܸ��", "ʸ��", "foo ", "bar"].collect{|c|NKF.nkf("-s",c)}
359
+ assert_equal(expected, doc.split_to_word)
360
+ end
361
+ def test_sjisplit_s_to_word_kanhira()
362
+ doc = Document.new(NKF.nkf("-s", "���ܸ��ʸ��"))
363
+ expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-s", c)}
364
+ assert_equal(expected, doc.split_to_word)
365
+ end
366
+ def test_sjis_split_to_word_katahira()
367
+ doc = Document.new(NKF.nkf("-s", "�������ʤ�ʸ��"))
368
+ expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-s", c)}
369
+ assert_equal(expected, doc.split_to_word)
370
+ end
371
+ def test_sjis_split_to_word_kataonbiki()
372
+ doc = Document.new(NKF.nkf("-s", "��ӡ��λ���"))
373
+ expected = ["��ӡ���", "����"].collect{|c| NKF.nkf("-s", c)}
374
+ assert_equal(expected, doc.split_to_word)
375
+ end
376
+ def test_sjis_split_to_word_hiraonbiki()
377
+ doc = Document.new(NKF.nkf("-s", "���ӡ���"))
378
+ expected = ["�", "��ӡ���"].collect{|c| NKF.nkf("-s", c)}
379
+ assert_equal(expected, doc.split_to_word)
380
+ end
381
+ def test_sjis_split_to_word_latinmix()
382
+ doc = Document.new(NKF.nkf("-s", "���ܸ��Latin��ʸ��"))
383
+ expected = ["���ܸ��","Latin","��","ʸ��"].collect{|c| NKF.nkf("-s", c)}
384
+ assert_equal(expected, doc.split_to_word)
385
+ end
386
+ def test_sjis_split_to_char()
387
+ doc = Document.new(NKF.nkf("-s", "ɽ�׻�a b"))
388
+ expected = ["ɽ","��","��","a"," ","b"].collect{|c|NKF.nkf("-s",c)}
389
+ assert_equal(expected, doc.split_to_char)
390
+ end
391
+ def test_sjis_split_to_char_with_cr()
392
+ doc = Document.new(NKF.nkf("-s", "ɽ�׻�a b\r"))
393
+ expected = ["ɽ","��","��","a"," ","b","\r"].collect{|c|NKF.nkf("-s",c)}
394
+ assert_equal(expected, doc.split_to_char)
395
+ end
396
+ def test_sjis_split_to_char_with_lf()
397
+ doc = Document.new(NKF.nkf("-s", "ɽ�׻�a b\n"))
398
+ expected = ["ɽ","��","��","a"," ","b","\n"].collect{|c|NKF.nkf("-s",c)}
399
+ assert_equal(expected, doc.split_to_char)
400
+ end
401
+ def test_sjis_split_to_char_with_crlf()
402
+ doc = Document.new(NKF.nkf("-s", "ɽ�׻�a b\r\n"))
403
+ expected = ["ɽ","��","��","a"," ","b","\r\n"].collect{|c|NKF.nkf("-s",c)}
404
+ assert_equal(expected, doc.split_to_char)
405
+ end
406
+ def test_sjis_count_char()
407
+ doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
408
+ expected = 7
409
+ assert_equal(expected, doc.count_char)
410
+ end
411
+ def test_sjis_count_latin_graph_char()
412
+ doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
413
+ expected = 2
414
+ assert_equal(expected, doc.count_latin_graph_char)
415
+ end
416
+ def test_sjis_count_ja_graph_char()
417
+ doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
418
+ expected = 3
419
+ assert_equal(expected, doc.count_ja_graph_char)
420
+ end
421
+ def test_sjis_count_graph_char()
422
+ doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
423
+ expected = 5
424
+ assert_equal(expected, doc.count_graph_char)
425
+ end
426
+ def test_sjis_count_latin_blank_char()
427
+ doc = Document.new(NKF.nkf("-s", "���ܸ�\ta b\r\n"))
428
+ expected = 2
429
+ assert_equal(expected, doc.count_latin_blank_char)
430
+ end
431
+ def test_sjis_count_ja_blank_char()
432
+ doc = Document.new(NKF.nkf("-s", "���ܡ���\ta b\r\n"))
433
+ expected = 1
434
+ assert_equal(expected, doc.count_ja_blank_char)
435
+ end
436
+ def test_sjis_count_blank_char()
437
+ doc = Document.new(NKF.nkf("-s", "���ܡ���\ta b\r\n"))
438
+ expected = 3
439
+ assert_equal(expected, doc.count_blank_char)
440
+ end
441
+ def test_sjis_count_word()
442
+ doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
443
+ expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
444
+ assert_equal(expected, doc.count_word)
445
+ end
446
+ def test_sjis_count_ja_word()
447
+ doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
448
+ expected = 3
449
+ assert_equal(expected, doc.count_ja_word)
450
+ end
451
+ def test_sjis_count_latin_valid_word()
452
+ doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
453
+ expected = 2
454
+ assert_equal(expected, doc.count_latin_valid_word)
455
+ end
456
+ def test_sjis_count_ja_valid_word()
457
+ doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
458
+ expected = 2
459
+ assert_equal(expected, doc.count_ja_valid_word)
460
+ end
461
+ def test_sjis_count_valid_word()
462
+ doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
463
+ expected = 4
464
+ assert_equal(expected, doc.count_valid_word)
465
+ end
466
+ def test_sjis_count_line()
467
+ doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
468
+ expected = 6
469
+ assert_equal(expected, doc.count_line)
470
+ end
471
+ def test_sjis_count_graph_line()
472
+ doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
473
+ expected = 3
474
+ assert_equal(expected, doc.count_graph_line)
475
+ end
476
+ def test_sjis_count_empty_line()
477
+ doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
478
+ expected = 1
479
+ assert_equal(expected, doc.count_empty_line)
480
+ end
481
+ def test_sjis_count_blank_line()
482
+ doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
483
+ expected = 2
484
+ assert_equal(expected, doc.count_blank_line)
485
+ end
486
+
487
+ # test UTF8 module
488
+ def test_utf8_split_to_word()
489
+ doc = Document.new(NKF.nkf("-E -w", "���ܸ��ʸ��foo bar"))
490
+ expected = ["���ܸ��", "ʸ��", "foo ", "bar"].collect{|c| NKF.nkf("-E -w", c)}
491
+ assert_equal(expected, doc.split_to_word)
492
+ end
493
+ def test_utf8_split_to_word_kanhira()
494
+ doc = Document.new(NKF.nkf("-E -w", "���ܸ��ʸ��"))
495
+ expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
496
+ assert_equal(expected, doc.split_to_word)
497
+ end
498
+ def test_utf8_split_to_word_katahira()
499
+ doc = Document.new(NKF.nkf("-E -w", "�������ʤ�ʸ��"))
500
+ expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
501
+ assert_equal(expected, doc.split_to_word)
502
+ end
503
+ def test_utf8_split_to_word_kataonbiki()
504
+ doc = Document.new(NKF.nkf("-E -w", "��ӡ��λ���"))
505
+ expected = ["��ӡ���", "����"].collect{|c| NKF.nkf("-E -w", c)}
506
+ assert_equal(expected, doc.split_to_word)
507
+ end
508
+ def test_utf8_split_to_word_hiraonbiki()
509
+ doc = Document.new(NKF.nkf("-E -w", "���ӡ���"))
510
+ expected = ["�", "��ӡ���"].collect{|c| NKF.nkf("-E -w", c)}
511
+ assert_equal(expected, doc.split_to_word)
512
+ end
513
+ def test_utf8_split_to_word_latinmix()
514
+ doc = Document.new(NKF.nkf("-E -w", "���ܸ��Latin��ʸ��"))
515
+ expected = ["���ܸ��", "Latin", "��", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
516
+ assert_equal(expected, doc.split_to_word)
517
+ end
518
+ def test_utf8_split_to_char()
519
+ doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b"), "UTF-8")
520
+ expected = ["��", "��", "��", "a", " ", "b"].collect{|c| NKF.nkf("-E -w", c)}
521
+ assert_equal(expected, doc.split_to_char)
522
+ end
523
+ def test_utf8_split_to_char_with_cr()
524
+ doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r"), "UTF-8")
525
+ expected = ["��","��","��","a"," ","b","\r"].collect{|c| NKF.nkf("-E -w", c)}
526
+ assert_equal(expected, doc.split_to_char)
527
+ end
528
+ def test_utf8_split_to_char_with_lf()
529
+ doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\n"), "UTF-8")
530
+ expected = ["��","��","��","a"," ","b","\n"].collect{|c| NKF.nkf("-E -w", c)}
531
+ assert_equal(expected, doc.split_to_char)
532
+ end
533
+ def test_utf8_split_to_char_with_crlf()
534
+ doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
535
+ expected = ["��","��","��","a"," ","b","\r\n"].collect{|c| NKF.nkf("-E -w", c)}
536
+ assert_equal(expected, doc.split_to_char)
537
+ end
538
+ def test_utf8_count_char()
539
+ doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
540
+ expected = 7
541
+ assert_equal(expected, doc.count_char)
542
+ end
543
+ def test_utf8_count_latin_graph_char()
544
+ doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
545
+ expected = 2
546
+ assert_equal(expected, doc.count_latin_graph_char)
547
+ end
548
+ def test_utf8_count_ja_graph_char()
549
+ doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
550
+ expected = 3
551
+ assert_equal(expected, doc.count_ja_graph_char)
552
+ end
553
+ def test_utf8_count_graph_char()
554
+ doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
555
+ expected = 5
556
+ assert_equal(expected, doc.count_graph_char)
557
+ end
558
+ def test_utf8_count_latin_blank_char()
559
+ doc = Document.new(NKF.nkf("-E -w", "���ܸ�\ta b\r\n"))
560
+ expected = 2
561
+ assert_equal(expected, doc.count_latin_blank_char)
562
+ end
563
+ def test_utf8_count_ja_blank_char()
564
+ doc = Document.new(NKF.nkf("-E -w", "���ܡ���\ta b\r\n"))
565
+ expected = 1
566
+ assert_equal(expected, doc.count_ja_blank_char)
567
+ end
568
+ def test_utf8_count_blank_char()
569
+ doc = Document.new(NKF.nkf("-E -w", "���ܡ���\ta b\r\n"))
570
+ expected = 3
571
+ assert_equal(expected, doc.count_blank_char)
572
+ end
573
+ def test_utf8_count_word()
574
+ doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
575
+ expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
576
+ assert_equal(expected, doc.count_word)
577
+ end
578
+ def test_utf8_count_ja_word()
579
+ doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
580
+ expected = 3
581
+ assert_equal(expected, doc.count_ja_word)
582
+ end
583
+ def test_utf8_count_latin_valid_word()
584
+ doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
585
+ expected = 2
586
+ assert_equal(expected, doc.count_latin_valid_word)
587
+ end
588
+ def test_utf8_count_ja_valid_word()
589
+ doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
590
+ expected = 2
591
+ assert_equal(expected, doc.count_ja_valid_word)
592
+ end
593
+ def test_utf8_count_valid_word()
594
+ doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
595
+ expected = 4
596
+ assert_equal(expected, doc.count_valid_word)
597
+ end
598
+ def test_utf8_count_line()
599
+ doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
600
+ expected = 6
601
+ assert_equal(expected, doc.count_line)
602
+ end
603
+ def test_utf8_count_graph_line()
604
+ doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
605
+ expected = 3
606
+ assert_equal(expected, doc.count_graph_line)
607
+ end
608
+ def test_utf8_count_empty_line()
609
+ doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
610
+ expected = 1
611
+ assert_equal(expected, doc.count_empty_line)
612
+ end
613
+ def test_utf8_count_blank_line()
614
+ doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
615
+ expected = 2
616
+ assert_equal(expected, doc.count_blank_line)
617
+ end
618
+
619
+
620
+
621
+
622
+ def teardown()
623
+ #
624
+ end
625
+
626
+ end