docdiff 0.5.0

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. data/.gitignore +6 -0
  2. data/.travis.yml +7 -0
  3. data/Gemfile +17 -0
  4. data/Guardfile +8 -0
  5. data/Makefile +108 -0
  6. data/Rakefile +17 -0
  7. data/bin/docdiff +179 -0
  8. data/devutil/JIS0208.TXT +6952 -0
  9. data/devutil/char_by_charclass.rb +23 -0
  10. data/devutil/charclass_by_char.rb +21 -0
  11. data/devutil/jis0208.rb +343 -0
  12. data/devutil/testjis0208.rb +38 -0
  13. data/docdiff.conf.example +22 -0
  14. data/docdiff.gemspec +23 -0
  15. data/docdiffwebui.cgi +176 -0
  16. data/docdiffwebui.html +123 -0
  17. data/img/docdiff-screenshot-format-html-digest-firefox.png +0 -0
  18. data/img/docdiff-screenshot-format-html-firefox.png +0 -0
  19. data/img/docdiff-screenshot-format-tty-cmdexe-en.png +0 -0
  20. data/img/docdiff-screenshot-format-tty-cmdexe-ja.png +0 -0
  21. data/img/docdiff-screenshot-format-tty-rxvtunicode-en.png +0 -0
  22. data/img/docdiff-screenshot-format-tty-rxvtunicode-ja.png +0 -0
  23. data/img/docdiff-screenshot-format-tty-xterm-en.png +0 -0
  24. data/img/docdiff-screenshot-format-tty-xterm-ja.png +0 -0
  25. data/img/docdiff-screenshot-resolution-linewordchar-xterm.png +0 -0
  26. data/index.html +181 -0
  27. data/langfilter.rb +14 -0
  28. data/lib/doc_diff.rb +170 -0
  29. data/lib/docdiff.rb +7 -0
  30. data/lib/docdiff/charstring.rb +579 -0
  31. data/lib/docdiff/diff.rb +217 -0
  32. data/lib/docdiff/diff/contours.rb +382 -0
  33. data/lib/docdiff/diff/editscript.rb +148 -0
  34. data/lib/docdiff/diff/rcsdiff.rb +107 -0
  35. data/lib/docdiff/diff/shortestpath.rb +93 -0
  36. data/lib/docdiff/diff/speculative.rb +40 -0
  37. data/lib/docdiff/diff/subsequence.rb +39 -0
  38. data/lib/docdiff/diff/unidiff.rb +124 -0
  39. data/lib/docdiff/difference.rb +92 -0
  40. data/lib/docdiff/document.rb +127 -0
  41. data/lib/docdiff/encoding/en_ascii.rb +97 -0
  42. data/lib/docdiff/encoding/ja_eucjp.rb +269 -0
  43. data/lib/docdiff/encoding/ja_sjis.rb +260 -0
  44. data/lib/docdiff/encoding/ja_utf8.rb +6974 -0
  45. data/lib/docdiff/version.rb +3 -0
  46. data/lib/docdiff/view.rb +476 -0
  47. data/lib/viewdiff.rb +375 -0
  48. data/readme.html +713 -0
  49. data/sample/01.en.ascii.cr +1 -0
  50. data/sample/01.en.ascii.crlf +2 -0
  51. data/sample/01.en.ascii.lf +2 -0
  52. data/sample/01.ja.eucjp.lf +2 -0
  53. data/sample/01.ja.sjis.cr +1 -0
  54. data/sample/01.ja.sjis.crlf +2 -0
  55. data/sample/01.ja.utf8.crlf +2 -0
  56. data/sample/02.en.ascii.cr +1 -0
  57. data/sample/02.en.ascii.crlf +2 -0
  58. data/sample/02.en.ascii.lf +2 -0
  59. data/sample/02.ja.eucjp.lf +2 -0
  60. data/sample/02.ja.sjis.cr +1 -0
  61. data/sample/02.ja.sjis.crlf +2 -0
  62. data/sample/02.ja.utf8.crlf +2 -0
  63. data/sample/humpty_dumpty01.ascii.lf +4 -0
  64. data/sample/humpty_dumpty02.ascii.lf +4 -0
  65. data/test/charstring_test.rb +1008 -0
  66. data/test/diff_test.rb +36 -0
  67. data/test/difference_test.rb +64 -0
  68. data/test/docdiff_test.rb +193 -0
  69. data/test/document_test.rb +626 -0
  70. data/test/test_helper.rb +7 -0
  71. data/test/view_test.rb +570 -0
  72. data/test/viewdiff_test.rb +908 -0
  73. metadata +129 -0
data/test/diff_test.rb
@@ -0,0 +1,36 @@
+ #!/usr/bin/ruby
+ require 'test/unit'
+ require "docdiff/diff"
+
+ class TC_Diff < Test::Unit::TestCase
+
+   def setup()
+     #
+   end
+
+   def test_new_ses()
+     a1 = [:a, :b, :c]
+     a2 = [:a, :x, :c]
+     expected = [[:common_elt_elt, [:a], [:a]],
+                 [:del_elt, [:b], nil],
+                 [:add_elt, nil, [:x]],
+                 [:common_elt_elt, [:c], [:c]]]
+     actual = []
+     actual_speculative = []
+     actual_shortestpath = []
+     actual_contours = []
+     Diff.new(a1, a2).ses .each{|e| actual << e}
+     Diff.new(a1, a2).ses(:speculative ).each{|e| actual_speculative << e}
+     Diff.new(a1, a2).ses(:shortestpath).each{|e| actual_shortestpath << e}
+     Diff.new(a1, a2).ses(:contours ).each{|e| actual_contours << e}
+     assert_equal(expected, actual)
+     assert_equal(expected, actual_speculative)
+     assert_equal(expected, actual_shortestpath)
+     assert_equal(expected, actual_contours)
+   end
+
+   def teardown()
+     #
+   end
+
+ end
data/test/difference_test.rb
@@ -0,0 +1,64 @@
+ #!/usr/bin/ruby
+ require 'test/unit'
+ require 'docdiff/difference'
+
+ class TC_Difference < Test::Unit::TestCase
+
+   def setup()
+     #
+   end
+
+   def test_new()
+     array1 = [:a, :b, :c]
+     array2 = [:a, :x, :c]
+     expected = [[:common_elt_elt, [:a], [:a]],
+                 [:change_elt, [:b], [:x]],
+                 [:common_elt_elt, [:c], [:c]]]
+     assert_equal(expected, Difference.new(array1, array2))
+   end
+
+   def test_raw_list()
+     array1 = [:a, :b, :c]
+     array2 = [:a, :x, :c]
+     expected = [[:common_elt_elt, [:a], [:a]],
+                 [:del_elt, [:b], nil],
+                 [:add_elt, nil, [:x]],
+                 [:common_elt_elt, [:c], [:c]]]
+     assert_equal(expected, Difference.new(array1, array2).raw_list)
+   end
+
+   def test_former_only()
+     array1 = [:a, :b, :c]
+     array2 = [:a, :x, :c]
+     expected = [[:common_elt_elt, [:a], [:a]],
+                 [:change_elt, [:b], nil],
+                 [:common_elt_elt, [:c], [:c]]]
+     assert_equal(expected, Difference.new(array1, array2).former_only)
+     array1 = [:a, :b, :c]
+     array2 = [:a, :c, :d]
+     expected = [[:common_elt_elt, [:a], [:a]],
+                 [:del_elt, [:b], nil],
+                 [:common_elt_elt, [:c], [:c]]]
+     assert_equal(expected, Difference.new(array1, array2).former_only)
+   end
+
+   def test_latter_only()
+     array1 = [:a, :b, :c]
+     array2 = [:a, :x, :c]
+     expected = [[:common_elt_elt, [:a], [:a]],
+                 [:change_elt, nil, [:x]],
+                 [:common_elt_elt, [:c], [:c]]]
+     assert_equal(expected, Difference.new(array1, array2).latter_only)
+     array1 = [:a, :b, :c]
+     array2 = [:a, :c, :d]
+     expected = [[:common_elt_elt, [:a], [:a]],
+                 [:common_elt_elt, [:c], [:c]],
+                 [:add_elt, nil, [:d]]]
+     assert_equal(expected, Difference.new(array1, array2).latter_only)
+   end
+
+   def teardown()
+     #
+   end
+
+ end
data/test/docdiff_test.rb
@@ -0,0 +1,193 @@
+ #!/usr/bin/ruby
+ # -*- coding: us-ascii; -*-
+ require 'test/unit'
+ require 'docdiff'
+ require 'nkf'
+
+ class TC_Document < Test::Unit::TestCase
+
+   def setup()
+     #
+   end
+
+   def test_compare_by_line()
+     doc1 = Document.new("Foo bar.\nBaz quux.", 'US-ASCII', 'LF')
+     doc2 = Document.new("Foo.\nBaz quux.", 'US-ASCII', 'LF')
+     docdiff = DocDiff.new
+     expected = [[:change_elt, ["Foo bar.\n"], ["Foo.\n"]],
+                 [:common_elt_elt, ['Baz quux.'], ['Baz quux.']]]
+     assert_equal(expected, docdiff.compare_by_line(doc1, doc2))
+   end
+   def test_compare_by_line_word()
+     doc1 = Document.new("a b c d\ne f", 'US-ASCII', 'LF')
+     doc2 = Document.new("a x c d\ne f", 'US-ASCII', 'LF')
+     docdiff = DocDiff.new
+     expected = [[:common_elt_elt, ["a "], ["a "]],
+                 [:change_elt, ["b "], ["x "]],
+                 [:common_elt_elt, ["c ", "d", "\n"], ["c ", "d", "\n"]],
+                 [:common_elt_elt, ["e f"], ["e f"]]]
+     assert_equal(expected,
+                  docdiff.compare_by_line_word(doc1, doc2))
+   end
+   def test_compare_by_line_word_char()
+     doc1 = Document.new("foo bar\nbaz", 'US-ASCII', 'LF')
+     doc2 = Document.new("foo beer\nbaz", 'US-ASCII', 'LF')
+     docdiff = DocDiff.new
+     expected = [[:common_elt_elt, ['foo '], ['foo ']],
+                 [:common_elt_elt, ['b'], ['b']],
+                 [:change_elt, ['a'], ['e', 'e']],
+                 [:common_elt_elt, ['r'], ['r']],
+                 [:common_elt_elt, ["\n"], ["\n"]],
+                 [:common_elt_elt, ['baz'], ['baz']]]
+     assert_equal(expected,
+                  docdiff.compare_by_line_word_char(doc1, doc2))
+   end
+
+   def test_run_line_html()
+     doc1 = Document.new("foo bar\nbaz", 'US-ASCII', 'LF')
+     doc2 = Document.new("foo beer\nbaz", 'US-ASCII', 'LF')
+     docdiff = DocDiff.new
+     expected = '<?xml version="1.0" encoding="US-ASCII"?>' + "\n" +
+                '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"' + "\n" +
+                '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + "\n" +
+                '<html><head>' + "\n" +
+                '<meta http-equiv="Content-Type" content="text/html; charset=US-ASCII" />' + "\n" +
+                '<title>Difference</title>' + "\n" +
+                '<style type="text/css">' + "\n" +
+                ' body {font-family: monospace;}' + "\n" +
+                ' span.del {background: hotpink; border: thin inset;}' + "\n" +
+                ' span.add {background: deepskyblue; font-weight: bolder; border: thin outset;}' + "\n" +
+                ' span.before-change {background: yellow; border: thin inset;}' + "\n" +
+                ' span.after-change {background: lime; font-weight: bolder; border: thin outset;}' + "\n" +
+                " li.entry .position {font-weight: bolder; margin-top: 0em; margin-bottom: 0em; padding-top: 0.5em; padding-bottom: 0em;}\n" +
+                " li.entry .body {margin-top: 0em; margin-bottom: 0em; padding-top: 0em; padding-bottom: 0.5em;}\n" +
+                " li.entry {border-top: thin solid gray;}\n" +
+                '</style>' + "\n" +
+                '</head><body><div>' + "\n" +
+                '<span class="before-change"><del>foo bar<br />' + "\n" + '</del></span>' +
+                '<span class="after-change"><ins>foo beer<br />' + "\n" + '</ins></span>' +
+                '<span class="common">baz' + "</span>" + "\n</div></body></html>" + "\n"
+     assert_equal(expected, docdiff.run(doc1, doc2, {:resolution => "line", :format => "html", :digest => false}))
+   end
+
+   def test_run_line_manued()
+     doc1 = Document.new("foo bar\nbaz", 'US-ASCII', 'LF')
+     doc2 = Document.new("foo beer\nbaz", 'US-ASCII', 'LF')
+     docdiff = DocDiff.new
+     expected = "defparentheses [ ]\n" +
+                "defdelete /\n" +
+                "defswap |\n" +
+                "defcomment ;\n" +
+                "defescape ~\n" +
+                "deforder newer-last\n" +
+                "defversion 0.9.5\n" +
+                "[foo bar\n/foo beer\n]baz"
+     assert_equal(expected, docdiff.run(doc1, doc2, {:resolution => "line", :format => "manued", :digest => false}))
+   end
+   def test_run_word_manued()
+     doc1 = Document.new("foo bar\nbaz", 'US-ASCII', 'LF')
+     doc2 = Document.new("foo beer\nbaz", 'US-ASCII', 'LF')
+     docdiff = DocDiff.new
+     expected = "defparentheses [ ]\n" +
+                "defdelete /\n" +
+                "defswap |\n" +
+                "defcomment ;\n" +
+                "defescape ~\n" +
+                "deforder newer-last\n" +
+                "defversion 0.9.5\n" +
+                "foo [bar/beer]\nbaz"
+     assert_equal(expected, docdiff.run(doc1, doc2, {:resolution => "word", :format => "manued", :digest => false}))
+   end
+   def test_run_char_manued()
+     doc1 = Document.new("foo bar\nbaz", 'US-ASCII', 'LF')
+     doc2 = Document.new("foo beer\nbaz", 'US-ASCII', 'LF')
+     docdiff = DocDiff.new
+     expected = "defparentheses [ ]\n" +
+                "defdelete /\n" +
+                "defswap |\n" +
+                "defcomment ;\n" +
+                "defescape ~\n" +
+                "deforder newer-last\n" +
+                "defversion 0.9.5\n" +
+                "foo b[a/ee]r\nbaz"
+     assert_equal(expected, docdiff.run(doc1, doc2, {:resolution => "char", :format => "manued", :digest => false}))
+   end
+
+   def test_parse_config_file_content()
+     content = ["# comment line\n",
+                " # comment line with leading space\n",
+                "foo1 = bar\n",
+                "foo2 = bar baz \n",
+                " foo3 = 123 # comment\n",
+                "foo4 = no \n",
+                "foo1 = tRue\n",
+                "\n",
+                "",
+                nil].join
+     expected = {:foo1=>true, :foo2=>"bar baz", :foo3=>123, :foo4=>false}
+     docdiff = DocDiff.new
+     assert_equal(expected,
+                  DocDiff.parse_config_file_content(content))
+   end
+
+   def test_run_line_user()
+     doc1 = Document.new("foo bar\nbaz", 'US-ASCII', 'LF')
+     doc2 = Document.new("foo beer\nbaz", 'US-ASCII', 'LF')
+     config = {:tag_common_start => '<=>',
+               :tag_common_end => '</=>',
+               :tag_del_start => '<->',
+               :tag_del_end => '</->',
+               :tag_add_start => '<+>',
+               :tag_add_end => '</+>',
+               :tag_change_before_start => '<!->',
+               :tag_change_before_end => '</!->',
+               :tag_change_after_start => '<!+>',
+               :tag_change_after_end => '</!+>'}
+     docdiff = DocDiff.new
+     docdiff.config.update(config)
+     expected = "<!->foo bar\n</!-><!+>foo beer\n</!+><=>baz</=>"
+     assert_equal(expected, docdiff.run(doc1, doc2, {:resolution => "line", :format => "user", :digest => false}))
+   end
+   def test_run_word_user()
+     doc1 = Document.new("foo bar\nbaz", 'US-ASCII', 'LF')
+     doc2 = Document.new("foo beer\nbaz", 'US-ASCII', 'LF')
+     config = {:tag_common_start => '<=>',
+               :tag_common_end => '</=>',
+               :tag_del_start => '<->',
+               :tag_del_end => '</->',
+               :tag_add_start => '<+>',
+               :tag_add_end => '</+>',
+               :tag_change_before_start => '<!->',
+               :tag_change_before_end => '</!->',
+               :tag_change_after_start => '<!+>',
+               :tag_change_after_end => '</!+>'}
+     docdiff = DocDiff.new
+     docdiff.config.update(config)
+     expected = "<=>foo </=><!->bar</!-><!+>beer</!+><=>\n</=><=>baz</=>"
+     assert_equal(expected, docdiff.run(doc1, doc2, {:resolution => "word", :format => "user", :digest => false}))
+   end
+   def test_run_char_user()
+     doc1 = Document.new("foo bar\nbaz", 'US-ASCII', 'LF')
+     doc2 = Document.new("foo beer\nbaz", 'US-ASCII', 'LF')
+     config = {:tag_common_start => '<=>',
+               :tag_common_end => '</=>',
+               :tag_del_start => '<->',
+               :tag_del_end => '</->',
+               :tag_add_start => '<+>',
+               :tag_add_end => '</+>',
+               :tag_change_before_start => '<!->',
+               :tag_change_before_end => '</!->',
+               :tag_change_after_start => '<!+>',
+               :tag_change_after_end => '</!+>'}
+     docdiff = DocDiff.new
+     docdiff.config.update(config)
+     expected = "<=>foo </=><=>b</=><!->a</!-><!+>ee</!+><=>r</=><=>\n</=><=>baz</=>"
+     assert_equal(expected, docdiff.run(doc1, doc2, {:resolution => "char", :format => "user", :digest => false}))
+   end
+
+
+   def teardown()
+     #
+   end
+
+ end
data/test/document_test.rb
@@ -0,0 +1,626 @@
+ #!/usr/bin/ruby
+ # -*- coding: euc-jp; -*-
+ require 'test/unit'
+ require 'docdiff/document'
+ require 'nkf'
+
+ class TC_Document < Test::Unit::TestCase
+
+   def setup()
+     #
+   end
+
+   def test_encoding()
+     doc = Document.new("Foo bar.\nBaz quux.")
+     doc.encoding = 'US-ASCII'
+     doc.eol = 'LF'
+     expected = 'US-ASCII'
+     assert_equal(expected, doc.encoding)
+   end
+   def test_encoding_auto()
+     doc = if CharString.ruby_m17n?
+             Document.new("Foo bar.\nBaz quux.".encode("US-ASCII"))
+           else
+             Document.new("Foo bar.\nBaz quux.")
+           end
+     expected = 'US-ASCII'
+     assert_equal(expected, doc.encoding)
+   end
+   def test_eol()
+     doc = Document.new("Foo bar.\nBaz quux.")
+     doc.encoding = 'US-ASCII'
+     doc.eol = 'LF'
+     expected = 'LF'
+     assert_equal(expected, doc.eol)
+   end
+   def test_eol_auto_lf()
+     doc = Document.new("Foo bar.\nBaz quux.")
+     expected = 'LF'
+     assert_equal(expected, doc.eol)
+   end
+   def test_eol_auto_none()
+     doc = Document.new("Foo bar.")
+     expected = "NONE"
+     assert_equal(expected, doc.eol)
+   end
+   def test_eol_char_lf()
+     doc = Document.new("Foo bar.\nBaz quux.")
+     # doc.encoding = "US-ASCII"
+     # doc.eol = "LF"
+     expected = "\n"
+     assert_equal(expected, doc.eol_char)
+   end
+   def test_split_by_line()
+     doc = Document.new("Hello, my name is Watanabe.\nI am just another Ruby porter.\n")
+     expected = ["Hello, my name is Watanabe.\n", "I am just another Ruby porter.\n"]
+     assert_equal(expected, doc.split_to_line)
+   end
+
+   # test eol split_to_line() method
+   def test_cr_split_to_line()
+     doc = Document.new("foo\rbar\r")
+     expected = ["foo\r", "bar\r"]
+     assert_equal(expected, doc.split_to_line)
+   end
+   def test_cr_split_to_line_chomped_lastline()
+     doc = Document.new("foo\rbar")
+     expected = ["foo\r", "bar"]
+     assert_equal(expected, doc.split_to_line)
+   end
+   def test_cr_split_to_line_empty_line()
+     doc = Document.new("foo\r\rbar\r")
+     expected = ["foo\r", "\r", "bar\r"]
+     assert_equal(expected, doc.split_to_line)
+   end
+   def test_lf_split_to_line()
+     doc = Document.new("foo\nbar\n")
+     expected = ["foo\n", "bar\n"]
+     assert_equal(expected, doc.split_to_line)
+   end
+   def test_lf_split_to_line_chomped_lastline()
+     doc = Document.new("foo\nbar")
+     expected = ["foo\n", "bar"]
+     assert_equal(expected, doc.split_to_line)
+   end
+   def test_lf_split_to_line_empty_line()
+     doc = Document.new("foo\n\nbar\n")
+     expected = ["foo\n", "\n", "bar\n"]
+     assert_equal(expected, doc.split_to_line)
+   end
+   def test_crlf_split_to_line()
+     doc = Document.new("foo\r\nbar\r\n")
+     expected = ["foo\r\n", "bar\r\n"]
+     assert_equal(expected, doc.split_to_line)
+   end
+   def test_crlf_split_to_line_chomped_lastline()
+     doc = Document.new("foo\r\nbar")
+     expected = ["foo\r\n", "bar"]
+     assert_equal(expected, doc.split_to_line)
+   end
+   def test_crlf_split_to_line_empty_line()
+     doc = Document.new("foo\r\n\r\nbar\r\n")
+     expected = ["foo\r\n", "\r\n", "bar\r\n"]
+     assert_equal(expected, doc.split_to_line)
+   end
+
+   # test ASCII module
+   def test_ascii_split_to_word()
+     doc = Document.new("foo bar")
+     expected = ["foo ", "bar"]
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_ascii_split_to_word_withsymbol()
+     doc = Document.new("foo (bar) baz-baz")
+     expected = ["foo ", "(bar) ", "baz-baz"]
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_ascii_split_to_word_withquote()
+     doc = Document.new("foo's 'foo' \"bar\" 'baz.'")
+     expected = ["foo's ", "'foo' ", "\"bar\" ", "'baz.'"]
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_ascii_split_to_word_withlongspace()
+     doc = Document.new(" foo bar")
+     expected = [" ", "foo ", " ", "bar"]
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_ascii_split_to_word_withdash()
+     doc = Document.new("foo -- bar, baz - quux")
+     expected = ["foo ", "-- ", "bar, ", "baz ", "- ", "quux"]
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_ascii_split_to_char()
+     doc = Document.new("foo bar")
+     expected = ["f","o","o"," ","b","a","r"]
+     assert_equal(expected, doc.split_to_char)
+   end
+   def test_ascii_split_to_char_with_eol_cr()
+     doc = Document.new("foo bar\r")
+     expected = ["f","o","o"," ","b","a","r","\r"]
+     assert_equal(expected, doc.split_to_char)
+   end
+   def test_ascii_split_to_char_with_eol_lf()
+     doc = Document.new("foo bar\n")
+     expected = ["f","o","o"," ","b","a","r","\n"]
+     assert_equal(expected, doc.split_to_char)
+   end
+   def test_ascii_split_to_char_with_eol_crlf()
+     doc = Document.new("foo bar\r\n")
+     expected = ["f","o","o"," ","b","a","r","\r\n"]
+     assert_equal(expected, doc.split_to_char)
+   end
+   def test_ascii_split_to_byte()
+     doc = Document.new("foo bar\r\n")
+     expected = ["f","o","o"," ","b","a","r","\r","\n"]
+     assert_equal(expected, doc.split_to_byte)
+   end
+   def test_ascii_count_byte()
+     doc = Document.new("foo bar\r\n")
+     expected = 9
+     assert_equal(expected, doc.count_byte)
+   end
+   def test_ascii_count_char()
+     doc = Document.new("foo bar\r\nbaz quux\r\n")
+     expected = 17
+     assert_equal(expected, doc.count_char)
+   end
+   def test_ascii_count_latin_graph_char()
+     doc = Document.new("foo bar\r\nbaz quux\r\n")
+     expected = 13
+     assert_equal(expected, doc.count_latin_graph_char)
+   end
+   def test_ascii_count_graph_char()
+     doc = Document.new("foo bar\r\nbaz quux\r\n")
+     expected = 13
+     assert_equal(expected, doc.count_graph_char)
+   end
+   def test_ascii_count_latin_blank_char()
+     doc = Document.new("foo bar\r\nbaz\tquux\r\n")
+     expected = 2
+     assert_equal(expected, doc.count_latin_blank_char)
+   end
+   def test_ascii_count_blank_char()
+     doc = Document.new("foo bar\r\nbaz\tquux\r\n")
+     expected = 2
+     assert_equal(expected, doc.count_blank_char)
+   end
+   def test_ascii_count_word()
+     doc = Document.new("foo bar \r\nbaz quux\r\n")
+     expected = 6
+     assert_equal(expected, doc.count_word)
+   end
+   def test_ascii_count_latin_word()
+     doc = Document.new("foo bar \r\nbaz quux\r\n")
+     expected = 5 # " " is also counted as a word
+     assert_equal(expected, doc.count_latin_word)
+   end
+   def test_ascii_count_latin_valid_word()
+     doc = Document.new("1 foo \r\n%%% ()\r\n")
+     expected = 2
+     assert_equal(expected, doc.count_latin_valid_word)
+   end
+   def test_ascii_count_line()
+     doc = Document.new("foo\r\nbar")
+     expected = 2
+     assert_equal(expected, doc.count_line)
+   end
+   def test_ascii_count_graph_line()
+     doc = Document.new("foo\r\n ")
+     expected = 1
+     assert_equal(expected, doc.count_graph_line)
+   end
+   def test_ascii_count_empty_line()
+     doc = Document.new("foo\r\n \r\n\t\r\n\r\n")
+     expected = 1
+     assert_equal(expected, doc.count_empty_line)
+   end
+   def test_ascii_count_blank_line()
+     doc = Document.new("\r\n \r\n\t\r\n ")
+     expected = 3
+     assert_equal(expected, doc.count_blank_line)
+   end
+
+   # test EUCJP module
+   def test_eucjp_split_to_word()
+     doc = Document.new(NKF.nkf("-e", "���ܸ��ʸ��foo bar"))
+     expected = ["���ܸ��","ʸ��","foo ","bar"].collect{|c| NKF.nkf("-e", c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_eucjp_split_to_word_kanhira()
+     doc = Document.new(NKF.nkf("-e", "���ܸ��ʸ��"))
+     expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_eucjp_split_to_word_katahira()
+     doc = Document.new(NKF.nkf("-e", "�������ʤ�ʸ��"))
+     expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_eucjp_split_to_word_kataonbiki()
+     doc = Document.new(NKF.nkf("-e", "��ӡ�������"), "EUC-JP")
+     expected = ["��ӡ�", "����", "��"].collect{|c| NKF.nkf("-e", c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_eucjp_split_to_word_hiraonbiki()
+     doc = Document.new(NKF.nkf("-e", "���ӡ���"), "EUC-JP")
+     expected = (["�", "��ӡ���"]).collect{|c| NKF.nkf("-e", c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_eucjp_split_to_word_latinmix()
+     doc = Document.new(NKF.nkf("-e", "���ܸ��Latin��ʸ��"))
+     expected = ["���ܸ��", "Latin", "��", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_eucjp_split_to_char()
+     doc = Document.new(NKF.nkf("-e", "���ܸ�a b"))
+     expected = ["��","��","��","a"," ","b"].collect{|c|NKF.nkf("-e",c)}
+     assert_equal(expected, doc.split_to_char)
+   end
+   def test_eucjp_split_to_char_with_cr()
+     doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r"))
+     expected = ["��","��","��","a"," ","b","\r"].collect{|c|NKF.nkf("-e",c)}
+     assert_equal(expected, doc.split_to_char)
+   end
+   def test_eucjp_split_to_char_with_lf()
+     doc = Document.new(NKF.nkf("-e", "���ܸ�a b\n"))
+     expected = ["��","��","��","a"," ","b","\n"].collect{|c|NKF.nkf("-e",c)}
+     assert_equal(expected, doc.split_to_char)
+   end
+   def test_eucjp_split_to_char_with_crlf()
+     doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
+     expected = ["��","��","��","a"," ","b","\r\n"].collect{|c|NKF.nkf("-e",c)}
+     assert_equal(expected, doc.split_to_char)
+   end
+   def test_eucjp_count_char()
+     doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
+     expected = 7
+     assert_equal(expected, doc.count_char)
+   end
+   def test_eucjp_count_latin_graph_char()
+     doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
+     expected = 2
+     assert_equal(expected, doc.count_latin_graph_char)
+   end
+   def test_eucjp_count_ja_graph_char()
+     doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
+     expected = 3
+     assert_equal(expected, doc.count_ja_graph_char)
+   end
+   def test_eucjp_count_graph_char()
+     doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
+     expected = 5
+     assert_equal(expected, doc.count_graph_char)
+   end
+   def test_eucjp_count_latin_blank_char()
+     doc = Document.new(NKF.nkf("-e", "���ܸ�\ta b\r\n"))
+     expected = 2
+     assert_equal(expected, doc.count_latin_blank_char)
+   end
+   def test_eucjp_count_ja_blank_char()
+     doc = Document.new(NKF.nkf("-e", "���ܡ���\ta b\r\n"))
+     expected = 1
+     assert_equal(expected, doc.count_ja_blank_char)
+   end
+   def test_eucjp_count_blank_char()
+     doc = Document.new(NKF.nkf("-e", "���ܡ���\ta b\r\n"))
+     expected = 3
+     assert_equal(expected, doc.count_blank_char)
+   end
+   def test_eucjp_count_word()
+     doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
+     expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
+     assert_equal(expected, doc.count_word)
+   end
+   def test_eucjp_count_ja_word()
+     doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
+     expected = 3
+     assert_equal(expected, doc.count_ja_word)
+   end
+   def test_eucjp_count_latin_valid_word()
+     doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
+     expected = 2
+     assert_equal(expected, doc.count_latin_valid_word)
+   end
+   def test_eucjp_count_ja_valid_word()
+     doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
+     expected = 2
+     assert_equal(expected, doc.count_ja_valid_word)
+   end
+   def test_eucjp_count_valid_word()
+     doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
+     expected = 4
+     assert_equal(expected, doc.count_valid_word)
+   end
+   def test_eucjp_count_line()
+     doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
+     expected = 6
+     assert_equal(expected, doc.count_line)
+   end
+   def test_eucjp_count_graph_line()
+     doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
+     expected = 3
+     assert_equal(expected, doc.count_graph_line)
+   end
+   def test_eucjp_count_empty_line()
+     doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
+     expected = 1
+     assert_equal(expected, doc.count_empty_line)
+   end
+   def test_eucjp_count_blank_line()
+     doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
+     expected = 2
+     assert_equal(expected, doc.count_blank_line)
+   end
+
+   # test SJIS module
+   def test_sjis_split_to_word()
+     doc = Document.new(NKF.nkf("-s", "���ܸ��ʸ��foo bar"))
+     expected = ["���ܸ��", "ʸ��", "foo ", "bar"].collect{|c|NKF.nkf("-s",c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_sjisplit_s_to_word_kanhira()
+     doc = Document.new(NKF.nkf("-s", "���ܸ��ʸ��"))
+     expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-s", c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_sjis_split_to_word_katahira()
+     doc = Document.new(NKF.nkf("-s", "�������ʤ�ʸ��"))
+     expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-s", c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_sjis_split_to_word_kataonbiki()
+     doc = Document.new(NKF.nkf("-s", "��ӡ��λ���"))
+     expected = ["��ӡ���", "����"].collect{|c| NKF.nkf("-s", c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_sjis_split_to_word_hiraonbiki()
+     doc = Document.new(NKF.nkf("-s", "���ӡ���"))
+     expected = ["�", "��ӡ���"].collect{|c| NKF.nkf("-s", c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_sjis_split_to_word_latinmix()
+     doc = Document.new(NKF.nkf("-s", "���ܸ��Latin��ʸ��"))
+     expected = ["���ܸ��","Latin","��","ʸ��"].collect{|c| NKF.nkf("-s", c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_sjis_split_to_char()
+     doc = Document.new(NKF.nkf("-s", "ɽ�׻�a b"))
+     expected = ["ɽ","��","��","a"," ","b"].collect{|c|NKF.nkf("-s",c)}
+     assert_equal(expected, doc.split_to_char)
+   end
+   def test_sjis_split_to_char_with_cr()
+     doc = Document.new(NKF.nkf("-s", "ɽ�׻�a b\r"))
+     expected = ["ɽ","��","��","a"," ","b","\r"].collect{|c|NKF.nkf("-s",c)}
+     assert_equal(expected, doc.split_to_char)
+   end
+   def test_sjis_split_to_char_with_lf()
+     doc = Document.new(NKF.nkf("-s", "ɽ�׻�a b\n"))
+     expected = ["ɽ","��","��","a"," ","b","\n"].collect{|c|NKF.nkf("-s",c)}
+     assert_equal(expected, doc.split_to_char)
+   end
+   def test_sjis_split_to_char_with_crlf()
+     doc = Document.new(NKF.nkf("-s", "ɽ�׻�a b\r\n"))
+     expected = ["ɽ","��","��","a"," ","b","\r\n"].collect{|c|NKF.nkf("-s",c)}
+     assert_equal(expected, doc.split_to_char)
+   end
+   def test_sjis_count_char()
+     doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
+     expected = 7
+     assert_equal(expected, doc.count_char)
+   end
+   def test_sjis_count_latin_graph_char()
+     doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
+     expected = 2
+     assert_equal(expected, doc.count_latin_graph_char)
+   end
+   def test_sjis_count_ja_graph_char()
+     doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
+     expected = 3
+     assert_equal(expected, doc.count_ja_graph_char)
+   end
+   def test_sjis_count_graph_char()
+     doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
+     expected = 5
+     assert_equal(expected, doc.count_graph_char)
+   end
+   def test_sjis_count_latin_blank_char()
+     doc = Document.new(NKF.nkf("-s", "���ܸ�\ta b\r\n"))
+     expected = 2
+     assert_equal(expected, doc.count_latin_blank_char)
+   end
+   def test_sjis_count_ja_blank_char()
+     doc = Document.new(NKF.nkf("-s", "���ܡ���\ta b\r\n"))
+     expected = 1
+     assert_equal(expected, doc.count_ja_blank_char)
+   end
+   def test_sjis_count_blank_char()
+     doc = Document.new(NKF.nkf("-s", "���ܡ���\ta b\r\n"))
+     expected = 3
+     assert_equal(expected, doc.count_blank_char)
+   end
+   def test_sjis_count_word()
+     doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
+     expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
+     assert_equal(expected, doc.count_word)
+   end
+   def test_sjis_count_ja_word()
+     doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
+     expected = 3
+     assert_equal(expected, doc.count_ja_word)
+   end
+   def test_sjis_count_latin_valid_word()
+     doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
+     expected = 2
+     assert_equal(expected, doc.count_latin_valid_word)
+   end
+   def test_sjis_count_ja_valid_word()
+     doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
+     expected = 2
+     assert_equal(expected, doc.count_ja_valid_word)
+   end
+   def test_sjis_count_valid_word()
+     doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
+     expected = 4
+     assert_equal(expected, doc.count_valid_word)
+   end
+   def test_sjis_count_line()
+     doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
+     expected = 6
+     assert_equal(expected, doc.count_line)
+   end
+   def test_sjis_count_graph_line()
+     doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
+     expected = 3
+     assert_equal(expected, doc.count_graph_line)
+   end
+   def test_sjis_count_empty_line()
+     doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
+     expected = 1
+     assert_equal(expected, doc.count_empty_line)
+   end
+   def test_sjis_count_blank_line()
+     doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
+     expected = 2
+     assert_equal(expected, doc.count_blank_line)
+   end
+
+   # test UTF8 module
+   def test_utf8_split_to_word()
+     doc = Document.new(NKF.nkf("-E -w", "���ܸ��ʸ��foo bar"))
+     expected = ["���ܸ��", "ʸ��", "foo ", "bar"].collect{|c| NKF.nkf("-E -w", c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_utf8_split_to_word_kanhira()
+     doc = Document.new(NKF.nkf("-E -w", "���ܸ��ʸ��"))
+     expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_utf8_split_to_word_katahira()
+     doc = Document.new(NKF.nkf("-E -w", "�������ʤ�ʸ��"))
+     expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_utf8_split_to_word_kataonbiki()
+     doc = Document.new(NKF.nkf("-E -w", "��ӡ��λ���"))
+     expected = ["��ӡ���", "����"].collect{|c| NKF.nkf("-E -w", c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_utf8_split_to_word_hiraonbiki()
+     doc = Document.new(NKF.nkf("-E -w", "���ӡ���"))
+     expected = ["�", "��ӡ���"].collect{|c| NKF.nkf("-E -w", c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_utf8_split_to_word_latinmix()
+     doc = Document.new(NKF.nkf("-E -w", "���ܸ��Latin��ʸ��"))
+     expected = ["���ܸ��", "Latin", "��", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
+     assert_equal(expected, doc.split_to_word)
+   end
+   def test_utf8_split_to_char()
+     doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b"), "UTF-8")
+     expected = ["��", "��", "��", "a", " ", "b"].collect{|c| NKF.nkf("-E -w", c)}
+     assert_equal(expected, doc.split_to_char)
+   end
+   def test_utf8_split_to_char_with_cr()
+     doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r"), "UTF-8")
+     expected = ["��","��","��","a"," ","b","\r"].collect{|c| NKF.nkf("-E -w", c)}
+     assert_equal(expected, doc.split_to_char)
+   end
+   def test_utf8_split_to_char_with_lf()
+     doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\n"), "UTF-8")
+     expected = ["��","��","��","a"," ","b","\n"].collect{|c| NKF.nkf("-E -w", c)}
+     assert_equal(expected, doc.split_to_char)
+   end
+   def test_utf8_split_to_char_with_crlf()
+     doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
+     expected = ["��","��","��","a"," ","b","\r\n"].collect{|c| NKF.nkf("-E -w", c)}
+     assert_equal(expected, doc.split_to_char)
+   end
+   def test_utf8_count_char()
+     doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
+     expected = 7
+     assert_equal(expected, doc.count_char)
+   end
+   def test_utf8_count_latin_graph_char()
+     doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
+     expected = 2
+     assert_equal(expected, doc.count_latin_graph_char)
+   end
+   def test_utf8_count_ja_graph_char()
+     doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
+     expected = 3
+     assert_equal(expected, doc.count_ja_graph_char)
+   end
+   def test_utf8_count_graph_char()
+     doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
+     expected = 5
+     assert_equal(expected, doc.count_graph_char)
+   end
+   def test_utf8_count_latin_blank_char()
+     doc = Document.new(NKF.nkf("-E -w", "���ܸ�\ta b\r\n"))
+     expected = 2
+     assert_equal(expected, doc.count_latin_blank_char)
+   end
+   def test_utf8_count_ja_blank_char()
+     doc = Document.new(NKF.nkf("-E -w", "���ܡ���\ta b\r\n"))
+     expected = 1
+     assert_equal(expected, doc.count_ja_blank_char)
+   end
+   def test_utf8_count_blank_char()
+     doc = Document.new(NKF.nkf("-E -w", "���ܡ���\ta b\r\n"))
+     expected = 3
+     assert_equal(expected, doc.count_blank_char)
+   end
+   def test_utf8_count_word()
+     doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
+     expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
+     assert_equal(expected, doc.count_word)
+   end
+   def test_utf8_count_ja_word()
+     doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
+     expected = 3
+     assert_equal(expected, doc.count_ja_word)
+   end
+   def test_utf8_count_latin_valid_word()
+     doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
+     expected = 2
+     assert_equal(expected, doc.count_latin_valid_word)
+   end
+   def test_utf8_count_ja_valid_word()
+     doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
+     expected = 2
+     assert_equal(expected, doc.count_ja_valid_word)
+   end
+   def test_utf8_count_valid_word()
+     doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
+     expected = 4
+     assert_equal(expected, doc.count_valid_word)
+   end
+   def test_utf8_count_line()
+     doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
+     expected = 6
+     assert_equal(expected, doc.count_line)
+   end
+   def test_utf8_count_graph_line()
+     doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
+     expected = 3
+     assert_equal(expected, doc.count_graph_line)
+   end
+   def test_utf8_count_empty_line()
+     doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
+     expected = 1
+     assert_equal(expected, doc.count_empty_line)
+   end
+   def test_utf8_count_blank_line()
+     doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
+     expected = 2
+     assert_equal(expected, doc.count_blank_line)
+   end
+
+
+
+
+   def teardown()
+     #
+   end
+
+ end