docdiff 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +6 -0
- data/.travis.yml +7 -0
- data/Gemfile +17 -0
- data/Guardfile +8 -0
- data/Makefile +108 -0
- data/Rakefile +17 -0
- data/bin/docdiff +179 -0
- data/devutil/JIS0208.TXT +6952 -0
- data/devutil/char_by_charclass.rb +23 -0
- data/devutil/charclass_by_char.rb +21 -0
- data/devutil/jis0208.rb +343 -0
- data/devutil/testjis0208.rb +38 -0
- data/docdiff.conf.example +22 -0
- data/docdiff.gemspec +23 -0
- data/docdiffwebui.cgi +176 -0
- data/docdiffwebui.html +123 -0
- data/img/docdiff-screenshot-format-html-digest-firefox.png +0 -0
- data/img/docdiff-screenshot-format-html-firefox.png +0 -0
- data/img/docdiff-screenshot-format-tty-cmdexe-en.png +0 -0
- data/img/docdiff-screenshot-format-tty-cmdexe-ja.png +0 -0
- data/img/docdiff-screenshot-format-tty-rxvtunicode-en.png +0 -0
- data/img/docdiff-screenshot-format-tty-rxvtunicode-ja.png +0 -0
- data/img/docdiff-screenshot-format-tty-xterm-en.png +0 -0
- data/img/docdiff-screenshot-format-tty-xterm-ja.png +0 -0
- data/img/docdiff-screenshot-resolution-linewordchar-xterm.png +0 -0
- data/index.html +181 -0
- data/langfilter.rb +14 -0
- data/lib/doc_diff.rb +170 -0
- data/lib/docdiff.rb +7 -0
- data/lib/docdiff/charstring.rb +579 -0
- data/lib/docdiff/diff.rb +217 -0
- data/lib/docdiff/diff/contours.rb +382 -0
- data/lib/docdiff/diff/editscript.rb +148 -0
- data/lib/docdiff/diff/rcsdiff.rb +107 -0
- data/lib/docdiff/diff/shortestpath.rb +93 -0
- data/lib/docdiff/diff/speculative.rb +40 -0
- data/lib/docdiff/diff/subsequence.rb +39 -0
- data/lib/docdiff/diff/unidiff.rb +124 -0
- data/lib/docdiff/difference.rb +92 -0
- data/lib/docdiff/document.rb +127 -0
- data/lib/docdiff/encoding/en_ascii.rb +97 -0
- data/lib/docdiff/encoding/ja_eucjp.rb +269 -0
- data/lib/docdiff/encoding/ja_sjis.rb +260 -0
- data/lib/docdiff/encoding/ja_utf8.rb +6974 -0
- data/lib/docdiff/version.rb +3 -0
- data/lib/docdiff/view.rb +476 -0
- data/lib/viewdiff.rb +375 -0
- data/readme.html +713 -0
- data/sample/01.en.ascii.cr +1 -0
- data/sample/01.en.ascii.crlf +2 -0
- data/sample/01.en.ascii.lf +2 -0
- data/sample/01.ja.eucjp.lf +2 -0
- data/sample/01.ja.sjis.cr +1 -0
- data/sample/01.ja.sjis.crlf +2 -0
- data/sample/01.ja.utf8.crlf +2 -0
- data/sample/02.en.ascii.cr +1 -0
- data/sample/02.en.ascii.crlf +2 -0
- data/sample/02.en.ascii.lf +2 -0
- data/sample/02.ja.eucjp.lf +2 -0
- data/sample/02.ja.sjis.cr +1 -0
- data/sample/02.ja.sjis.crlf +2 -0
- data/sample/02.ja.utf8.crlf +2 -0
- data/sample/humpty_dumpty01.ascii.lf +4 -0
- data/sample/humpty_dumpty02.ascii.lf +4 -0
- data/test/charstring_test.rb +1008 -0
- data/test/diff_test.rb +36 -0
- data/test/difference_test.rb +64 -0
- data/test/docdiff_test.rb +193 -0
- data/test/document_test.rb +626 -0
- data/test/test_helper.rb +7 -0
- data/test/view_test.rb +570 -0
- data/test/viewdiff_test.rb +908 -0
- metadata +129 -0
data/test/diff_test.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
require 'test/unit'
|
3
|
+
require "docdiff/diff"
|
4
|
+
|
5
|
+
# Verifies that Diff#ses produces the same edit script for a small pair of
# symbol arrays, whichever algorithm is requested.
class TC_Diff < Test::Unit::TestCase

  # No shared fixtures are required.
  def setup
  end

  # Runs Diff#ses with its default algorithm and with :speculative,
  # :shortestpath and :contours, and expects an identical script each time:
  # :a is common, :b is deleted, :x is added, :c is common.
  def test_new_ses
    former = [:a, :b, :c]
    latter = [:a, :x, :c]
    expected_script = [
      [:common_elt_elt, [:a], [:a]],
      [:del_elt,        [:b], nil],
      [:add_elt,        nil,  [:x]],
      [:common_elt_elt, [:c], [:c]]
    ]

    # Every script is collected through #each before anything is asserted,
    # so an error raised by any algorithm surfaces ahead of the comparisons.
    by_default = []
    Diff.new(former, latter).ses.each { |entry| by_default.push(entry) }

    by_speculative = []
    Diff.new(former, latter).ses(:speculative).each { |entry| by_speculative.push(entry) }

    by_shortestpath = []
    Diff.new(former, latter).ses(:shortestpath).each { |entry| by_shortestpath.push(entry) }

    by_contours = []
    Diff.new(former, latter).ses(:contours).each { |entry| by_contours.push(entry) }

    assert_equal(expected_script, by_default)
    assert_equal(expected_script, by_speculative)
    assert_equal(expected_script, by_shortestpath)
    assert_equal(expected_script, by_contours)
  end

  # Nothing to clean up.
  def teardown
  end

end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
require 'test/unit'
|
3
|
+
require 'docdiff/difference'
|
4
|
+
|
5
|
+
# Exercises Difference: the object itself, its raw_list, and the
# former_only / latter_only projections.
class TC_Difference < Test::Unit::TestCase

  # No shared fixtures are required.
  def setup
  end

  # A deletion immediately followed by an addition is reported as a single
  # :change_elt entry.
  def test_new
    former = [:a, :b, :c]
    latter = [:a, :x, :c]
    difference = Difference.new(former, latter)
    assert_equal([[:common_elt_elt, [:a], [:a]],
                  [:change_elt,     [:b], [:x]],
                  [:common_elt_elt, [:c], [:c]]],
                 difference)
  end

  # raw_list keeps the deletion and the addition as separate entries.
  def test_raw_list
    former = [:a, :b, :c]
    latter = [:a, :x, :c]
    raw = Difference.new(former, latter).raw_list
    assert_equal([[:common_elt_elt, [:a], [:a]],
                  [:del_elt,        [:b], nil],
                  [:add_elt,        nil,  [:x]],
                  [:common_elt_elt, [:c], [:c]]],
                 raw)
  end

  # former_only: a change keeps only its "before" side, and a trailing
  # addition is dropped altogether.
  def test_former_only
    changed = Difference.new([:a, :b, :c], [:a, :x, :c]).former_only
    assert_equal([[:common_elt_elt, [:a], [:a]],
                  [:change_elt,     [:b], nil],
                  [:common_elt_elt, [:c], [:c]]],
                 changed)

    shifted = Difference.new([:a, :b, :c], [:a, :c, :d]).former_only
    assert_equal([[:common_elt_elt, [:a], [:a]],
                  [:del_elt,        [:b], nil],
                  [:common_elt_elt, [:c], [:c]]],
                 shifted)
  end

  # latter_only: a change keeps only its "after" side, and a deletion is
  # dropped altogether.
  def test_latter_only
    changed = Difference.new([:a, :b, :c], [:a, :x, :c]).latter_only
    assert_equal([[:common_elt_elt, [:a], [:a]],
                  [:change_elt,     nil,  [:x]],
                  [:common_elt_elt, [:c], [:c]]],
                 changed)

    shifted = Difference.new([:a, :b, :c], [:a, :c, :d]).latter_only
    assert_equal([[:common_elt_elt, [:a], [:a]],
                  [:common_elt_elt, [:c], [:c]],
                  [:add_elt,        nil,  [:d]]],
                 shifted)
  end

  # Nothing to clean up.
  def teardown
  end

end
|
@@ -0,0 +1,193 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# -*- coding: us-ascii; -*-
|
3
|
+
require 'test/unit'
|
4
|
+
require 'docdiff'
|
5
|
+
require 'nkf'
|
6
|
+
|
7
|
+
# Tests for DocDiff: comparison at line / word / char resolution, rendering
# through DocDiff#run in the html, manued and user formats, and parsing of
# configuration file content.
#
# NOTE(review): this class is named TC_Document, the same name used by the
# test case in test/document_test.rb. If both files are loaded into one
# process the two definitions merge into a single class. The name is kept
# here so nothing that refers to it breaks; consider renaming it.
class TC_Document < Test::Unit::TestCase

  # No shared fixtures are required.
  def setup
  end

  # Line resolution: the edited first line is one change, the second line
  # is common.
  def test_compare_by_line
    doc1 = Document.new("Foo bar.\nBaz quux.", 'US-ASCII', 'LF')
    doc2 = Document.new("Foo.\nBaz quux.", 'US-ASCII', 'LF')
    docdiff = DocDiff.new
    expected = [[:change_elt,     ["Foo bar.\n"], ["Foo.\n"]],
                [:common_elt_elt, ['Baz quux.'], ['Baz quux.']]]
    assert_equal(expected, docdiff.compare_by_line(doc1, doc2))
  end

  # Word resolution inside changed lines; unchanged lines stay whole.
  def test_compare_by_line_word
    doc1 = Document.new("a b c d\ne f", 'US-ASCII', 'LF')
    doc2 = Document.new("a x c d\ne f", 'US-ASCII', 'LF')
    docdiff = DocDiff.new
    expected = [[:common_elt_elt, ["a "], ["a "]],
                [:change_elt,     ["b "], ["x "]],
                [:common_elt_elt, ["c ", "d", "\n"], ["c ", "d", "\n"]],
                [:common_elt_elt, ["e f"], ["e f"]]]
    assert_equal(expected,
                 docdiff.compare_by_line_word(doc1, doc2))
  end

  # Character resolution inside changed words.
  def test_compare_by_line_word_char
    doc1 = Document.new("foo bar\nbaz", 'US-ASCII', 'LF')
    doc2 = Document.new("foo beer\nbaz", 'US-ASCII', 'LF')
    docdiff = DocDiff.new
    expected = [[:common_elt_elt, ['foo '], ['foo ']],
                [:common_elt_elt, ['b'], ['b']],
                [:change_elt,     ['a'], ['e', 'e']],
                [:common_elt_elt, ['r'], ['r']],
                [:common_elt_elt, ["\n"], ["\n"]],
                [:common_elt_elt, ['baz'], ['baz']]]
    assert_equal(expected,
                 docdiff.compare_by_line_word_char(doc1, doc2))
  end

  # Full XHTML output at line resolution, digest mode off.
  def test_run_line_html
    doc1, doc2 = sample_documents
    docdiff = DocDiff.new
    expected = '<?xml version="1.0" encoding="US-ASCII"?>' + "\n" +
               '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"' + "\n" +
               '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + "\n" +
               '<html><head>' + "\n" +
               '<meta http-equiv="Content-Type" content="text/html; charset=US-ASCII" />' + "\n" +
               '<title>Difference</title>' + "\n" +
               '<style type="text/css">' + "\n" +
               ' body {font-family: monospace;}' + "\n" +
               ' span.del {background: hotpink; border: thin inset;}' + "\n" +
               ' span.add {background: deepskyblue; font-weight: bolder; border: thin outset;}' + "\n" +
               ' span.before-change {background: yellow; border: thin inset;}' + "\n" +
               ' span.after-change {background: lime; font-weight: bolder; border: thin outset;}' + "\n" +
               " li.entry .position {font-weight: bolder; margin-top: 0em; margin-bottom: 0em; padding-top: 0.5em; padding-bottom: 0em;}\n" +
               " li.entry .body {margin-top: 0em; margin-bottom: 0em; padding-top: 0em; padding-bottom: 0.5em;}\n" +
               " li.entry {border-top: thin solid gray;}\n" +
               '</style>' + "\n" +
               '</head><body><div>' + "\n" +
               '<span class="before-change"><del>foo bar<br />' + "\n" + '</del></span>' +
               '<span class="after-change"><ins>foo beer<br />' + "\n" + '</ins></span>' +
               '<span class="common">baz' + "</span>" + "\n</div></body></html>" + "\n"
    assert_equal(expected, docdiff.run(doc1, doc2, diff_options("line", "html")))
  end

  # Manued output at line resolution.
  def test_run_line_manued
    doc1, doc2 = sample_documents
    docdiff = DocDiff.new
    expected = manued_header + "[foo bar\n/foo beer\n]baz"
    assert_equal(expected, docdiff.run(doc1, doc2, diff_options("line", "manued")))
  end

  # Manued output at word resolution.
  def test_run_word_manued
    doc1, doc2 = sample_documents
    docdiff = DocDiff.new
    expected = manued_header + "foo [bar/beer]\nbaz"
    assert_equal(expected, docdiff.run(doc1, doc2, diff_options("word", "manued")))
  end

  # Manued output at char resolution.
  def test_run_char_manued
    doc1, doc2 = sample_documents
    docdiff = DocDiff.new
    expected = manued_header + "foo b[a/ee]r\nbaz"
    assert_equal(expected, docdiff.run(doc1, doc2, diff_options("char", "manued")))
  end

  # Config parsing: comment lines and blank lines are skipped, values are
  # stripped and converted (true/no -> booleans, 123 -> Integer), and a
  # later assignment to the same key (foo1) replaces the earlier one.
  def test_parse_config_file_content
    content = ["# comment line\n",
               " # comment line with leading space\n",
               "foo1 = bar\n",
               "foo2 = bar baz \n",
               " foo3 = 123 # comment\n",
               "foo4 = no \n",
               "foo1 = tRue\n",
               "\n",
               "",
               nil].join
    expected = {:foo1=>true, :foo2=>"bar baz", :foo3=>123, :foo4=>false}
    # parse_config_file_content is called on the class, so no DocDiff
    # instance is created here.
    assert_equal(expected,
                 DocDiff.parse_config_file_content(content))
  end

  # User-defined tags at line resolution.
  def test_run_line_user
    doc1, doc2 = sample_documents
    docdiff = DocDiff.new
    docdiff.config.update(user_tag_config)
    expected = "<!->foo bar\n</!-><!+>foo beer\n</!+><=>baz</=>"
    assert_equal(expected, docdiff.run(doc1, doc2, diff_options("line", "user")))
  end

  # User-defined tags at word resolution.
  def test_run_word_user
    doc1, doc2 = sample_documents
    docdiff = DocDiff.new
    docdiff.config.update(user_tag_config)
    expected = "<=>foo </=><!->bar</!-><!+>beer</!+><=>\n</=><=>baz</=>"
    assert_equal(expected, docdiff.run(doc1, doc2, diff_options("word", "user")))
  end

  # User-defined tags at char resolution.
  def test_run_char_user
    doc1, doc2 = sample_documents
    docdiff = DocDiff.new
    docdiff.config.update(user_tag_config)
    expected = "<=>foo </=><=>b</=><!->a</!-><!+>ee</!+><=>r</=><=>\n</=><=>baz</=>"
    assert_equal(expected, docdiff.run(doc1, doc2, diff_options("char", "user")))
  end

  # Nothing to clean up.
  def teardown
  end

  private

  # Returns the [former, latter] document pair used by every test_run_* test.
  # Fresh Document objects are built on each call so tests share no state.
  def sample_documents
    [Document.new("foo bar\nbaz", 'US-ASCII', 'LF'),
     Document.new("foo beer\nbaz", 'US-ASCII', 'LF')]
  end

  # Header block expected at the top of every manued-format result.
  def manued_header
    "defparentheses [ ]\n" +
    "defdelete /\n" +
    "defswap |\n" +
    "defcomment ;\n" +
    "defescape ~\n" +
    "deforder newer-last\n" +
    "defversion 0.9.5\n"
  end

  # Tag settings merged into DocDiff#config by the user-format tests.
  # A new hash with new strings is returned on each call.
  def user_tag_config
    {:tag_common_start        => '<=>',
     :tag_common_end          => '</=>',
     :tag_del_start           => '<->',
     :tag_del_end             => '</->',
     :tag_add_start           => '<+>',
     :tag_add_end             => '</+>',
     :tag_change_before_start => '<!->',
     :tag_change_before_end   => '</!->',
     :tag_change_after_start  => '<!+>',
     :tag_change_after_end    => '</!+>'}
  end

  # Option hash handed to DocDiff#run; digest mode is always off here.
  def diff_options(resolution, format)
    {:resolution => resolution, :format => format, :digest => false}
  end

end
|
@@ -0,0 +1,626 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# -*- coding: euc-jp; -*-
|
3
|
+
require 'test/unit'
|
4
|
+
require 'docdiff/document'
|
5
|
+
require 'nkf'
|
6
|
+
|
7
|
+
class TC_Document < Test::Unit::TestCase
|
8
|
+
|
9
|
+
def setup()
|
10
|
+
#
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_encoding()
|
14
|
+
doc = Document.new("Foo bar.\nBaz quux.")
|
15
|
+
doc.encoding = 'US-ASCII'
|
16
|
+
doc.eol = 'LF'
|
17
|
+
expected = 'US-ASCII'
|
18
|
+
assert_equal(expected, doc.encoding)
|
19
|
+
end
|
20
|
+
def test_encoding_auto()
|
21
|
+
doc = if CharString.ruby_m17n?
|
22
|
+
Document.new("Foo bar.\nBaz quux.".encode("US-ASCII"))
|
23
|
+
else
|
24
|
+
Document.new("Foo bar.\nBaz quux.")
|
25
|
+
end
|
26
|
+
expected = 'US-ASCII'
|
27
|
+
assert_equal(expected, doc.encoding)
|
28
|
+
end
|
29
|
+
def test_eol()
|
30
|
+
doc = Document.new("Foo bar.\nBaz quux.")
|
31
|
+
doc.encoding = 'US-ASCII'
|
32
|
+
doc.eol = 'LF'
|
33
|
+
expected = 'LF'
|
34
|
+
assert_equal(expected, doc.eol)
|
35
|
+
end
|
36
|
+
def test_eol_auto_lf()
|
37
|
+
doc = Document.new("Foo bar.\nBaz quux.")
|
38
|
+
expected = 'LF'
|
39
|
+
assert_equal(expected, doc.eol)
|
40
|
+
end
|
41
|
+
def test_eol_auto_none()
|
42
|
+
doc = Document.new("Foo bar.")
|
43
|
+
expected = "NONE"
|
44
|
+
assert_equal(expected, doc.eol)
|
45
|
+
end
|
46
|
+
def test_eol_char_lf()
|
47
|
+
doc = Document.new("Foo bar.\nBaz quux.")
|
48
|
+
# doc.encoding = "US-ASCII"
|
49
|
+
# doc.eol = "LF"
|
50
|
+
expected = "\n"
|
51
|
+
assert_equal(expected, doc.eol_char)
|
52
|
+
end
|
53
|
+
def test_split_by_line()
|
54
|
+
doc = Document.new("Hello, my name is Watanabe.\nI am just another Ruby porter.\n")
|
55
|
+
expected = ["Hello, my name is Watanabe.\n", "I am just another Ruby porter.\n"]
|
56
|
+
assert_equal(expected, doc.split_to_line)
|
57
|
+
end
|
58
|
+
|
59
|
+
# test eol split_to_line() method
|
60
|
+
def test_cr_split_to_line()
|
61
|
+
doc = Document.new("foo\rbar\r")
|
62
|
+
expected = ["foo\r", "bar\r"]
|
63
|
+
assert_equal(expected, doc.split_to_line)
|
64
|
+
end
|
65
|
+
def test_cr_split_to_line_chomped_lastline()
|
66
|
+
doc = Document.new("foo\rbar")
|
67
|
+
expected = ["foo\r", "bar"]
|
68
|
+
assert_equal(expected, doc.split_to_line)
|
69
|
+
end
|
70
|
+
def test_cr_split_to_line_empty_line()
|
71
|
+
doc = Document.new("foo\r\rbar\r")
|
72
|
+
expected = ["foo\r", "\r", "bar\r"]
|
73
|
+
assert_equal(expected, doc.split_to_line)
|
74
|
+
end
|
75
|
+
def test_lf_split_to_line()
|
76
|
+
doc = Document.new("foo\nbar\n")
|
77
|
+
expected = ["foo\n", "bar\n"]
|
78
|
+
assert_equal(expected, doc.split_to_line)
|
79
|
+
end
|
80
|
+
def test_lf_split_to_line_chomped_lastline()
|
81
|
+
doc = Document.new("foo\nbar")
|
82
|
+
expected = ["foo\n", "bar"]
|
83
|
+
assert_equal(expected, doc.split_to_line)
|
84
|
+
end
|
85
|
+
def test_lf_split_to_line_empty_line()
|
86
|
+
doc = Document.new("foo\n\nbar\n")
|
87
|
+
expected = ["foo\n", "\n", "bar\n"]
|
88
|
+
assert_equal(expected, doc.split_to_line)
|
89
|
+
end
|
90
|
+
def test_crlf_split_to_line()
|
91
|
+
doc = Document.new("foo\r\nbar\r\n")
|
92
|
+
expected = ["foo\r\n", "bar\r\n"]
|
93
|
+
assert_equal(expected, doc.split_to_line)
|
94
|
+
end
|
95
|
+
def test_crlf_split_to_line_chomped_lastline()
|
96
|
+
doc = Document.new("foo\r\nbar")
|
97
|
+
expected = ["foo\r\n", "bar"]
|
98
|
+
assert_equal(expected, doc.split_to_line)
|
99
|
+
end
|
100
|
+
def test_crlf_split_to_line_empty_line()
|
101
|
+
doc = Document.new("foo\r\n\r\nbar\r\n")
|
102
|
+
expected = ["foo\r\n", "\r\n", "bar\r\n"]
|
103
|
+
assert_equal(expected, doc.split_to_line)
|
104
|
+
end
|
105
|
+
|
106
|
+
# test ASCII module
|
107
|
+
def test_ascii_split_to_word()
|
108
|
+
doc = Document.new("foo bar")
|
109
|
+
expected = ["foo ", "bar"]
|
110
|
+
assert_equal(expected, doc.split_to_word)
|
111
|
+
end
|
112
|
+
def test_ascii_split_to_word_withsymbol()
|
113
|
+
doc = Document.new("foo (bar) baz-baz")
|
114
|
+
expected = ["foo ", "(bar) ", "baz-baz"]
|
115
|
+
assert_equal(expected, doc.split_to_word)
|
116
|
+
end
|
117
|
+
def test_ascii_split_to_word_withquote()
|
118
|
+
doc = Document.new("foo's 'foo' \"bar\" 'baz.'")
|
119
|
+
expected = ["foo's ", "'foo' ", "\"bar\" ", "'baz.'"]
|
120
|
+
assert_equal(expected, doc.split_to_word)
|
121
|
+
end
|
122
|
+
def test_ascii_split_to_word_withlongspace()
|
123
|
+
doc = Document.new(" foo bar")
|
124
|
+
expected = [" ", "foo ", " ", "bar"]
|
125
|
+
assert_equal(expected, doc.split_to_word)
|
126
|
+
end
|
127
|
+
def test_ascii_split_to_word_withdash()
|
128
|
+
doc = Document.new("foo -- bar, baz - quux")
|
129
|
+
expected = ["foo ", "-- ", "bar, ", "baz ", "- ", "quux"]
|
130
|
+
assert_equal(expected, doc.split_to_word)
|
131
|
+
end
|
132
|
+
def test_ascii_split_to_char()
|
133
|
+
doc = Document.new("foo bar")
|
134
|
+
expected = ["f","o","o"," ","b","a","r"]
|
135
|
+
assert_equal(expected, doc.split_to_char)
|
136
|
+
end
|
137
|
+
def test_ascii_split_to_char_with_eol_cr()
|
138
|
+
doc = Document.new("foo bar\r")
|
139
|
+
expected = ["f","o","o"," ","b","a","r","\r"]
|
140
|
+
assert_equal(expected, doc.split_to_char)
|
141
|
+
end
|
142
|
+
def test_ascii_split_to_char_with_eol_lf()
|
143
|
+
doc = Document.new("foo bar\n")
|
144
|
+
expected = ["f","o","o"," ","b","a","r","\n"]
|
145
|
+
assert_equal(expected, doc.split_to_char)
|
146
|
+
end
|
147
|
+
def test_ascii_split_to_char_with_eol_crlf()
|
148
|
+
doc = Document.new("foo bar\r\n")
|
149
|
+
expected = ["f","o","o"," ","b","a","r","\r\n"]
|
150
|
+
assert_equal(expected, doc.split_to_char)
|
151
|
+
end
|
152
|
+
def test_ascii_split_to_byte()
|
153
|
+
doc = Document.new("foo bar\r\n")
|
154
|
+
expected = ["f","o","o"," ","b","a","r","\r","\n"]
|
155
|
+
assert_equal(expected, doc.split_to_byte)
|
156
|
+
end
|
157
|
+
def test_ascii_count_byte()
|
158
|
+
doc = Document.new("foo bar\r\n")
|
159
|
+
expected = 9
|
160
|
+
assert_equal(expected, doc.count_byte)
|
161
|
+
end
|
162
|
+
def test_ascii_count_char()
|
163
|
+
doc = Document.new("foo bar\r\nbaz quux\r\n")
|
164
|
+
expected = 17
|
165
|
+
assert_equal(expected, doc.count_char)
|
166
|
+
end
|
167
|
+
def test_ascii_count_latin_graph_char()
|
168
|
+
doc = Document.new("foo bar\r\nbaz quux\r\n")
|
169
|
+
expected = 13
|
170
|
+
assert_equal(expected, doc.count_latin_graph_char)
|
171
|
+
end
|
172
|
+
def test_ascii_count_graph_char()
|
173
|
+
doc = Document.new("foo bar\r\nbaz quux\r\n")
|
174
|
+
expected = 13
|
175
|
+
assert_equal(expected, doc.count_graph_char)
|
176
|
+
end
|
177
|
+
def test_ascii_count_latin_blank_char()
|
178
|
+
doc = Document.new("foo bar\r\nbaz\tquux\r\n")
|
179
|
+
expected = 2
|
180
|
+
assert_equal(expected, doc.count_latin_blank_char)
|
181
|
+
end
|
182
|
+
def test_ascii_count_blank_char()
|
183
|
+
doc = Document.new("foo bar\r\nbaz\tquux\r\n")
|
184
|
+
expected = 2
|
185
|
+
assert_equal(expected, doc.count_blank_char)
|
186
|
+
end
|
187
|
+
def test_ascii_count_word()
|
188
|
+
doc = Document.new("foo bar \r\nbaz quux\r\n")
|
189
|
+
expected = 6
|
190
|
+
assert_equal(expected, doc.count_word)
|
191
|
+
end
|
192
|
+
def test_ascii_count_latin_word()
|
193
|
+
doc = Document.new("foo bar \r\nbaz quux\r\n")
|
194
|
+
expected = 5 # " " is also counted as a word
|
195
|
+
assert_equal(expected, doc.count_latin_word)
|
196
|
+
end
|
197
|
+
def test_ascii_count_latin_valid_word()
|
198
|
+
doc = Document.new("1 foo \r\n%%% ()\r\n")
|
199
|
+
expected = 2
|
200
|
+
assert_equal(expected, doc.count_latin_valid_word)
|
201
|
+
end
|
202
|
+
def test_ascii_count_line()
|
203
|
+
doc = Document.new("foo\r\nbar")
|
204
|
+
expected = 2
|
205
|
+
assert_equal(expected, doc.count_line)
|
206
|
+
end
|
207
|
+
def test_ascii_count_graph_line()
|
208
|
+
doc = Document.new("foo\r\n ")
|
209
|
+
expected = 1
|
210
|
+
assert_equal(expected, doc.count_graph_line)
|
211
|
+
end
|
212
|
+
def test_ascii_count_empty_line()
|
213
|
+
doc = Document.new("foo\r\n \r\n\t\r\n\r\n")
|
214
|
+
expected = 1
|
215
|
+
assert_equal(expected, doc.count_empty_line)
|
216
|
+
end
|
217
|
+
def test_ascii_count_blank_line()
|
218
|
+
doc = Document.new("\r\n \r\n\t\r\n ")
|
219
|
+
expected = 3
|
220
|
+
assert_equal(expected, doc.count_blank_line)
|
221
|
+
end
|
222
|
+
|
223
|
+
# test EUCJP module
|
224
|
+
def test_eucjp_split_to_word()
|
225
|
+
doc = Document.new(NKF.nkf("-e", "���ܸ��ʸ��foo bar"))
|
226
|
+
expected = ["���ܸ��","ʸ��","foo ","bar"].collect{|c| NKF.nkf("-e", c)}
|
227
|
+
assert_equal(expected, doc.split_to_word)
|
228
|
+
end
|
229
|
+
def test_eucjp_split_to_word_kanhira()
|
230
|
+
doc = Document.new(NKF.nkf("-e", "���ܸ��ʸ��"))
|
231
|
+
expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
|
232
|
+
assert_equal(expected, doc.split_to_word)
|
233
|
+
end
|
234
|
+
def test_eucjp_split_to_word_katahira()
|
235
|
+
doc = Document.new(NKF.nkf("-e", "�������ʤ�ʸ��"))
|
236
|
+
expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
|
237
|
+
assert_equal(expected, doc.split_to_word)
|
238
|
+
end
|
239
|
+
def test_eucjp_split_to_word_kataonbiki()
|
240
|
+
doc = Document.new(NKF.nkf("-e", "��ӡ�������"), "EUC-JP")
|
241
|
+
expected = ["��ӡ�", "����", "��"].collect{|c| NKF.nkf("-e", c)}
|
242
|
+
assert_equal(expected, doc.split_to_word)
|
243
|
+
end
|
244
|
+
def test_eucjp_split_to_word_hiraonbiki()
|
245
|
+
doc = Document.new(NKF.nkf("-e", "���ӡ���"), "EUC-JP")
|
246
|
+
expected = (["�", "��ӡ���"]).collect{|c| NKF.nkf("-e", c)}
|
247
|
+
assert_equal(expected, doc.split_to_word)
|
248
|
+
end
|
249
|
+
def test_eucjp_split_to_word_latinmix()
|
250
|
+
doc = Document.new(NKF.nkf("-e", "���ܸ��Latin��ʸ��"))
|
251
|
+
expected = ["���ܸ��", "Latin", "��", "ʸ��"].collect{|c| NKF.nkf("-e", c)}
|
252
|
+
assert_equal(expected, doc.split_to_word)
|
253
|
+
end
|
254
|
+
def test_eucjp_split_to_char()
|
255
|
+
doc = Document.new(NKF.nkf("-e", "���ܸ�a b"))
|
256
|
+
expected = ["��","��","��","a"," ","b"].collect{|c|NKF.nkf("-e",c)}
|
257
|
+
assert_equal(expected, doc.split_to_char)
|
258
|
+
end
|
259
|
+
def test_eucjp_split_to_char_with_cr()
|
260
|
+
doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r"))
|
261
|
+
expected = ["��","��","��","a"," ","b","\r"].collect{|c|NKF.nkf("-e",c)}
|
262
|
+
assert_equal(expected, doc.split_to_char)
|
263
|
+
end
|
264
|
+
def test_eucjp_split_to_char_with_lf()
|
265
|
+
doc = Document.new(NKF.nkf("-e", "���ܸ�a b\n"))
|
266
|
+
expected = ["��","��","��","a"," ","b","\n"].collect{|c|NKF.nkf("-e",c)}
|
267
|
+
assert_equal(expected, doc.split_to_char)
|
268
|
+
end
|
269
|
+
def test_eucjp_split_to_char_with_crlf()
|
270
|
+
doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
|
271
|
+
expected = ["��","��","��","a"," ","b","\r\n"].collect{|c|NKF.nkf("-e",c)}
|
272
|
+
assert_equal(expected, doc.split_to_char)
|
273
|
+
end
|
274
|
+
def test_eucjp_count_char()
|
275
|
+
doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
|
276
|
+
expected = 7
|
277
|
+
assert_equal(expected, doc.count_char)
|
278
|
+
end
|
279
|
+
def test_eucjp_count_latin_graph_char()
|
280
|
+
doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
|
281
|
+
expected = 2
|
282
|
+
assert_equal(expected, doc.count_latin_graph_char)
|
283
|
+
end
|
284
|
+
def test_eucjp_count_ja_graph_char()
|
285
|
+
doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
|
286
|
+
expected = 3
|
287
|
+
assert_equal(expected, doc.count_ja_graph_char)
|
288
|
+
end
|
289
|
+
def test_eucjp_count_graph_char()
|
290
|
+
doc = Document.new(NKF.nkf("-e", "���ܸ�a b\r\n"))
|
291
|
+
expected = 5
|
292
|
+
assert_equal(expected, doc.count_graph_char)
|
293
|
+
end
|
294
|
+
def test_eucjp_count_latin_blank_char()
|
295
|
+
doc = Document.new(NKF.nkf("-e", "���ܸ�\ta b\r\n"))
|
296
|
+
expected = 2
|
297
|
+
assert_equal(expected, doc.count_latin_blank_char)
|
298
|
+
end
|
299
|
+
def test_eucjp_count_ja_blank_char()
|
300
|
+
doc = Document.new(NKF.nkf("-e", "���ܡ���\ta b\r\n"))
|
301
|
+
expected = 1
|
302
|
+
assert_equal(expected, doc.count_ja_blank_char)
|
303
|
+
end
|
304
|
+
def test_eucjp_count_blank_char()
|
305
|
+
doc = Document.new(NKF.nkf("-e", "���ܡ���\ta b\r\n"))
|
306
|
+
expected = 3
|
307
|
+
assert_equal(expected, doc.count_blank_char)
|
308
|
+
end
|
309
|
+
def test_eucjp_count_word()
|
310
|
+
doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
|
311
|
+
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
312
|
+
assert_equal(expected, doc.count_word)
|
313
|
+
end
|
314
|
+
def test_eucjp_count_ja_word()
|
315
|
+
doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
|
316
|
+
expected = 3
|
317
|
+
assert_equal(expected, doc.count_ja_word)
|
318
|
+
end
|
319
|
+
def test_eucjp_count_latin_valid_word()
|
320
|
+
doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
|
321
|
+
expected = 2
|
322
|
+
assert_equal(expected, doc.count_latin_valid_word)
|
323
|
+
end
|
324
|
+
def test_eucjp_count_ja_valid_word()
|
325
|
+
doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
|
326
|
+
expected = 2
|
327
|
+
assert_equal(expected, doc.count_ja_valid_word)
|
328
|
+
end
|
329
|
+
def test_eucjp_count_valid_word()
|
330
|
+
doc = Document.new(NKF.nkf("-e", "���ܡ���a b --\r\n"))
|
331
|
+
expected = 4
|
332
|
+
assert_equal(expected, doc.count_valid_word)
|
333
|
+
end
|
334
|
+
def test_eucjp_count_line()
|
335
|
+
doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
|
336
|
+
expected = 6
|
337
|
+
assert_equal(expected, doc.count_line)
|
338
|
+
end
|
339
|
+
def test_eucjp_count_graph_line()
|
340
|
+
doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
|
341
|
+
expected = 3
|
342
|
+
assert_equal(expected, doc.count_graph_line)
|
343
|
+
end
|
344
|
+
def test_eucjp_count_empty_line()
|
345
|
+
doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
|
346
|
+
expected = 1
|
347
|
+
assert_equal(expected, doc.count_empty_line)
|
348
|
+
end
|
349
|
+
def test_eucjp_count_blank_line()
|
350
|
+
doc = Document.new(NKF.nkf("-e", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
|
351
|
+
expected = 2
|
352
|
+
assert_equal(expected, doc.count_blank_line)
|
353
|
+
end
|
354
|
+
|
355
|
+
# test SJIS module
|
356
|
+
def test_sjis_split_to_word()
|
357
|
+
doc = Document.new(NKF.nkf("-s", "���ܸ��ʸ��foo bar"))
|
358
|
+
expected = ["���ܸ��", "ʸ��", "foo ", "bar"].collect{|c|NKF.nkf("-s",c)}
|
359
|
+
assert_equal(expected, doc.split_to_word)
|
360
|
+
end
|
361
|
+
def test_sjisplit_s_to_word_kanhira()
|
362
|
+
doc = Document.new(NKF.nkf("-s", "���ܸ��ʸ��"))
|
363
|
+
expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-s", c)}
|
364
|
+
assert_equal(expected, doc.split_to_word)
|
365
|
+
end
|
366
|
+
def test_sjis_split_to_word_katahira()
|
367
|
+
doc = Document.new(NKF.nkf("-s", "�������ʤ�ʸ��"))
|
368
|
+
expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-s", c)}
|
369
|
+
assert_equal(expected, doc.split_to_word)
|
370
|
+
end
|
371
|
+
def test_sjis_split_to_word_kataonbiki()
|
372
|
+
doc = Document.new(NKF.nkf("-s", "��ӡ��λ���"))
|
373
|
+
expected = ["��ӡ���", "����"].collect{|c| NKF.nkf("-s", c)}
|
374
|
+
assert_equal(expected, doc.split_to_word)
|
375
|
+
end
|
376
|
+
def test_sjis_split_to_word_hiraonbiki()
|
377
|
+
doc = Document.new(NKF.nkf("-s", "���ӡ���"))
|
378
|
+
expected = ["�", "��ӡ���"].collect{|c| NKF.nkf("-s", c)}
|
379
|
+
assert_equal(expected, doc.split_to_word)
|
380
|
+
end
|
381
|
+
def test_sjis_split_to_word_latinmix()
|
382
|
+
doc = Document.new(NKF.nkf("-s", "���ܸ��Latin��ʸ��"))
|
383
|
+
expected = ["���ܸ��","Latin","��","ʸ��"].collect{|c| NKF.nkf("-s", c)}
|
384
|
+
assert_equal(expected, doc.split_to_word)
|
385
|
+
end
|
386
|
+
def test_sjis_split_to_char()
|
387
|
+
doc = Document.new(NKF.nkf("-s", "ɽ��a b"))
|
388
|
+
expected = ["ɽ","��","��","a"," ","b"].collect{|c|NKF.nkf("-s",c)}
|
389
|
+
assert_equal(expected, doc.split_to_char)
|
390
|
+
end
|
391
|
+
def test_sjis_split_to_char_with_cr()
|
392
|
+
doc = Document.new(NKF.nkf("-s", "ɽ��a b\r"))
|
393
|
+
expected = ["ɽ","��","��","a"," ","b","\r"].collect{|c|NKF.nkf("-s",c)}
|
394
|
+
assert_equal(expected, doc.split_to_char)
|
395
|
+
end
|
396
|
+
def test_sjis_split_to_char_with_lf()
|
397
|
+
doc = Document.new(NKF.nkf("-s", "ɽ��a b\n"))
|
398
|
+
expected = ["ɽ","��","��","a"," ","b","\n"].collect{|c|NKF.nkf("-s",c)}
|
399
|
+
assert_equal(expected, doc.split_to_char)
|
400
|
+
end
|
401
|
+
def test_sjis_split_to_char_with_crlf()
|
402
|
+
doc = Document.new(NKF.nkf("-s", "ɽ��a b\r\n"))
|
403
|
+
expected = ["ɽ","��","��","a"," ","b","\r\n"].collect{|c|NKF.nkf("-s",c)}
|
404
|
+
assert_equal(expected, doc.split_to_char)
|
405
|
+
end
|
406
|
+
def test_sjis_count_char()
|
407
|
+
doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
|
408
|
+
expected = 7
|
409
|
+
assert_equal(expected, doc.count_char)
|
410
|
+
end
|
411
|
+
def test_sjis_count_latin_graph_char()
|
412
|
+
doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
|
413
|
+
expected = 2
|
414
|
+
assert_equal(expected, doc.count_latin_graph_char)
|
415
|
+
end
|
416
|
+
def test_sjis_count_ja_graph_char()
|
417
|
+
doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
|
418
|
+
expected = 3
|
419
|
+
assert_equal(expected, doc.count_ja_graph_char)
|
420
|
+
end
|
421
|
+
def test_sjis_count_graph_char()
|
422
|
+
doc = Document.new(NKF.nkf("-s", "���ܸ�a b\r\n"))
|
423
|
+
expected = 5
|
424
|
+
assert_equal(expected, doc.count_graph_char)
|
425
|
+
end
|
426
|
+
def test_sjis_count_latin_blank_char()
|
427
|
+
doc = Document.new(NKF.nkf("-s", "���ܸ�\ta b\r\n"))
|
428
|
+
expected = 2
|
429
|
+
assert_equal(expected, doc.count_latin_blank_char)
|
430
|
+
end
|
431
|
+
def test_sjis_count_ja_blank_char()
|
432
|
+
doc = Document.new(NKF.nkf("-s", "���ܡ���\ta b\r\n"))
|
433
|
+
expected = 1
|
434
|
+
assert_equal(expected, doc.count_ja_blank_char)
|
435
|
+
end
|
436
|
+
def test_sjis_count_blank_char()
|
437
|
+
doc = Document.new(NKF.nkf("-s", "���ܡ���\ta b\r\n"))
|
438
|
+
expected = 3
|
439
|
+
assert_equal(expected, doc.count_blank_char)
|
440
|
+
end
|
441
|
+
def test_sjis_count_word()
|
442
|
+
doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
|
443
|
+
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
444
|
+
assert_equal(expected, doc.count_word)
|
445
|
+
end
|
446
|
+
def test_sjis_count_ja_word()
|
447
|
+
doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
|
448
|
+
expected = 3
|
449
|
+
assert_equal(expected, doc.count_ja_word)
|
450
|
+
end
|
451
|
+
def test_sjis_count_latin_valid_word()
|
452
|
+
doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
|
453
|
+
expected = 2
|
454
|
+
assert_equal(expected, doc.count_latin_valid_word)
|
455
|
+
end
|
456
|
+
def test_sjis_count_ja_valid_word()
|
457
|
+
doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
|
458
|
+
expected = 2
|
459
|
+
assert_equal(expected, doc.count_ja_valid_word)
|
460
|
+
end
|
461
|
+
def test_sjis_count_valid_word()
|
462
|
+
doc = Document.new(NKF.nkf("-s", "���ܡ���a b --\r\n"))
|
463
|
+
expected = 4
|
464
|
+
assert_equal(expected, doc.count_valid_word)
|
465
|
+
end
|
466
|
+
def test_sjis_count_line()
|
467
|
+
doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
|
468
|
+
expected = 6
|
469
|
+
assert_equal(expected, doc.count_line)
|
470
|
+
end
|
471
|
+
def test_sjis_count_graph_line()
|
472
|
+
doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
|
473
|
+
expected = 3
|
474
|
+
assert_equal(expected, doc.count_graph_line)
|
475
|
+
end
|
476
|
+
def test_sjis_count_empty_line()
|
477
|
+
doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
|
478
|
+
expected = 1
|
479
|
+
assert_equal(expected, doc.count_empty_line)
|
480
|
+
end
|
481
|
+
def test_sjis_count_blank_line()
|
482
|
+
doc = Document.new(NKF.nkf("-s", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
|
483
|
+
expected = 2
|
484
|
+
assert_equal(expected, doc.count_blank_line)
|
485
|
+
end
|
486
|
+
|
487
|
+
# test UTF8 module
|
488
|
+
def test_utf8_split_to_word()
|
489
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܸ��ʸ��foo bar"))
|
490
|
+
expected = ["���ܸ��", "ʸ��", "foo ", "bar"].collect{|c| NKF.nkf("-E -w", c)}
|
491
|
+
assert_equal(expected, doc.split_to_word)
|
492
|
+
end
|
493
|
+
def test_utf8_split_to_word_kanhira()
|
494
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܸ��ʸ��"))
|
495
|
+
expected = ["���ܸ��", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
|
496
|
+
assert_equal(expected, doc.split_to_word)
|
497
|
+
end
|
498
|
+
def test_utf8_split_to_word_katahira()
|
499
|
+
doc = Document.new(NKF.nkf("-E -w", "�������ʤ�ʸ��"))
|
500
|
+
expected = ["�������ʤ�", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
|
501
|
+
assert_equal(expected, doc.split_to_word)
|
502
|
+
end
|
503
|
+
def test_utf8_split_to_word_kataonbiki()
|
504
|
+
doc = Document.new(NKF.nkf("-E -w", "��ӡ��λ���"))
|
505
|
+
expected = ["��ӡ���", "����"].collect{|c| NKF.nkf("-E -w", c)}
|
506
|
+
assert_equal(expected, doc.split_to_word)
|
507
|
+
end
|
508
|
+
def test_utf8_split_to_word_hiraonbiki()
|
509
|
+
doc = Document.new(NKF.nkf("-E -w", "���ӡ���"))
|
510
|
+
expected = ["�", "��ӡ���"].collect{|c| NKF.nkf("-E -w", c)}
|
511
|
+
assert_equal(expected, doc.split_to_word)
|
512
|
+
end
|
513
|
+
def test_utf8_split_to_word_latinmix()
|
514
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܸ��Latin��ʸ��"))
|
515
|
+
expected = ["���ܸ��", "Latin", "��", "ʸ��"].collect{|c| NKF.nkf("-E -w", c)}
|
516
|
+
assert_equal(expected, doc.split_to_word)
|
517
|
+
end
|
518
|
+
def test_utf8_split_to_char()
|
519
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b"), "UTF-8")
|
520
|
+
expected = ["��", "��", "��", "a", " ", "b"].collect{|c| NKF.nkf("-E -w", c)}
|
521
|
+
assert_equal(expected, doc.split_to_char)
|
522
|
+
end
|
523
|
+
def test_utf8_split_to_char_with_cr()
|
524
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r"), "UTF-8")
|
525
|
+
expected = ["��","��","��","a"," ","b","\r"].collect{|c| NKF.nkf("-E -w", c)}
|
526
|
+
assert_equal(expected, doc.split_to_char)
|
527
|
+
end
|
528
|
+
def test_utf8_split_to_char_with_lf()
|
529
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\n"), "UTF-8")
|
530
|
+
expected = ["��","��","��","a"," ","b","\n"].collect{|c| NKF.nkf("-E -w", c)}
|
531
|
+
assert_equal(expected, doc.split_to_char)
|
532
|
+
end
|
533
|
+
def test_utf8_split_to_char_with_crlf()
|
534
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
|
535
|
+
expected = ["��","��","��","a"," ","b","\r\n"].collect{|c| NKF.nkf("-E -w", c)}
|
536
|
+
assert_equal(expected, doc.split_to_char)
|
537
|
+
end
|
538
|
+
def test_utf8_count_char()
|
539
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
|
540
|
+
expected = 7
|
541
|
+
assert_equal(expected, doc.count_char)
|
542
|
+
end
|
543
|
+
def test_utf8_count_latin_graph_char()
|
544
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
|
545
|
+
expected = 2
|
546
|
+
assert_equal(expected, doc.count_latin_graph_char)
|
547
|
+
end
|
548
|
+
def test_utf8_count_ja_graph_char()
|
549
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
|
550
|
+
expected = 3
|
551
|
+
assert_equal(expected, doc.count_ja_graph_char)
|
552
|
+
end
|
553
|
+
def test_utf8_count_graph_char()
|
554
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܸ�a b\r\n"), "UTF-8")
|
555
|
+
expected = 5
|
556
|
+
assert_equal(expected, doc.count_graph_char)
|
557
|
+
end
|
558
|
+
def test_utf8_count_latin_blank_char()
|
559
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܸ�\ta b\r\n"))
|
560
|
+
expected = 2
|
561
|
+
assert_equal(expected, doc.count_latin_blank_char)
|
562
|
+
end
|
563
|
+
def test_utf8_count_ja_blank_char()
|
564
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܡ���\ta b\r\n"))
|
565
|
+
expected = 1
|
566
|
+
assert_equal(expected, doc.count_ja_blank_char)
|
567
|
+
end
|
568
|
+
def test_utf8_count_blank_char()
|
569
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܡ���\ta b\r\n"))
|
570
|
+
expected = 3
|
571
|
+
assert_equal(expected, doc.count_blank_char)
|
572
|
+
end
|
573
|
+
def test_utf8_count_word()
|
574
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
|
575
|
+
expected = 7 # "--" and "\r\n" are counted as word here (though not "valid")
|
576
|
+
assert_equal(expected, doc.count_word)
|
577
|
+
end
|
578
|
+
def test_utf8_count_ja_word()
|
579
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
|
580
|
+
expected = 3
|
581
|
+
assert_equal(expected, doc.count_ja_word)
|
582
|
+
end
|
583
|
+
def test_utf8_count_latin_valid_word()
|
584
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
|
585
|
+
expected = 2
|
586
|
+
assert_equal(expected, doc.count_latin_valid_word)
|
587
|
+
end
|
588
|
+
def test_utf8_count_ja_valid_word()
|
589
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
|
590
|
+
expected = 2
|
591
|
+
assert_equal(expected, doc.count_ja_valid_word)
|
592
|
+
end
|
593
|
+
def test_utf8_count_valid_word()
|
594
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܡ���a b --\r\n"))
|
595
|
+
expected = 4
|
596
|
+
assert_equal(expected, doc.count_valid_word)
|
597
|
+
end
|
598
|
+
def test_utf8_count_line()
|
599
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
|
600
|
+
expected = 6
|
601
|
+
assert_equal(expected, doc.count_line)
|
602
|
+
end
|
603
|
+
def test_utf8_count_graph_line()
|
604
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
|
605
|
+
expected = 3
|
606
|
+
assert_equal(expected, doc.count_graph_line)
|
607
|
+
end
|
608
|
+
def test_utf8_count_empty_line()
|
609
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
|
610
|
+
expected = 1
|
611
|
+
assert_equal(expected, doc.count_empty_line)
|
612
|
+
end
|
613
|
+
def test_utf8_count_blank_line()
|
614
|
+
doc = Document.new(NKF.nkf("-E -w", "���ܸ�\r\n��\r\n \r\n\r\nfoo\r\nbar"))
|
615
|
+
expected = 2
|
616
|
+
assert_equal(expected, doc.count_blank_line)
|
617
|
+
end
|
618
|
+
|
619
|
+
|
620
|
+
|
621
|
+
|
622
|
+
def teardown()
|
623
|
+
#
|
624
|
+
end
|
625
|
+
|
626
|
+
end
|