docdiff 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. data/.gitignore +6 -0
  2. data/.travis.yml +7 -0
  3. data/Gemfile +17 -0
  4. data/Guardfile +8 -0
  5. data/Makefile +108 -0
  6. data/Rakefile +17 -0
  7. data/bin/docdiff +179 -0
  8. data/devutil/JIS0208.TXT +6952 -0
  9. data/devutil/char_by_charclass.rb +23 -0
  10. data/devutil/charclass_by_char.rb +21 -0
  11. data/devutil/jis0208.rb +343 -0
  12. data/devutil/testjis0208.rb +38 -0
  13. data/docdiff.conf.example +22 -0
  14. data/docdiff.gemspec +23 -0
  15. data/docdiffwebui.cgi +176 -0
  16. data/docdiffwebui.html +123 -0
  17. data/img/docdiff-screenshot-format-html-digest-firefox.png +0 -0
  18. data/img/docdiff-screenshot-format-html-firefox.png +0 -0
  19. data/img/docdiff-screenshot-format-tty-cmdexe-en.png +0 -0
  20. data/img/docdiff-screenshot-format-tty-cmdexe-ja.png +0 -0
  21. data/img/docdiff-screenshot-format-tty-rxvtunicode-en.png +0 -0
  22. data/img/docdiff-screenshot-format-tty-rxvtunicode-ja.png +0 -0
  23. data/img/docdiff-screenshot-format-tty-xterm-en.png +0 -0
  24. data/img/docdiff-screenshot-format-tty-xterm-ja.png +0 -0
  25. data/img/docdiff-screenshot-resolution-linewordchar-xterm.png +0 -0
  26. data/index.html +181 -0
  27. data/langfilter.rb +14 -0
  28. data/lib/doc_diff.rb +170 -0
  29. data/lib/docdiff.rb +7 -0
  30. data/lib/docdiff/charstring.rb +579 -0
  31. data/lib/docdiff/diff.rb +217 -0
  32. data/lib/docdiff/diff/contours.rb +382 -0
  33. data/lib/docdiff/diff/editscript.rb +148 -0
  34. data/lib/docdiff/diff/rcsdiff.rb +107 -0
  35. data/lib/docdiff/diff/shortestpath.rb +93 -0
  36. data/lib/docdiff/diff/speculative.rb +40 -0
  37. data/lib/docdiff/diff/subsequence.rb +39 -0
  38. data/lib/docdiff/diff/unidiff.rb +124 -0
  39. data/lib/docdiff/difference.rb +92 -0
  40. data/lib/docdiff/document.rb +127 -0
  41. data/lib/docdiff/encoding/en_ascii.rb +97 -0
  42. data/lib/docdiff/encoding/ja_eucjp.rb +269 -0
  43. data/lib/docdiff/encoding/ja_sjis.rb +260 -0
  44. data/lib/docdiff/encoding/ja_utf8.rb +6974 -0
  45. data/lib/docdiff/version.rb +3 -0
  46. data/lib/docdiff/view.rb +476 -0
  47. data/lib/viewdiff.rb +375 -0
  48. data/readme.html +713 -0
  49. data/sample/01.en.ascii.cr +1 -0
  50. data/sample/01.en.ascii.crlf +2 -0
  51. data/sample/01.en.ascii.lf +2 -0
  52. data/sample/01.ja.eucjp.lf +2 -0
  53. data/sample/01.ja.sjis.cr +1 -0
  54. data/sample/01.ja.sjis.crlf +2 -0
  55. data/sample/01.ja.utf8.crlf +2 -0
  56. data/sample/02.en.ascii.cr +1 -0
  57. data/sample/02.en.ascii.crlf +2 -0
  58. data/sample/02.en.ascii.lf +2 -0
  59. data/sample/02.ja.eucjp.lf +2 -0
  60. data/sample/02.ja.sjis.cr +1 -0
  61. data/sample/02.ja.sjis.crlf +2 -0
  62. data/sample/02.ja.utf8.crlf +2 -0
  63. data/sample/humpty_dumpty01.ascii.lf +4 -0
  64. data/sample/humpty_dumpty02.ascii.lf +4 -0
  65. data/test/charstring_test.rb +1008 -0
  66. data/test/diff_test.rb +36 -0
  67. data/test/difference_test.rb +64 -0
  68. data/test/docdiff_test.rb +193 -0
  69. data/test/document_test.rb +626 -0
  70. data/test/test_helper.rb +7 -0
  71. data/test/view_test.rb +570 -0
  72. data/test/viewdiff_test.rb +908 -0
  73. metadata +129 -0
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/ruby
2
+ # test character classes on ASCII characters.
3
+ # 2003-03-10 Hisashi MORITA
4
+
5
# Returns the byte values from +bytes+ that match the POSIX bracket
# class +charclass+ (e.g. "[:digit:]").
#
# Each byte is turned into a one-byte binary string with Array#pack.
# The original code called Integer#to_a for this, which no longer
# exists in Ruby 1.9 and later.
def members_of(charclass, bytes)
  pattern = Regexp.new("[#{charclass}]") # compile once per class, not once per byte
  bytes.select { |byte| pattern =~ [byte].pack("C*") }
end

charclasses = ["[:cntrl:]",
               "[:space:]", "[:blank:]",
               "[:digit:]",
               "[:alpha:]", "[:alnum:]",
               "[:punct:]",
               "[:lower:]", "[:upper:]",
               "[:print:]", "[:graph:]",
               "[:xdigit:]"]
chars = (0x00 .. 0xff).to_a

# For every character class, print the member bytes twice: first as
# \xNN escapes, then (on the next line) as Ruby's inspect form.
charclasses.each do |charclass|
  member_chars = members_of(charclass, chars)
  escaped = member_chars.map { |char| sprintf("\\x%02x", char) }.join
  # inspect[1..-2] strips the surrounding double quotes added by inspect
  inspected = member_chars.map { |char| [char].pack('C*').inspect[1..-2] }.join
  puts "#{charclass}\t#{escaped}\n\t\t(#{inspected})"
end
@@ -0,0 +1,21 @@
1
+ #!/usr/bin/ruby
2
+ # test character classes on ASCII characters.
3
+ # 2003-03-10 Hisashi MORITA
4
+
5
# Returns the entries of +charclasses+ (POSIX bracket class names such
# as "[:alpha:]") that the single byte value +char+ belongs to.
#
# The byte is converted to a one-byte binary string with Array#pack.
# The original code called Integer#to_a for this, which no longer
# exists in Ruby 1.9 and later.
def charclasses_of(char, charclasses)
  text = [char].pack("C*")
  charclasses.select { |charclass| Regexp.new("[#{charclass}]") =~ text }
end

charclasses = ["[:cntrl:]",
               "[:space:]", "[:blank:]",
               "[:digit:]",
               "[:alpha:]", "[:alnum:]",
               "[:punct:]",
               "[:lower:]", "[:upper:]",
               "[:print:]", "[:graph:]",
               "[:xdigit:]"]

# For every byte value, print its \xNN escape, its inspect form, and
# the comma-separated list of character classes that match it.
(0x00 .. 0xff).each do |char|
  attribute = charclasses_of(char, charclasses)
  puts "#{sprintf("\\x%02x", char)} (#{[char].pack('C*').inspect})\t#{attribute.join(', ')}"
end
@@ -0,0 +1,343 @@
1
+ #!/usr/bin/ruby
2
+ # Extracts multibyte characters from JIS0208.TXT.
3
+ # (ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT)
4
+ # 2003-03-03 .. 20xx-xx-xx, Hisashi MORITA. Use freely at your own risk.
5
+ # Usage: jis0208.rb <Ku> <Ten> <Codeset> #=> \xXX...
6
+ # Example: jis0208.rb 1 1 utf-8 #=> \xe3\x80\x80
7
+
8
# Lookup table built from a JIS0208.TXT mapping file.
#
# Each non-comment line of the file is expected to hold three
# "0xXXXX" fields, read here as Shift_JIS, JIS and UTF-16 codes.
# From those the class derives the EUC-JP and UTF-8 byte strings and
# the ku/ten (row/cell) numbers, and indexes everything by ku and ten.
class JIS0208
  attr_reader :char_db
  attr_reader :characters

  # Converts one UTF-16 code unit, given as a 2-byte big-endian binary
  # string, to its UTF-8 byte string. Only 1- to 3-byte UTF-8 sequences
  # are produced; surrogate pairs are not handled.
  def utf16_to_utf8(utf16) # Convert UTF-16 to UTF-8N
    high, low = utf16.unpack("C*")
    utf16value = high * 256 + low
    if utf16value < 0x80 # 1-byte utf-8 (U+0000..U+007F inclusive)
      [utf16value].pack("C*")
    elsif utf16value < 0x800 # 2-byte utf-8
      [(0xC0 | (utf16value / 64)),
       (0x80 | (utf16value % 64))].pack("C*")
    else # 3-byte utf-8
      [(0xE0 | ((utf16value / 64) / 64)),
       (0x80 | ((utf16value / 64) % 64)),
       (0x80 | (utf16value % 64))].pack("C*")
    end
  end

  # Reads and indexes the mapping table.
  #
  # path:: location of the mapping file; defaults to "JIS0208.TXT" in
  #        the current directory, as in the original implementation.
  def initialize(path = "JIS0208.TXT")
    @lines = File.readlines(path)
    @lines = @lines.grep(/^[^\#]/) # remove comments
    @lines = @lines.reject { |l| l.strip.empty? } # blank lines carry no mapping
    @lines = @lines.collect { |l| l.sub(/\s+\#[^\#]+$/, '') } # remove unicode names
    @char_db = @lines.collect { |line| parse_mapping(line) }
    @characters = {}
    @char_db.each do |char|
      row = (@characters[char[:ku]] ||= {})
      # The first mapping seen for a given ku/ten wins.
      row[char[:ten]] ||= {
        :s => char[:s],
        :j => char[:j],
        :u16 => char[:u16],
        :e => char[:e],
        :u8 => char[:u8]
      }
    end
  end

  # Returns the character at +ku+/+ten+ as a string of \xNN escapes in
  # the requested codeset.
  #
  # codeset:: name starting with E (EUC), S (Shift_JIS) or J (JIS), or
  #           starting with U and ending in 16 or 8 (UTF-16 / UTF-8).
  #
  # Raises RuntimeError for an unknown codeset name or when the table
  # has no entry for the given ku/ten.
  def char(ku, ten, codeset)
    key =
      case codeset
      when /^[Ee]/ then :e
      when /^[Ss]/ then :s
      when /^[Jj]/ then :j
      when /^[Uu].*16$/ then :u16
      when /^[Uu].*8$/ then :u8
      else
        raise "invalid codeset name (#{codeset})\n"
      end
    entry = characters[ku] && characters[ku][ten]
    raise "no such character (ku=#{ku}, ten=#{ten})\n" if entry.nil?
    # %02x keeps every byte two digits wide so bytes below 0x10
    # (possible in UTF-16) stay unambiguous.
    entry[key].unpack('C*').collect { |byte| sprintf("\\x%02x", byte) }.join
  end

  private

  # Turns one cleaned table line into a hash of byte strings plus the
  # ku/ten numbers.
  def parse_mapping(line)
    sjis, jis, utf16 = line.split.collect { |field|
      [field.sub(/0x/, '')].pack("H*") # "0xXXXX" to 8-bit byte string
    }
    jis_byte_pair = jis.unpack("C*")
    # jis + 0x8080 => euc
    euc = jis_byte_pair.collect { |byte| byte + 0x80 }.pack("C*")
    # jis - 0x2020 => ku, ten
    ku, ten = jis_byte_pair.collect { |byte| byte - 0x20 }
    {:s => sjis, :j => jis, :u16 => utf16, :e => euc,
     :u8 => utf16_to_utf8(utf16), :ku => ku, :ten => ten}
  end
end
73
+
74
if __FILE__ == $0

  # Command-line front end: prints either a single character
  # (<Ku> <Ten> <Codeset>) or every member of a character class
  # (<Codeset> <CharClass>) as \xNN escape strings.
  #
  # The original generators used Integer#to_a (e.g. "(3).to_a"), which
  # does not exist in Ruby 1.9 and later, and re-parsed JIS0208.TXT in
  # every generator call. The tables below describe the same ku/ten
  # cells as data, and the mapping file is loaded once, on first use.

  # Character-class tables: each entry is [ku, range of ten].
  JA_ALNUM_CELLS = [[3, 16..25], [3, 33..58], [3, 65..90]].freeze
  JA_BLANK_CELLS = [[1, 1..1]].freeze
  JA_SPACE_CELLS = [[1, 1..1]].freeze
  JA_PUNCT_CELLS = [[1, 2..94],
                    [2, 1..14], [2, 26..33], [2, 42..48],
                    [2, 60..74], [2, 82..89], [2, 94..94],
                    [6, 1..24], [6, 33..56],
                    [7, 1..33], [7, 49..81],
                    [8, 1..32]].freeze
  # cp932 punctuation cells that the original left disabled for sjis:
  #   [13, 1..30], [13, 32..54], [13, 63..92], [92, 81..94]
  HIRAGANA_CELLS = [[4, 1..83]].freeze
  KATAKANA_CELLS = [[5, 1..86]].freeze

  # Kanji tables: each entry is [range of ku, last ten used in those rows].
  KANJI_ROWS = [[16..46, 94], [47..47, 51], [48..83, 94], [84..84, 6]].freeze
  # Extra rows emitted for sjis only (marked "cp932" in the original).
  # NOTE(review): these rows must exist in the loaded JIS0208.TXT,
  # otherwise the lookup fails -- confirm against the bundled table.
  CP932_KANJI_ROWS = [[89..91, 94], [92..92, 78]].freeze

  # Returns the shared JIS0208 table, parsing the mapping file on first use.
  def jis0208_table
    @jis0208_table ||= JIS0208.new
  end

  # Expands [ku, ten-range] entries into one escape string per character.
  def jis0208_cells(cells, codeset)
    cells.flat_map do |ku, tens|
      tens.map { |ten| jis0208_table.char(ku, ten, codeset) }
    end
  end

  # Builds one "first-last" escape-string range per ku row.
  def jis0208_row_ranges(rows, codeset)
    rows.flat_map do |kus, last_ten|
      kus.map do |ku|
        "#{jis0208_table.char(ku, 1, codeset)}-#{jis0208_table.char(ku, last_ten, codeset)}"
      end
    end
  end

  # Expands kanji row entries into [ku, 1..last_ten] cell entries.
  def jis0208_kanji_cells(rows)
    rows.flat_map { |kus, last_ten| kus.map { |ku| [ku, 1..last_ten] } }
  end

  # euc-jp
  def euc_ja_alnum; jis0208_cells(JA_ALNUM_CELLS, "e"); end
  def euc_ja_blank; jis0208_cells(JA_BLANK_CELLS, "e"); end
  def euc_ja_print; euc_ja_graph + euc_ja_blank; end
  def euc_ja_graph; euc_ja_alnum + euc_ja_punct; end
  def euc_ja_punct; jis0208_cells(JA_PUNCT_CELLS, "e"); end
  def euc_ja_space; jis0208_cells(JA_SPACE_CELLS, "e"); end
  def euc_hiragana; jis0208_cells(HIRAGANA_CELLS, "e"); end
  def euc_katakana; jis0208_cells(KATAKANA_CELLS, "e"); end
  # Kanji are reported as one "first-last" range per row.
  def euc_kanji; jis0208_row_ranges(KANJI_ROWS, 'e'); end

  # sjis (cp932)
  def sjis_ja_alnum; jis0208_cells(JA_ALNUM_CELLS, "s"); end
  def sjis_ja_blank; jis0208_cells(JA_BLANK_CELLS, "s"); end
  def sjis_ja_print; sjis_ja_graph + sjis_ja_blank; end
  def sjis_ja_graph; sjis_ja_alnum + sjis_ja_punct; end
  def sjis_ja_punct; jis0208_cells(JA_PUNCT_CELLS, "s"); end
  def sjis_ja_space; jis0208_cells(JA_SPACE_CELLS, "s"); end
  def sjis_hiragana; jis0208_cells(HIRAGANA_CELLS, "s"); end
  def sjis_katakana; jis0208_cells(KATAKANA_CELLS, "s"); end
  # Kanji are reported as per-row ranges, including the cp932 rows.
  def sjis_kanji; jis0208_row_ranges(KANJI_ROWS + CP932_KANJI_ROWS, 's'); end

  # utf8
  def utf8_ja_alnum; jis0208_cells(JA_ALNUM_CELLS, "u8"); end
  def utf8_ja_blank; jis0208_cells(JA_BLANK_CELLS, "u8"); end
  def utf8_ja_print; utf8_ja_graph + utf8_ja_blank; end
  def utf8_ja_graph; utf8_ja_alnum + utf8_ja_punct; end
  def utf8_ja_punct; jis0208_cells(JA_PUNCT_CELLS, "u8"); end
  def utf8_ja_space; jis0208_cells(JA_SPACE_CELLS, "u8"); end
  def utf8_hiragana; jis0208_cells(HIRAGANA_CELLS, "utf-8"); end
  def utf8_katakana; jis0208_cells(KATAKANA_CELLS, "utf-8"); end
  # Unlike the euc/sjis variants, every kanji is listed individually.
  def utf8_kanji; jis0208_cells(jis0208_kanji_cells(KANJI_ROWS), "utf-8"); end

  if ARGV.size == 3
    ku, ten, codeset = ARGV[0].to_i, ARGV[1].to_i, ARGV[2].to_s
    puts jis0208_table.char(ku, ten, codeset)
    exit(0)
  elsif ARGV.size == 2
    codeset, charclass = ARGV[0].to_s, ARGV[1].to_s
  else
    # The usage text no longer needs JIS0208.TXT, because the table is
    # only loaded when a lookup is actually performed.
    puts "Usage: jis0208.rb (<Ku> <Ten> <Codeset> | <Codeset> <CharClass>)"
    puts "Supported codeset: EUC-JP, Shift_JIS, UTF-8"
    puts "Supported charclass: blank, space, alnum, punct, print, graph, hiragana, katakana, kanji"
    puts "Example 1: jis0208.rb 16 1 utf-8"
    puts "Example 2: jis0208.rb euc-jp punct"
    exit(0)
  end

  # The codeset is validated first and the charclass second, matching
  # the order (and the messages) of the original nested case statements.
  prefix =
    case codeset
    when /^e/i then "euc"  # euc-jp
    when /^s/i then "sjis" # sjis
    when /^u/i then "utf8" # utf-8
    else
      raise "invalid codeset (#{codeset}) or charclass (#{charclass}).\n"
    end

  generator =
    case charclass
    when /^space/i then "#{prefix}_ja_space"
    when /^blank/i then "#{prefix}_ja_blank"
    when /^alnum/i then "#{prefix}_ja_alnum"
    when /^punct/i then "#{prefix}_ja_punct"
    when /^print/i then "#{prefix}_ja_print"
    when /^graph/i then "#{prefix}_ja_graph"
    when /^hira/i then "#{prefix}_hiragana"
    when /^kata/i then "#{prefix}_katakana"
    when /^kanji/i then "#{prefix}_kanji"
    else
      raise "invalid charclass (#{charclass}).\n"
    end

  # generator is always one of the fixed method names built above.
  puts send(generator)

end
@@ -0,0 +1,38 @@
1
+ require 'test/unit'
2
+ require 'jis0208'
3
+ require 'nkf'
4
+ require 'iconv'
5
+
6
# Test case for the JIS0208 helper.
#
# Every test in this class is currently disabled: both groups below are
# wrapped in =begin/=end and marked obsolete, so only the empty setup
# hook remains active. The second disabled group also contains the
# teardown hook.
class TC_JIS0208 < Test::Unit::TestCase
  # No fixtures are needed.
  def setup
  end

=begin obsolete
  def test_string_to_array()
    jis0208 = JIS0208.new
    expected = [0xe1, 0xa1, 0xa8] # "表"(4129) UTF-8 as array
    # NOTE(review): UTF-8 for "表" (U+8868) would be e8 a1 a8 -- confirm
    # whether 0xe1 is a typo.
    # assert_equal(expected, Iconv.iconv("UTF-8", "EUC-JP", "表").to_s)
    assert_equal(expected, jis0208.string_to_array("\xe1\xa1\xa8"))
  end
  def test_array_to_string()
    jis0208 = JIS0208.new
    expected = "\xe1\xa1\xa8" # "表"(4129) in UTF-8 string
    # assert_equal(expected, Iconv.iconv("UTF-8", "EUC-JP", "表").to_s)
    assert_equal(expected, jis0208.array_to_string([0xe1, 0xa1, 0xa8]))
  end
=end
=begin obsolete
  def test_to_value_array()
    expected = [0xe1, 0xa1, 0xa8] # "表"(4129) UTF-8 as array
    assert_equal(expected, "\xe1\xa1\xa8".to_value_array)
  end
  def test_to_binary_string()
    expected = "\xe1\xa1\xa8" # "表"(4129) in UTF-8 string
    assert_equal(expected, [0xe1, 0xa1, 0xa8].to_binary_string)
  end
  def teardown()
    #
  end
=end
end
@@ -0,0 +1,22 @@
1
+ ## DocDiff configuration file
2
+ ## Uncomment and modify the following lines as you like.
3
+ #
4
+ # resolution = word
5
+ # encoding = UTF-8
6
+ # eol = LF
7
+ # format = html
8
+ # digest = off
9
+ # cache = off # not implemented yet
10
+ # verbose = no # not implemented yet
11
+ #
12
+ ## user-defined tags (not well-supported yet)
13
+ # tag_common_start = '<=>'
14
+ # tag_common_end = '</=>'
15
+ # tag_del_start = '<->'
16
+ # tag_del_end = '</->'
17
+ # tag_add_start = '<+>'
18
+ # tag_add_end = '</+>'
19
+ # tag_change_before_start = '<!->'
20
+ # tag_change_before_end = '</!->'
21
+ # tag_change_after_start = '<!+>'
22
+ # tag_change_after_end = '</!+>'