docdiff 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. data/.gitignore +6 -0
  2. data/.travis.yml +7 -0
  3. data/Gemfile +17 -0
  4. data/Guardfile +8 -0
  5. data/Makefile +108 -0
  6. data/Rakefile +17 -0
  7. data/bin/docdiff +179 -0
  8. data/devutil/JIS0208.TXT +6952 -0
  9. data/devutil/char_by_charclass.rb +23 -0
  10. data/devutil/charclass_by_char.rb +21 -0
  11. data/devutil/jis0208.rb +343 -0
  12. data/devutil/testjis0208.rb +38 -0
  13. data/docdiff.conf.example +22 -0
  14. data/docdiff.gemspec +23 -0
  15. data/docdiffwebui.cgi +176 -0
  16. data/docdiffwebui.html +123 -0
  17. data/img/docdiff-screenshot-format-html-digest-firefox.png +0 -0
  18. data/img/docdiff-screenshot-format-html-firefox.png +0 -0
  19. data/img/docdiff-screenshot-format-tty-cmdexe-en.png +0 -0
  20. data/img/docdiff-screenshot-format-tty-cmdexe-ja.png +0 -0
  21. data/img/docdiff-screenshot-format-tty-rxvtunicode-en.png +0 -0
  22. data/img/docdiff-screenshot-format-tty-rxvtunicode-ja.png +0 -0
  23. data/img/docdiff-screenshot-format-tty-xterm-en.png +0 -0
  24. data/img/docdiff-screenshot-format-tty-xterm-ja.png +0 -0
  25. data/img/docdiff-screenshot-resolution-linewordchar-xterm.png +0 -0
  26. data/index.html +181 -0
  27. data/langfilter.rb +14 -0
  28. data/lib/doc_diff.rb +170 -0
  29. data/lib/docdiff.rb +7 -0
  30. data/lib/docdiff/charstring.rb +579 -0
  31. data/lib/docdiff/diff.rb +217 -0
  32. data/lib/docdiff/diff/contours.rb +382 -0
  33. data/lib/docdiff/diff/editscript.rb +148 -0
  34. data/lib/docdiff/diff/rcsdiff.rb +107 -0
  35. data/lib/docdiff/diff/shortestpath.rb +93 -0
  36. data/lib/docdiff/diff/speculative.rb +40 -0
  37. data/lib/docdiff/diff/subsequence.rb +39 -0
  38. data/lib/docdiff/diff/unidiff.rb +124 -0
  39. data/lib/docdiff/difference.rb +92 -0
  40. data/lib/docdiff/document.rb +127 -0
  41. data/lib/docdiff/encoding/en_ascii.rb +97 -0
  42. data/lib/docdiff/encoding/ja_eucjp.rb +269 -0
  43. data/lib/docdiff/encoding/ja_sjis.rb +260 -0
  44. data/lib/docdiff/encoding/ja_utf8.rb +6974 -0
  45. data/lib/docdiff/version.rb +3 -0
  46. data/lib/docdiff/view.rb +476 -0
  47. data/lib/viewdiff.rb +375 -0
  48. data/readme.html +713 -0
  49. data/sample/01.en.ascii.cr +1 -0
  50. data/sample/01.en.ascii.crlf +2 -0
  51. data/sample/01.en.ascii.lf +2 -0
  52. data/sample/01.ja.eucjp.lf +2 -0
  53. data/sample/01.ja.sjis.cr +1 -0
  54. data/sample/01.ja.sjis.crlf +2 -0
  55. data/sample/01.ja.utf8.crlf +2 -0
  56. data/sample/02.en.ascii.cr +1 -0
  57. data/sample/02.en.ascii.crlf +2 -0
  58. data/sample/02.en.ascii.lf +2 -0
  59. data/sample/02.ja.eucjp.lf +2 -0
  60. data/sample/02.ja.sjis.cr +1 -0
  61. data/sample/02.ja.sjis.crlf +2 -0
  62. data/sample/02.ja.utf8.crlf +2 -0
  63. data/sample/humpty_dumpty01.ascii.lf +4 -0
  64. data/sample/humpty_dumpty02.ascii.lf +4 -0
  65. data/test/charstring_test.rb +1008 -0
  66. data/test/diff_test.rb +36 -0
  67. data/test/difference_test.rb +64 -0
  68. data/test/docdiff_test.rb +193 -0
  69. data/test/document_test.rb +626 -0
  70. data/test/test_helper.rb +7 -0
  71. data/test/view_test.rb +570 -0
  72. data/test/viewdiff_test.rb +908 -0
  73. metadata +129 -0
@@ -0,0 +1,23 @@
1
#!/usr/bin/ruby
# Test character classes on ASCII characters.
# For every POSIX bracket class, prints the byte values (0x00..0xff) whose
# one-byte string matches the class: first as \xNN escapes, then (on a second,
# indented line) as the inspected characters.
# 2003-03-10 Hisashi MORITA

# Returns the elements of +bytes+ (integers 0..255) whose one-byte string
# matches the POSIX bracket expression +charclass+ (e.g. "[:alpha:]").
def member_bytes(charclass, bytes)
  # Compile once per class instead of once per byte.
  pattern = Regexp.new("[#{charclass}]")
  # [byte].pack("C") builds the one-byte string; Integer#to_a (really
  # Object#to_a) no longer exists since Ruby 1.9.
  bytes.select { |byte| pattern =~ [byte].pack("C") }
end

charclasses = ["[:cntrl:]",
               "[:space:]", "[:blank:]",
               "[:digit:]",
               "[:alpha:]", "[:alnum:]",
               "[:punct:]",
               "[:lower:]", "[:upper:]",
               "[:print:]", "[:graph:]",
               "[:xdigit:]"]
chars = (0x00 .. 0xff).to_a

charclasses.each { |charclass|
  members = member_bytes(charclass, chars)
  escapes = members.collect { |byte| sprintf("\\x%02x", byte) }.join
  # inspect[1..-2] drops the surrounding double quotes of String#inspect.
  glyphs = members.collect { |byte| [byte].pack("C").inspect[1..-2] }.join
  puts "#{charclass}\t#{escapes}\n\t\t(#{glyphs})"
}
@@ -0,0 +1,21 @@
1
#!/usr/bin/ruby
# Test character classes on ASCII characters.
# For every byte value 0x00..0xff, prints the byte as a \xNN escape, its
# inspected one-byte string, and the POSIX bracket classes that match it.
# 2003-03-10 Hisashi MORITA

# Returns the names of the classes matching the one-byte string made from
# +byte+ (integer 0..255).  +patterns+ is an array of [name, Regexp] pairs;
# the result keeps their order.
def classes_of(byte, patterns)
  # [byte].pack("C") replaces byte.to_a.pack("C*"): Integer#to_a (really
  # Object#to_a) no longer exists since Ruby 1.9.
  char = [byte].pack("C")
  patterns.select { |_name, regexp| regexp =~ char }.collect { |name, _regexp| name }
end

charclasses = ["[:cntrl:]",
               "[:space:]", "[:blank:]",
               "[:digit:]",
               "[:alpha:]", "[:alnum:]",
               "[:punct:]",
               "[:lower:]", "[:upper:]",
               "[:print:]", "[:graph:]",
               "[:xdigit:]"]
# Compile each class once up front rather than once per (byte, class) pair.
patterns = charclasses.collect { |charclass| [charclass, Regexp.new("[#{charclass}]")] }

(0x00 .. 0xff).each { |byte|
  attribute = classes_of(byte, patterns)
  puts "#{sprintf("\\x%02x", byte)} (#{[byte].pack('C').inspect})\t#{attribute.join(', ')}"
}
@@ -0,0 +1,343 @@
1
+ #!/usr/bin/ruby
2
+ # Extracts multibyte characters from JIS0208.TXT.
3
+ # (ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT)
4
+ # 2003-03-03 .. 20xx-xx-xx, Hisashi MORITA. Use freely at your own risk.
5
+ # Usage: jis0208.rb <Ku> <Ten> <Codeset> #=> \xXX...
6
+ # Example: jis0208.rb 1 1 utf-8 #=> \xe3\x80\x80
7
+
8
# In-memory index of the JIS X 0208 mapping table (JIS0208.TXT).
#
# Each non-comment table line is expected to hold three "0xXXXX" columns
# (Shift_JIS, JIS, Unicode/UTF-16) optionally followed by a "# name" comment.
# For every character the class stores the raw byte strings in Shift_JIS,
# JIS, UTF-16, EUC-JP and UTF-8, addressable by ku/ten (row/cell) number.
class JIS0208
  # Table file read by default; resolved against the current directory.
  DEFAULT_TABLE = "JIS0208.TXT"

  # Convert a UTF-16 code unit to UTF-8 (no BOM).
  #
  # utf16:: 2-byte string, high byte first.
  # Returns the 1-, 2- or 3-byte UTF-8 byte string.  Only a single 16-bit
  # unit is read, so surrogate pairs are not handled.
  def utf16_to_utf8(utf16)
    high, low = utf16.unpack("C*")
    utf16value = high * 256 + low
    if utf16value < 0x80 # 1-byte utf-8 (U+0000..U+007F, 0x7f included)
      [utf16value].pack("C")
    elsif utf16value < 0x800 # 2-byte utf-8
      [(0xC0 | (utf16value / 64)),
       (0x80 | (utf16value % 64))].pack("C*")
    else # 3-byte utf-8
      [(0xE0 | ((utf16value / 64) / 64)),
       (0x80 | ((utf16value / 64) % 64)),
       (0x80 | (utf16value % 64))].pack("C*")
    end
  end

  # Load and index the mapping table.
  #
  # table_path:: path of the table file (defaults to "JIS0208.TXT" in the
  #              current directory, as before).
  # Raises whatever File.readlines raises when the file cannot be read.
  def initialize(table_path = DEFAULT_TABLE)
    @lines = File.readlines(table_path).
      grep(/^[^\#]/).                                    # remove comments
      reject { |line| line.strip.empty? }.               # tolerate blank lines
      collect { |line| line.sub(/\s+\#[^\#]+$/, '') }    # remove unicode names
    @char_db = @lines.collect { |line|
      sjis, jis, utf16 = line.split.collect { |string|
        # "0xXXXX" to 8-bit byte string
        [string.sub(/0x/, '')].pack("H*")
      }
      jis_byte_pair = jis.unpack("C*")
      # jis + 0x8080 => euc
      euc = jis_byte_pair.collect { |byte| byte + 0x80 }.pack("C*")
      # jis - 0x2020 => ku, ten
      ku, ten = jis_byte_pair.collect { |byte| byte - 0x20 }
      utf8 = utf16_to_utf8(utf16)
      {:s=>sjis, :j=>jis, :u16=>utf16, :e=>euc, :u8=>utf8, :ku=>ku, :ten=>ten}
    }
    # ku => { ten => encodings }; the first entry seen for a cell wins.
    @characters = {}
    @char_db.each { |char|
      row = (@characters[char[:ku]] ||= {})
      row[char[:ten]] ||= {
        :s=>char[:s],
        :j=>char[:j],
        :u16=>char[:u16],
        :e=>char[:e],
        :u8=>char[:u8]
      }
    }
  end

  # Array of per-character hashes (:s, :j, :u16, :e, :u8, :ku, :ten) in file order.
  attr_reader :char_db
  # Nested hash: characters[ku][ten] => {:s, :j, :u16, :e, :u8}.
  attr_reader :characters

  # Return the character at ku/ten as a string of "\xNN" escapes.
  #
  # ku, ten:: integer row and cell numbers.
  # codeset:: name selected by its first letter: E(UC), S(JIS), J(IS), or
  #           U...16 / U...8 for UTF-16 / UTF-8.
  # Raises RuntimeError for an unrecognised codeset name or for a ku/ten
  # pair that is not in the table.
  def char(ku, ten, codeset)
    key = case codeset
          when /^[Ee]/ then :e
          when /^[Ss]/ then :s
          when /^[Jj]/ then :j
          when /^[Uu].*16$/ then :u16
          when /^[Uu].*8$/ then :u8
          else
            raise "invalid codeset name (#{codeset})\n"
          end
    row = characters[ku]
    cell = row && row[ten]
    raise "no such character (ku=#{ku}, ten=#{ten})\n" if cell.nil?
    # Always two hex digits, so low bytes (e.g. 0x00 in UTF-16) stay unambiguous.
    cell[key].unpack('C*').collect { |byte|
      sprintf("\\x%02x", byte)
    }.join
  end
end
73
+
74
if __FILE__ == $0

  # Command-line front end.
  #   jis0208.rb <Ku> <Ten> <Codeset>   prints one character as \xNN escapes
  #   jis0208.rb <Codeset> <CharClass>  prints every character of a class,
  #                                     one per line
  #
  # The character classes are described as tables of [ku, ten_range] cells
  # and expanded by shared helpers, so the EUC-JP, Shift_JIS and UTF-8
  # listings come from the same data and one JIS0208 instance.

  JA_ALNUM_CELLS = [[3, 16..25], [3, 33..58], [3, 65..90]]
  JA_BLANK_CELLS = [[1, 1..1]] # also used for the "space" class
  # CP932-only cells (ku 13: 1..30, 32..54, 63..92; ku 92: 81..94) were
  # disabled in the Shift_JIS punct list and stay excluded here.
  JA_PUNCT_CELLS = [[1, 2..94],
                    [2, 1..14], [2, 26..33], [2, 42..48],
                    [2, 60..74], [2, 82..89], [2, 94..94],
                    [6, 1..24], [6, 33..56],
                    [7, 1..33], [7, 49..81],
                    [8, 1..32]]
  HIRAGANA_CELLS = [[4, 1..83]]
  KATAKANA_CELLS = [[5, 1..86]]
  # [ku_range, last_ten] pairs: each ku in the range spans ten 1..last_ten.
  KANJI_ROWS = [[16..46, 94], [47..47, 51], [48..83, 94], [84..84, 6]]
  # Extra rows listed for Shift_JIS only (marked "cp932" in the original).
  # NOTE(review): these rows are presumably absent from the stock
  # JIS0208.TXT, in which case "sjis kanji" raises -- verify.
  CP932_KANJI_ROWS = [[89..91, 94], [92..92, 78]]

  # Expand [ku, ten_range] cells into an array of escaped characters.
  def chars_in_cells(jis, cells, codeset)
    cells.collect { |ku, tens|
      tens.collect { |ten| jis.char(ku, ten, codeset) }
    }.flatten
  end

  # One "first-last" string per ku, from ten 1 to the row's last ten.
  def kanji_row_spans(jis, rows, codeset)
    rows.collect { |kus, last_ten|
      kus.collect { |ku|
        "#{jis.char(ku, 1, codeset)}-#{jis.char(ku, last_ten, codeset)}"
      }
    }.flatten
  end

  # Kanji listing: per-row spans for EUC-JP ("e") and Shift_JIS ("s", with
  # the CP932 rows appended), every single character for UTF-8.
  def kanji_chars(jis, codeset)
    case codeset
    when "e" then kanji_row_spans(jis, KANJI_ROWS, codeset)
    when "s" then kanji_row_spans(jis, KANJI_ROWS + CP932_KANJI_ROWS, codeset)
    else
      KANJI_ROWS.collect { |kus, last_ten|
        kus.collect { |ku|
          (1..last_ten).collect { |ten| jis.char(ku, ten, codeset) }
        }
      }.flatten
    end
  end

  # Characters of +charclass+ in +codeset+ ("e", "s" or "u8").
  # Raises RuntimeError for an unknown class name.
  def chars_for(jis, codeset, charclass)
    case charclass
    when /^space/i, /^blank/i
      chars_in_cells(jis, JA_BLANK_CELLS, codeset)
    when /^alnum/i
      chars_in_cells(jis, JA_ALNUM_CELLS, codeset)
    when /^punct/i
      chars_in_cells(jis, JA_PUNCT_CELLS, codeset)
    when /^print/i # graph + blank
      chars_in_cells(jis, JA_ALNUM_CELLS + JA_PUNCT_CELLS + JA_BLANK_CELLS, codeset)
    when /^graph/i # alnum + punct
      chars_in_cells(jis, JA_ALNUM_CELLS + JA_PUNCT_CELLS, codeset)
    when /^hira/i
      chars_in_cells(jis, HIRAGANA_CELLS, codeset)
    when /^kata/i
      chars_in_cells(jis, KATAKANA_CELLS, codeset)
    when /^kanji/i
      kanji_chars(jis, codeset)
    else
      raise "invalid charclass (#{charclass}).\n"
    end
  end

  if ARGV.size == 3
    ku, ten, codeset = ARGV[0].to_i, ARGV[1].to_i, ARGV[2].to_s
    puts JIS0208.new.char(ku, ten, codeset)
    exit(0)
  elsif ARGV.size != 2
    # Shown before the table is loaded, so usage works without JIS0208.TXT.
    puts "Usage: jis0208.rb (<Ku> <Ten> <Codeset> | <Codeset> <CharClass>)"
    puts "Supported codeset: EUC-JP, Shift_JIS, UTF-8"
    puts "Supported charclass: blank, space, alnum, punct, print, graph, hiragana, katakana, kanji"
    puts "Example 1: jis0208.rb 16 1 utf-8"
    puts "Example 2: jis0208.rb euc-jp punct"
    exit(0)
  end

  codeset_name, charclass = ARGV[0].to_s, ARGV[1].to_s
  # Only the first letter of the codeset name is significant; any "u..."
  # name lists UTF-8.
  codeset = case codeset_name
            when /^e/i then "e"  # euc-jp
            when /^s/i then "s"  # sjis
            when /^u/i then "u8" # utf-8
            else
              raise "invalid codeset (#{codeset_name}) or charclass (#{charclass}).\n"
            end
  puts chars_for(JIS0208.new, codeset, charclass)

end
@@ -0,0 +1,38 @@
1
+ require 'test/unit'
2
+ require 'jis0208'
3
+ require 'nkf'
4
+ require 'iconv'
5
+
6
# Test case for the JIS0208 class (loaded through require 'jis0208').
# All test methods, and teardown, sit inside the =begin/=end comment blocks
# below, so the only live definition is an empty setup.
class TC_JIS0208 < Test::Unit::TestCase
  # Nothing to prepare.
  def setup()
    #
  end

=begin obsolete
  # Disabled: these call string_to_array / array_to_string on a JIS0208
  # instance.
  # NOTE(review): the comments call the bytes the UTF-8 form of "表" (4129),
  # but UTF-8 for U+8868 is e8 a1 a8, not e1 a1 a8 -- looks like a typo;
  # the tests only round-trip the bytes.
  def test_string_to_array()
    jis0208 = JIS0208.new
    expected = [0xe1, 0xa1, 0xa8] # "表"(4129) UTF-8 as array
    # assert_equal(expected, Iconv.iconv("UTF-8", "EUC-JP", "表").to_s)
    assert_equal(expected, jis0208.string_to_array("\xe1\xa1\xa8"))
  end
  def test_array_to_string()
    jis0208 = JIS0208.new
    expected = "\xe1\xa1\xa8" # "表"(4129) in UTF-8 string
    # assert_equal(expected, Iconv.iconv("UTF-8", "EUC-JP", "表").to_s)
    assert_equal(expected, jis0208.array_to_string([0xe1, 0xa1, 0xa8]))
  end
=end
=begin obsolete
  # Disabled: these call to_value_array on a String and to_binary_string on
  # an Array.  teardown is disabled along with them.
  def test_to_value_array()
    expected = [0xe1, 0xa1, 0xa8] # "表"(4129) UTF-8 as array
    assert_equal(expected, "\xe1\xa1\xa8".to_value_array)
  end
  def test_to_binary_string()
    expected = "\xe1\xa1\xa8" # "表"(4129) in UTF-8 string
    assert_equal(expected, [0xe1, 0xa1, 0xa8].to_binary_string)
  end
  def teardown()
    #
  end
=end
end
@@ -0,0 +1,22 @@
1
+ ## DocDiff configuration file
2
+ ## Uncomment and modify the following lines as you like.
3
+ #
4
+ # resolution = word
5
+ # encoding = UTF-8
6
+ # eol = LF
7
+ # format = html
8
+ # digest = off
9
+ # cache = off # not implemented yet
10
+ # verbose = no # not implemented yet
11
+ #
12
+ ## user-defined tags (not well-supported yet)
13
+ # tag_common_start = '<=>'
14
+ # tag_common_end = '</=>'
15
+ # tag_del_start = '<->'
16
+ # tag_del_end = '</->'
17
+ # tag_add_start = '<+>'
18
+ # tag_add_end = '</+>'
19
+ # tag_change_before_start = '<!->'
20
+ # tag_change_before_end = '</!->'
21
+ # tag_change_after_start = '<!+>'
22
+ # tag_change_after_end = '</!+>'