docdiff 0.6.5 → 0.6.6

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/Gemfile +7 -7
  3. data/Guardfile +4 -4
  4. data/Makefile +1 -1
  5. data/Rakefile +6 -6
  6. data/bin/docdiff +1 -1
  7. data/devutil/Rakefile +12 -5
  8. data/devutil/char_by_charclass.rb +43 -20
  9. data/devutil/charclass_by_char.rb +40 -19
  10. data/devutil/jis0208.rb +263 -231
  11. data/devutil/jis0208_test.rb +196 -0
  12. data/doc/news.md +8 -0
  13. data/docdiff.gemspec +12 -10
  14. data/lib/doc_diff.rb +59 -60
  15. data/lib/docdiff/charstring.rb +225 -241
  16. data/lib/docdiff/cli.rb +285 -250
  17. data/lib/docdiff/diff/contours.rb +1 -1
  18. data/lib/docdiff/diff/editscript.rb +1 -1
  19. data/lib/docdiff/diff/rcsdiff.rb +1 -1
  20. data/lib/docdiff/diff/shortestpath.rb +1 -1
  21. data/lib/docdiff/diff/speculative.rb +1 -1
  22. data/lib/docdiff/diff/subsequence.rb +1 -1
  23. data/lib/docdiff/diff/unidiff.rb +1 -1
  24. data/lib/docdiff/diff.rb +1 -1
  25. data/lib/docdiff/difference.rb +71 -70
  26. data/lib/docdiff/document.rb +129 -109
  27. data/lib/docdiff/encoding/en_ascii.rb +64 -58
  28. data/lib/docdiff/encoding/ja_eucjp.rb +250 -235
  29. data/lib/docdiff/encoding/ja_sjis.rb +240 -226
  30. data/lib/docdiff/encoding/ja_utf8.rb +6952 -6939
  31. data/lib/docdiff/version.rb +1 -1
  32. data/lib/docdiff/view.rb +522 -438
  33. data/lib/docdiff.rb +2 -2
  34. data/test/charstring_test.rb +475 -351
  35. data/test/cli_test.rb +103 -101
  36. data/test/diff_test.rb +15 -16
  37. data/test/difference_test.rb +40 -31
  38. data/test/docdiff_test.rb +162 -136
  39. data/test/document_test.rb +280 -175
  40. data/test/test_helper.rb +2 -1
  41. data/test/view_test.rb +636 -497
  42. metadata +8 -8
  43. data/devutil/testjis0208.rb +0 -38
@@ -1,235 +1,249 @@
1
1
  # Japanese Shift_JIS encoding module for CharString
2
2
  # 2003- Hisashi MORITA
3
3
 
4
- # frozen_string_literal: false
4
+ # frozen_string_literal: true
5
5
 
6
6
  class DocDiff
7
- module CharString
8
- module Shift_JIS
7
+ module CharString
8
+ module ShiftJIS
9
+ ENCODING = "Shift_JIS"
9
10
 
10
- Encoding = "Shift_JIS"
11
+ # character table based on:
12
+ # ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT
11
13
 
12
- # character table based on:
13
- # ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT
14
+ CNTRL =
15
+ "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" \
16
+ "\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13" \
17
+ "\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d" \
18
+ "\x1e\x1f\x7f"
19
+ SPACE =
20
+ "\x09\x0a\x0b\x0c\x0d\x20"
21
+ BLANK =
22
+ "\x09\x20"
23
+ DIGIT =
24
+ "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39"
25
+ UPPER =
26
+ "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
27
+ "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
28
+ "\x55\x56\x57\x58\x59\x5a"
29
+ LOWER =
30
+ "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a" \
31
+ "\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74" \
32
+ "\x75\x76\x77\x78\x79\x7a"
33
+ ALPHA = UPPER + LOWER
34
+ ALNUM = DIGIT + ALPHA
35
+ PUNCT =
36
+ Regexp.quote(
37
+ "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
38
+ "\x2b\x2c\x2d\x2e\x2f\x3a\x3b\x3c\x3d\x3e" \
39
+ "\x3f\x40\x5b\x5c\x5d\x5e\x5f\x60\x7b\x7c" \
40
+ "\x7d\x7e",
41
+ )
42
+ GRAPH = DIGIT + UPPER + LOWER + PUNCT
43
+ PRINT = "\x20" + GRAPH
44
+ XDIGIT =
45
+ DIGIT +
46
+ "\x41\x42\x43\x44\x45\x46\x61\x62\x63\x64" \
47
+ "\x65\x66"
48
+ JA_SPACE =
49
+ "\x81\x40"
50
+ JA_BLANK =
51
+ "\x81\x40"
52
+ JA_ALNUM =
53
+ "\x82\x4f\x82\x50\x82\x51\x82\x52\x82\x53" \
54
+ "\x82\x54\x82\x55\x82\x56\x82\x57\x82\x58" \
55
+ "\x82\x60\x82\x61\x82\x62\x82\x63\x82\x64" \
56
+ "\x82\x65\x82\x66\x82\x67\x82\x68\x82\x69" \
57
+ "\x82\x6a\x82\x6b\x82\x6c\x82\x6d\x82\x6e" \
58
+ "\x82\x6f\x82\x70\x82\x71\x82\x72\x82\x73" \
59
+ "\x82\x74\x82\x75\x82\x76\x82\x77\x82\x78" \
60
+ "\x82\x79\x82\x81\x82\x82\x82\x83\x82\x84" \
61
+ "\x82\x85\x82\x86\x82\x87\x82\x88\x82\x89" \
62
+ "\x82\x8a\x82\x8b\x82\x8c\x82\x8d\x82\x8e" \
63
+ "\x82\x8f\x82\x90\x82\x91\x82\x92\x82\x93" \
64
+ "\x82\x94\x82\x95\x82\x96\x82\x97\x82\x98" \
65
+ "\x82\x99\x82\x9a"
66
+ JA_PUNCT =
67
+ "\x81\x41\x81\x42\x81\x43\x81\x44\x81\x45\x81\x46" \
68
+ "\x81\x47\x81\x48\x81\x49\x81\x4a\x81\x4b\x81\x4c" \
69
+ "\x81\x4d\x81\x4e\x81\x4f\x81\x50\x81\x51\x81\x52" \
70
+ "\x81\x53\x81\x54\x81\x55\x81\x56\x81\x57\x81\x58" \
71
+ "\x81\x59\x81\x5a\x81\x5b\x81\x5c\x81\x5d\x81\x5e" \
72
+ "\x81\x5f\x81\x60\x81\x61\x81\x62\x81\x63\x81\x64" \
73
+ "\x81\x65\x81\x66\x81\x67\x81\x68\x81\x69\x81\x6a" \
74
+ "\x81\x6b\x81\x6c\x81\x6d\x81\x6e\x81\x6f\x81\x70" \
75
+ "\x81\x71\x81\x72\x81\x73\x81\x74\x81\x75\x81\x76" \
76
+ "\x81\x77\x81\x78\x81\x79\x81\x7a\x81\x7b\x81\x7c" \
77
+ "\x81\x7d\x81\x7e\x81\x80\x81\x81\x81\x82\x81\x83" \
78
+ "\x81\x84\x81\x85\x81\x86\x81\x87\x81\x88\x81\x89" \
79
+ "\x81\x8a\x81\x8b\x81\x8c\x81\x8d\x81\x8e\x81\x8f" \
80
+ "\x81\x90\x81\x91\x81\x92\x81\x93\x81\x94\x81\x95" \
81
+ "\x81\x96\x81\x97\x81\x98\x81\x99\x81\x9a\x81\x9b" \
82
+ "\x81\x9c\x81\x9d\x81\x9e\x81\x9f\x81\xa0\x81\xa1" \
83
+ "\x81\xa2\x81\xa3\x81\xa4\x81\xa5\x81\xa6\x81\xa7" \
84
+ "\x81\xa8\x81\xa9\x81\xaa\x81\xab\x81\xac\x81\xb8" \
85
+ "\x81\xb9\x81\xba\x81\xbb\x81\xbc\x81\xbd\x81\xbe" \
86
+ "\x81\xbf\x81\xc8\x81\xc9\x81\xca\x81\xcb\x81\xcc" \
87
+ "\x81\xcd\x81\xce\x81\xda\x81\xdb\x81\xdc\x81\xdd" \
88
+ "\x81\xde\x81\xdf\x81\xe0\x81\xe1\x81\xe2\x81\xe3" \
89
+ "\x81\xe4\x81\xe5\x81\xe6\x81\xe7\x81\xe8\x81\xf0" \
90
+ "\x81\xf1\x81\xf2\x81\xf3\x81\xf4\x81\xf5\x81\xf6" \
91
+ "\x81\xf7\x81\xfc\x83\x9f\x83\xa0\x83\xa1\x83\xa2" \
92
+ "\x83\xa3\x83\xa4\x83\xa5\x83\xa6\x83\xa7\x83\xa8" \
93
+ "\x83\xa9\x83\xaa\x83\xab\x83\xac\x83\xad\x83\xae" \
94
+ "\x83\xaf\x83\xb0\x83\xb1\x83\xb2\x83\xb3\x83\xb4" \
95
+ "\x83\xb5\x83\xb6\x83\xbf\x83\xc0\x83\xc1\x83\xc2" \
96
+ "\x83\xc3\x83\xc4\x83\xc5\x83\xc6\x83\xc7\x83\xc8" \
97
+ "\x83\xc9\x83\xca\x83\xcb\x83\xcc\x83\xcd\x83\xce" \
98
+ "\x83\xcf\x83\xd0\x83\xd1\x83\xd2\x83\xd3\x83\xd4" \
99
+ "\x83\xd5\x83\xd6\x84\x40\x84\x41\x84\x42\x84\x43" \
100
+ "\x84\x44\x84\x45\x84\x46\x84\x47\x84\x48\x84\x49" \
101
+ "\x84\x4a\x84\x4b\x84\x4c\x84\x4d\x84\x4e\x84\x4f" \
102
+ "\x84\x50\x84\x51\x84\x52\x84\x53\x84\x54\x84\x55" \
103
+ "\x84\x56\x84\x57\x84\x58\x84\x59\x84\x5a\x84\x5b" \
104
+ "\x84\x5c\x84\x5d\x84\x5e\x84\x5f\x84\x60\x84\x70" \
105
+ "\x84\x71\x84\x72\x84\x73\x84\x74\x84\x75\x84\x76" \
106
+ "\x84\x77\x84\x78\x84\x79\x84\x7a\x84\x7b\x84\x7c" \
107
+ "\x84\x7d\x84\x7e\x84\x80\x84\x81\x84\x82\x84\x83" \
108
+ "\x84\x84\x84\x85\x84\x86\x84\x87\x84\x88\x84\x89" \
109
+ "\x84\x8a\x84\x8b\x84\x8c\x84\x8d\x84\x8e\x84\x8f" \
110
+ "\x84\x90\x84\x91\x84\x9f\x84\xa0\x84\xa1\x84\xa2" \
111
+ "\x84\xa3\x84\xa4\x84\xa5\x84\xa6\x84\xa7\x84\xa8" \
112
+ "\x84\xa9\x84\xaa\x84\xab\x84\xac\x84\xad\x84\xae" \
113
+ "\x84\xaf\x84\xb0\x84\xb1\x84\xb2\x84\xb3\x84\xb4" \
114
+ "\x84\xb5\x84\xb6\x84\xb7\x84\xb8\x84\xb9\x84\xba" \
115
+ "\x84\xbb\x84\xbc\x84\xbd\x84\xbe"
116
+ HIRA =
117
+ "\x82\x9f\x82\xa0\x82\xa1\x82\xa2\x82\xa3" \
118
+ "\x82\xa4\x82\xa5\x82\xa6\x82\xa7\x82\xa8" \
119
+ "\x82\xa9\x82\xaa\x82\xab\x82\xac\x82\xad" \
120
+ "\x82\xae\x82\xaf\x82\xb0\x82\xb1\x82\xb2" \
121
+ "\x82\xb3\x82\xb4\x82\xb5\x82\xb6\x82\xb7" \
122
+ "\x82\xb8\x82\xb9\x82\xba\x82\xbb\x82\xbc" \
123
+ "\x82\xbd\x82\xbe\x82\xbf\x82\xc0\x82\xc1" \
124
+ "\x82\xc2\x82\xc3\x82\xc4\x82\xc5\x82\xc6" \
125
+ "\x82\xc7\x82\xc8\x82\xc9\x82\xca\x82\xcb" \
126
+ "\x82\xcc\x82\xcd\x82\xce\x82\xcf\x82\xd0" \
127
+ "\x82\xd1\x82\xd2\x82\xd3\x82\xd4\x82\xd5" \
128
+ "\x82\xd6\x82\xd7\x82\xd8\x82\xd9\x82\xda" \
129
+ "\x82\xdb\x82\xdc\x82\xdd\x82\xde\x82\xdf" \
130
+ "\x82\xe0\x82\xe1\x82\xe2\x82\xe3\x82\xe4" \
131
+ "\x82\xe5\x82\xe6\x82\xe7\x82\xe8\x82\xe9" \
132
+ "\x82\xea\x82\xeb\x82\xec\x82\xed\x82\xee" \
133
+ "\x82\xef\x82\xf0\x82\xf1" # [\x82\xf1-\x82\xf1]
134
+ HIRA_EX =
135
+ HIRA +
136
+ "\x81\x5b" \
137
+ "\x81\x52\x81\x53" # add onbiki and kanagaeshi(hira)
138
+ KATA =
139
+ "\x83\x40\x83\x41\x83\x42\x83\x43\x83\x44" \
140
+ "\x83\x45\x83\x46\x83\x47\x83\x48\x83\x49" \
141
+ "\x83\x4a\x83\x4b\x83\x4c\x83\x4d\x83\x4e" \
142
+ "\x83\x4f\x83\x50\x83\x51\x83\x52\x83\x53" \
143
+ "\x83\x54\x83\x55\x83\x56\x83\x57\x83\x58" \
144
+ "\x83\x59\x83\x5a\x83\x5b\x83\x5c\x83\x5d" \
145
+ "\x83\x5e\x83\x5f\x83\x60\x83\x61\x83\x62" \
146
+ "\x83\x63\x83\x64\x83\x65\x83\x66\x83\x67" \
147
+ "\x83\x68\x83\x69\x83\x6a\x83\x6b\x83\x6c" \
148
+ "\x83\x6d\x83\x6e\x83\x6f\x83\x70\x83\x71" \
149
+ "\x83\x72\x83\x73\x83\x74\x83\x75\x83\x76" \
150
+ "\x83\x77\x83\x78\x83\x79\x83\x7a\x83\x7b" \
151
+ "\x83\x7c\x83\x7d\x83\x7e\x83\x80\x83\x81" \
152
+ "\x83\x82\x83\x83\x83\x84\x83\x85\x83\x86" \
153
+ "\x83\x87\x83\x88\x83\x89\x83\x8a\x83\x8b" \
154
+ "\x83\x8c\x83\x8d\x83\x8e\x83\x8f\x83\x90" \
155
+ "\x83\x91\x83\x92\x83\x93\x83\x94\x83\x95" \
156
+ "\x83\x96" # [\x83\x40-\x83\x7e\x83\x80-\x83\x96] # Note that \x83\x7f is excluded.
157
+ KATA_EX =
158
+ KATA +
159
+ "\x81\x5b" \
160
+ "\x81\x54\x81\x55" # add onbiki and kanagaeshi(kata)
161
+ KANJI =
162
+ "\x88\x9f-\x88\xfc" \
163
+ "\x89\x40-\x89\x9e" \
164
+ "\x89\x9f-\x89\xfc" \
165
+ "\x8a\x40-\x8a\x9e" \
166
+ "\x8a\x9f-\x8a\xfc" \
167
+ "\x8b\x40-\x8b\x9e" \
168
+ "\x8b\x9f-\x8b\xfc" \
169
+ "\x8c\x40-\x8c\x9e" \
170
+ "\x8c\x9f-\x8c\xfc" \
171
+ "\x8d\x40-\x8d\x9e" \
172
+ "\x8d\x9f-\x8d\xfc" \
173
+ "\x8e\x40-\x8e\x9e" \
174
+ "\x8e\x9f-\x8e\xfc" \
175
+ "\x8f\x40-\x8f\x9e" \
176
+ "\x8f\x9f-\x8f\xfc" \
177
+ "\x90\x40-\x90\x9e" \
178
+ "\x90\x9f-\x90\xfc" \
179
+ "\x91\x40-\x91\x9e" \
180
+ "\x91\x9f-\x91\xfc" \
181
+ "\x92\x40-\x92\x9e" \
182
+ "\x92\x9f-\x92\xfc" \
183
+ "\x93\x40-\x93\x9e" \
184
+ "\x93\x9f-\x93\xfc" \
185
+ "\x94\x40-\x94\x9e" \
186
+ "\x94\x9f-\x94\xfc" \
187
+ "\x95\x40-\x95\x9e" \
188
+ "\x95\x9f-\x95\xfc" \
189
+ "\x96\x40-\x96\x9e" \
190
+ "\x96\x9f-\x96\xfc" \
191
+ "\x97\x40-\x97\x9e" \
192
+ "\x97\x9f-\x97\xfc" \
193
+ "\x98\x40-\x98\x72" \
194
+ "\x98\x9f-\x98\xfc" \
195
+ "\x99\x40-\x99\x9e" \
196
+ "\x99\x9f-\x99\xfc" \
197
+ "\x9a\x40-\x9a\x9e" \
198
+ "\x9a\x9f-\x9a\xfc" \
199
+ "\x9b\x40-\x9b\x9e" \
200
+ "\x9b\x9f-\x9b\xfc" \
201
+ "\x9c\x40-\x9c\x9e" \
202
+ "\x9c\x9f-\x9c\xfc" \
203
+ "\x9d\x40-\x9d\x9e" \
204
+ "\x9d\x9f-\x9d\xfc" \
205
+ "\x9e\x40-\x9e\x9e" \
206
+ "\x9e\x9f-\x9e\xfc" \
207
+ "\x9f\x40-\x9f\x9e" \
208
+ "\x9f\x9f-\x9f\xfc" \
209
+ "\xe0\x40-\xe0\x9e" \
210
+ "\xe0\x9f-\xe0\xfc" \
211
+ "\xe1\x40-\xe1\x9e" \
212
+ "\xe1\x9f-\xe1\xfc" \
213
+ "\xe2\x40-\xe2\x9e" \
214
+ "\xe2\x9f-\xe2\xfc" \
215
+ "\xe3\x40-\xe3\x9e" \
216
+ "\xe3\x9f-\xe3\xfc" \
217
+ "\xe4\x40-\xe4\x9e" \
218
+ "\xe4\x9f-\xe4\xfc" \
219
+ "\xe5\x40-\xe5\x9e" \
220
+ "\xe5\x9f-\xe5\xfc" \
221
+ "\xe6\x40-\xe6\x9e" \
222
+ "\xe6\x9f-\xe6\xfc" \
223
+ "\xe7\x40-\xe7\x9e" \
224
+ "\xe7\x9f-\xe7\xfc" \
225
+ "\xe8\x40-\xe8\x9e" \
226
+ "\xe8\x9f-\xe8\xfc" \
227
+ "\xe9\x40-\xe9\x9e" \
228
+ "\xe9\x9f-\xe9\xfc" \
229
+ "\xea\x40-\xea\x9e" \
230
+ "\xea\x9f-\xea\xa4"
231
+ KANJI_EX = KANJI + "\x81\x58" # + noma
232
+ JA_GRAPH = JA_ALNUM + JA_PUNCT + HIRA + KATA + KANJI
233
+ JA_PRINT = JA_GRAPH + JA_BLANK
14
234
 
15
- CNTRL = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" \
16
- "\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13" \
17
- "\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d" \
18
- "\x1e\x1f\x7f"
19
- SPACE = "\x09\x0a\x0b\x0c\x0d\x20"
20
- BLANK = "\x09\x20"
21
- DIGIT = "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39"
22
- UPPER = "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
23
- "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
24
- "\x55\x56\x57\x58\x59\x5a"
25
- LOWER = "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a" \
26
- "\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74" \
27
- "\x75\x76\x77\x78\x79\x7a"
28
- ALPHA = UPPER + LOWER
29
- ALNUM = DIGIT + ALPHA
30
- PUNCT = "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
31
- "\x2b\x2c\x2d\x2e\x2f\x3a\x3b\x3c\x3d\x3e" \
32
- "\x3f\x40\x5b\x5c\x5d\x5e\x5f\x60\x7b\x7c" \
33
- "\x7d\x7e"
34
- GRAPH = DIGIT + UPPER + LOWER + PUNCT
35
- PRINT = "\x20" + GRAPH
36
- XDIGIT = DIGIT +
37
- "\x41\x42\x43\x44\x45\x46\x61\x62\x63\x64" \
38
- "\x65\x66"
39
- JA_SPACE = "\x81\x40"
40
- JA_BLANK = "\x81\x40"
41
- JA_ALNUM = "\x82\x4f\x82\x50\x82\x51\x82\x52\x82\x53" \
42
- "\x82\x54\x82\x55\x82\x56\x82\x57\x82\x58" \
43
- "\x82\x60\x82\x61\x82\x62\x82\x63\x82\x64" \
44
- "\x82\x65\x82\x66\x82\x67\x82\x68\x82\x69" \
45
- "\x82\x6a\x82\x6b\x82\x6c\x82\x6d\x82\x6e" \
46
- "\x82\x6f\x82\x70\x82\x71\x82\x72\x82\x73" \
47
- "\x82\x74\x82\x75\x82\x76\x82\x77\x82\x78" \
48
- "\x82\x79\x82\x81\x82\x82\x82\x83\x82\x84" \
49
- "\x82\x85\x82\x86\x82\x87\x82\x88\x82\x89" \
50
- "\x82\x8a\x82\x8b\x82\x8c\x82\x8d\x82\x8e" \
51
- "\x82\x8f\x82\x90\x82\x91\x82\x92\x82\x93" \
52
- "\x82\x94\x82\x95\x82\x96\x82\x97\x82\x98" \
53
- "\x82\x99\x82\x9a"
54
- JA_PUNCT = "\x81\x41\x81\x42\x81\x43\x81\x44\x81\x45\x81\x46" \
55
- "\x81\x47\x81\x48\x81\x49\x81\x4a\x81\x4b\x81\x4c" \
56
- "\x81\x4d\x81\x4e\x81\x4f\x81\x50\x81\x51\x81\x52" \
57
- "\x81\x53\x81\x54\x81\x55\x81\x56\x81\x57\x81\x58" \
58
- "\x81\x59\x81\x5a\x81\x5b\x81\x5c\x81\x5d\x81\x5e" \
59
- "\x81\x5f\x81\x60\x81\x61\x81\x62\x81\x63\x81\x64" \
60
- "\x81\x65\x81\x66\x81\x67\x81\x68\x81\x69\x81\x6a" \
61
- "\x81\x6b\x81\x6c\x81\x6d\x81\x6e\x81\x6f\x81\x70" \
62
- "\x81\x71\x81\x72\x81\x73\x81\x74\x81\x75\x81\x76" \
63
- "\x81\x77\x81\x78\x81\x79\x81\x7a\x81\x7b\x81\x7c" \
64
- "\x81\x7d\x81\x7e\x81\x80\x81\x81\x81\x82\x81\x83" \
65
- "\x81\x84\x81\x85\x81\x86\x81\x87\x81\x88\x81\x89" \
66
- "\x81\x8a\x81\x8b\x81\x8c\x81\x8d\x81\x8e\x81\x8f" \
67
- "\x81\x90\x81\x91\x81\x92\x81\x93\x81\x94\x81\x95" \
68
- "\x81\x96\x81\x97\x81\x98\x81\x99\x81\x9a\x81\x9b" \
69
- "\x81\x9c\x81\x9d\x81\x9e\x81\x9f\x81\xa0\x81\xa1" \
70
- "\x81\xa2\x81\xa3\x81\xa4\x81\xa5\x81\xa6\x81\xa7" \
71
- "\x81\xa8\x81\xa9\x81\xaa\x81\xab\x81\xac\x81\xb8" \
72
- "\x81\xb9\x81\xba\x81\xbb\x81\xbc\x81\xbd\x81\xbe" \
73
- "\x81\xbf\x81\xc8\x81\xc9\x81\xca\x81\xcb\x81\xcc" \
74
- "\x81\xcd\x81\xce\x81\xda\x81\xdb\x81\xdc\x81\xdd" \
75
- "\x81\xde\x81\xdf\x81\xe0\x81\xe1\x81\xe2\x81\xe3" \
76
- "\x81\xe4\x81\xe5\x81\xe6\x81\xe7\x81\xe8\x81\xf0" \
77
- "\x81\xf1\x81\xf2\x81\xf3\x81\xf4\x81\xf5\x81\xf6" \
78
- "\x81\xf7\x81\xfc\x83\x9f\x83\xa0\x83\xa1\x83\xa2" \
79
- "\x83\xa3\x83\xa4\x83\xa5\x83\xa6\x83\xa7\x83\xa8" \
80
- "\x83\xa9\x83\xaa\x83\xab\x83\xac\x83\xad\x83\xae" \
81
- "\x83\xaf\x83\xb0\x83\xb1\x83\xb2\x83\xb3\x83\xb4" \
82
- "\x83\xb5\x83\xb6\x83\xbf\x83\xc0\x83\xc1\x83\xc2" \
83
- "\x83\xc3\x83\xc4\x83\xc5\x83\xc6\x83\xc7\x83\xc8" \
84
- "\x83\xc9\x83\xca\x83\xcb\x83\xcc\x83\xcd\x83\xce" \
85
- "\x83\xcf\x83\xd0\x83\xd1\x83\xd2\x83\xd3\x83\xd4" \
86
- "\x83\xd5\x83\xd6\x84\x40\x84\x41\x84\x42\x84\x43" \
87
- "\x84\x44\x84\x45\x84\x46\x84\x47\x84\x48\x84\x49" \
88
- "\x84\x4a\x84\x4b\x84\x4c\x84\x4d\x84\x4e\x84\x4f" \
89
- "\x84\x50\x84\x51\x84\x52\x84\x53\x84\x54\x84\x55" \
90
- "\x84\x56\x84\x57\x84\x58\x84\x59\x84\x5a\x84\x5b" \
91
- "\x84\x5c\x84\x5d\x84\x5e\x84\x5f\x84\x60\x84\x70" \
92
- "\x84\x71\x84\x72\x84\x73\x84\x74\x84\x75\x84\x76" \
93
- "\x84\x77\x84\x78\x84\x79\x84\x7a\x84\x7b\x84\x7c" \
94
- "\x84\x7d\x84\x7e\x84\x80\x84\x81\x84\x82\x84\x83" \
95
- "\x84\x84\x84\x85\x84\x86\x84\x87\x84\x88\x84\x89" \
96
- "\x84\x8a\x84\x8b\x84\x8c\x84\x8d\x84\x8e\x84\x8f" \
97
- "\x84\x90\x84\x91\x84\x9f\x84\xa0\x84\xa1\x84\xa2" \
98
- "\x84\xa3\x84\xa4\x84\xa5\x84\xa6\x84\xa7\x84\xa8" \
99
- "\x84\xa9\x84\xaa\x84\xab\x84\xac\x84\xad\x84\xae" \
100
- "\x84\xaf\x84\xb0\x84\xb1\x84\xb2\x84\xb3\x84\xb4" \
101
- "\x84\xb5\x84\xb6\x84\xb7\x84\xb8\x84\xb9\x84\xba" \
102
- "\x84\xbb\x84\xbc\x84\xbd\x84\xbe"
103
- HIRA = "\x82\x9f\x82\xa0\x82\xa1\x82\xa2\x82\xa3" \
104
- "\x82\xa4\x82\xa5\x82\xa6\x82\xa7\x82\xa8" \
105
- "\x82\xa9\x82\xaa\x82\xab\x82\xac\x82\xad" \
106
- "\x82\xae\x82\xaf\x82\xb0\x82\xb1\x82\xb2" \
107
- "\x82\xb3\x82\xb4\x82\xb5\x82\xb6\x82\xb7" \
108
- "\x82\xb8\x82\xb9\x82\xba\x82\xbb\x82\xbc" \
109
- "\x82\xbd\x82\xbe\x82\xbf\x82\xc0\x82\xc1" \
110
- "\x82\xc2\x82\xc3\x82\xc4\x82\xc5\x82\xc6" \
111
- "\x82\xc7\x82\xc8\x82\xc9\x82\xca\x82\xcb" \
112
- "\x82\xcc\x82\xcd\x82\xce\x82\xcf\x82\xd0" \
113
- "\x82\xd1\x82\xd2\x82\xd3\x82\xd4\x82\xd5" \
114
- "\x82\xd6\x82\xd7\x82\xd8\x82\xd9\x82\xda" \
115
- "\x82\xdb\x82\xdc\x82\xdd\x82\xde\x82\xdf" \
116
- "\x82\xe0\x82\xe1\x82\xe2\x82\xe3\x82\xe4" \
117
- "\x82\xe5\x82\xe6\x82\xe7\x82\xe8\x82\xe9" \
118
- "\x82\xea\x82\xeb\x82\xec\x82\xed\x82\xee" \
119
- "\x82\xef\x82\xf0\x82\xf1" # [\x82\xf1-\x82\xf1]
120
- HIRA_EX = HIRA +
121
- "\x81\x5b" +
122
- "\x81\x52\x81\x53" # add onbiki and kanagaeshi(hira)
123
- KATA = "\x83\x40\x83\x41\x83\x42\x83\x43\x83\x44" \
124
- "\x83\x45\x83\x46\x83\x47\x83\x48\x83\x49" \
125
- "\x83\x4a\x83\x4b\x83\x4c\x83\x4d\x83\x4e" \
126
- "\x83\x4f\x83\x50\x83\x51\x83\x52\x83\x53" \
127
- "\x83\x54\x83\x55\x83\x56\x83\x57\x83\x58" \
128
- "\x83\x59\x83\x5a\x83\x5b\x83\x5c\x83\x5d" \
129
- "\x83\x5e\x83\x5f\x83\x60\x83\x61\x83\x62" \
130
- "\x83\x63\x83\x64\x83\x65\x83\x66\x83\x67" \
131
- "\x83\x68\x83\x69\x83\x6a\x83\x6b\x83\x6c" \
132
- "\x83\x6d\x83\x6e\x83\x6f\x83\x70\x83\x71" \
133
- "\x83\x72\x83\x73\x83\x74\x83\x75\x83\x76" \
134
- "\x83\x77\x83\x78\x83\x79\x83\x7a\x83\x7b" \
135
- "\x83\x7c\x83\x7d\x83\x7e\x83\x80\x83\x81" \
136
- "\x83\x82\x83\x83\x83\x84\x83\x85\x83\x86" \
137
- "\x83\x87\x83\x88\x83\x89\x83\x8a\x83\x8b" \
138
- "\x83\x8c\x83\x8d\x83\x8e\x83\x8f\x83\x90" \
139
- "\x83\x91\x83\x92\x83\x93\x83\x94\x83\x95" \
140
- "\x83\x96" # [\x83\x40-\x83\x7e\x83\x80-\x83\x96]
141
- # Note that \x83\x7f is excluded.
142
- KATA_EX = KATA +
143
- "\x81\x5b" +
144
- "\x81\x54\x81\x55" # add onbiki and kanagaeshi(kata)
145
- KANJI = "\x88\x9f-\x88\xfc" \
146
- "\x89\x40-\x89\x9e" \
147
- "\x89\x9f-\x89\xfc" \
148
- "\x8a\x40-\x8a\x9e" \
149
- "\x8a\x9f-\x8a\xfc" \
150
- "\x8b\x40-\x8b\x9e" \
151
- "\x8b\x9f-\x8b\xfc" \
152
- "\x8c\x40-\x8c\x9e" \
153
- "\x8c\x9f-\x8c\xfc" \
154
- "\x8d\x40-\x8d\x9e" \
155
- "\x8d\x9f-\x8d\xfc" \
156
- "\x8e\x40-\x8e\x9e" \
157
- "\x8e\x9f-\x8e\xfc" \
158
- "\x8f\x40-\x8f\x9e" \
159
- "\x8f\x9f-\x8f\xfc" \
160
- "\x90\x40-\x90\x9e" \
161
- "\x90\x9f-\x90\xfc" \
162
- "\x91\x40-\x91\x9e" \
163
- "\x91\x9f-\x91\xfc" \
164
- "\x92\x40-\x92\x9e" \
165
- "\x92\x9f-\x92\xfc" \
166
- "\x93\x40-\x93\x9e" \
167
- "\x93\x9f-\x93\xfc" \
168
- "\x94\x40-\x94\x9e" \
169
- "\x94\x9f-\x94\xfc" \
170
- "\x95\x40-\x95\x9e" \
171
- "\x95\x9f-\x95\xfc" \
172
- "\x96\x40-\x96\x9e" \
173
- "\x96\x9f-\x96\xfc" \
174
- "\x97\x40-\x97\x9e" \
175
- "\x97\x9f-\x97\xfc" \
176
- "\x98\x40-\x98\x72" \
177
- "\x98\x9f-\x98\xfc" \
178
- "\x99\x40-\x99\x9e" \
179
- "\x99\x9f-\x99\xfc" \
180
- "\x9a\x40-\x9a\x9e" \
181
- "\x9a\x9f-\x9a\xfc" \
182
- "\x9b\x40-\x9b\x9e" \
183
- "\x9b\x9f-\x9b\xfc" \
184
- "\x9c\x40-\x9c\x9e" \
185
- "\x9c\x9f-\x9c\xfc" \
186
- "\x9d\x40-\x9d\x9e" \
187
- "\x9d\x9f-\x9d\xfc" \
188
- "\x9e\x40-\x9e\x9e" \
189
- "\x9e\x9f-\x9e\xfc" \
190
- "\x9f\x40-\x9f\x9e" \
191
- "\x9f\x9f-\x9f\xfc" \
192
- "\xe0\x40-\xe0\x9e" \
193
- "\xe0\x9f-\xe0\xfc" \
194
- "\xe1\x40-\xe1\x9e" \
195
- "\xe1\x9f-\xe1\xfc" \
196
- "\xe2\x40-\xe2\x9e" \
197
- "\xe2\x9f-\xe2\xfc" \
198
- "\xe3\x40-\xe3\x9e" \
199
- "\xe3\x9f-\xe3\xfc" \
200
- "\xe4\x40-\xe4\x9e" \
201
- "\xe4\x9f-\xe4\xfc" \
202
- "\xe5\x40-\xe5\x9e" \
203
- "\xe5\x9f-\xe5\xfc" \
204
- "\xe6\x40-\xe6\x9e" \
205
- "\xe6\x9f-\xe6\xfc" \
206
- "\xe7\x40-\xe7\x9e" \
207
- "\xe7\x9f-\xe7\xfc" \
208
- "\xe8\x40-\xe8\x9e" \
209
- "\xe8\x9f-\xe8\xfc" \
210
- "\xe9\x40-\xe9\x9e" \
211
- "\xe9\x9f-\xe9\xfc" \
212
- "\xea\x40-\xea\x9e" \
213
- "\xea\x9f-\xea\xa4"
214
- KANJI_EX = KANJI + "\x81\x58" # + noma
215
- JA_GRAPH = JA_ALNUM + JA_PUNCT + HIRA + KATA + KANJI
216
- JA_PRINT = JA_GRAPH + JA_BLANK
235
+ WORD_REGEXP_SRC = [
236
+ "(?:[#{GRAPH}]+[#{BLANK}]?)",
237
+ "|(?:[#{SPACE}]+)",
238
+ "|(?:[#{KANJI_EX}]+[#{HIRA}]+)",
239
+ "|(?:[#{KATA_EX}]+[#{HIRA}]+)",
240
+ "|(?:[#{KANJI_EX}]+)",
241
+ "|(?:[#{KATA_EX}]+)",
242
+ "|(?:[#{HIRA_EX}]+)",
243
+ "|(?:.+?)",
244
+ ].join
217
245
 
218
- PUNCT.replace(Regexp.quote(PUNCT)) # kludge to avoid warning "character class has `[' without escape"
219
- PRINT.replace(Regexp.quote(PRINT)) # kludge to avoid warning "character class has `[' without escape"
220
- GRAPH.replace(Regexp.quote(GRAPH)) # kludge to avoid warning "character class has `[' without escape"
221
-
222
- WORD_REGEXP_SRC = ["(?:[#{GRAPH}]+[#{BLANK}]?)",
223
- "|(?:[#{SPACE}]+)",
224
- "|(?:[#{KANJI_EX}]+[#{HIRA}]+)",
225
- "|(?:[#{KATA_EX}]+[#{HIRA}]+)",
226
- "|(?:[#{KANJI_EX}]+)",
227
- "|(?:[#{KATA_EX}]+)",
228
- "|(?:[#{HIRA_EX}]+)",
229
- "|(?:.+?)"].join
230
-
231
- CharString.register_encoding(self)
232
-
233
- end # module SJIS
234
- end # module CharString
235
- end # class DocDiff
246
+ CharString.register_encoding(self)
247
+ end
248
+ end
249
+ end