tmail_es 1.2.7.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGES +83 -0
  3. data/LICENSE +21 -0
  4. data/NOTES +7 -0
  5. data/README +182 -0
  6. data/Rakefile +2 -0
  7. data/ext/Makefile +20 -0
  8. data/ext/tmailscanner/tmail/MANIFEST +4 -0
  9. data/ext/tmailscanner/tmail/depend +1 -0
  10. data/ext/tmailscanner/tmail/extconf.rb +33 -0
  11. data/ext/tmailscanner/tmail/tmailscanner.c +614 -0
  12. data/lib/tmail/Makefile +18 -0
  13. data/lib/tmail/address.rb +392 -0
  14. data/lib/tmail/attachments.rb +65 -0
  15. data/lib/tmail/base64.rb +46 -0
  16. data/lib/tmail/compat.rb +41 -0
  17. data/lib/tmail/config.rb +67 -0
  18. data/lib/tmail/core_extensions.rb +63 -0
  19. data/lib/tmail/encode.rb +590 -0
  20. data/lib/tmail/header.rb +962 -0
  21. data/lib/tmail/index.rb +9 -0
  22. data/lib/tmail/interface.rb +1162 -0
  23. data/lib/tmail/loader.rb +3 -0
  24. data/lib/tmail/mail.rb +578 -0
  25. data/lib/tmail/mailbox.rb +496 -0
  26. data/lib/tmail/main.rb +6 -0
  27. data/lib/tmail/mbox.rb +3 -0
  28. data/lib/tmail/net.rb +250 -0
  29. data/lib/tmail/obsolete.rb +132 -0
  30. data/lib/tmail/parser.rb +1060 -0
  31. data/lib/tmail/parser.y +416 -0
  32. data/lib/tmail/port.rb +379 -0
  33. data/lib/tmail/quoting.rb +164 -0
  34. data/lib/tmail/require_arch.rb +58 -0
  35. data/lib/tmail/scanner.rb +49 -0
  36. data/lib/tmail/scanner_r.rb +261 -0
  37. data/lib/tmail/stringio.rb +280 -0
  38. data/lib/tmail/utils.rb +361 -0
  39. data/lib/tmail/vendor/rchardet-1.3/COPYING +504 -0
  40. data/lib/tmail/vendor/rchardet-1.3/README +12 -0
  41. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/big5freq.rb +927 -0
  42. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/big5prober.rb +42 -0
  43. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb +238 -0
  44. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/charsetgroupprober.rb +112 -0
  45. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/charsetprober.rb +75 -0
  46. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/codingstatemachine.rb +64 -0
  47. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/constants.rb +42 -0
  48. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/escprober.rb +89 -0
  49. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/escsm.rb +244 -0
  50. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb +88 -0
  51. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euckrfreq.rb +596 -0
  52. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euckrprober.rb +42 -0
  53. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euctwfreq.rb +430 -0
  54. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euctwprober.rb +42 -0
  55. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312freq.rb +474 -0
  56. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312prober.rb +42 -0
  57. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/hebrewprober.rb +289 -0
  58. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/jisfreq.rb +570 -0
  59. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb +229 -0
  60. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langbulgarianmodel.rb +229 -0
  61. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langcyrillicmodel.rb +330 -0
  62. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langgreekmodel.rb +227 -0
  63. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langhebrewmodel.rb +202 -0
  64. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langhungarianmodel.rb +226 -0
  65. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langthaimodel.rb +201 -0
  66. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/latin1prober.rb +147 -0
  67. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb +89 -0
  68. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcsgroupprober.rb +45 -0
  69. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcssm.rb +542 -0
  70. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb +124 -0
  71. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcsgroupprober.rb +56 -0
  72. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sjisprober.rb +88 -0
  73. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb +167 -0
  74. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb +87 -0
  75. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet.rb +67 -0
  76. data/lib/tmail/version.rb +40 -0
  77. data/lib/tmail.rb +6 -0
  78. data/setup.rb +1482 -0
  79. data/test/extctrl.rb +6 -0
  80. data/test/fixtures/apple_unquoted_content_type +44 -0
  81. data/test/fixtures/inline_attachment.txt +2095 -0
  82. data/test/fixtures/iso_8859_1_email_without_encoding_and_message_id.txt +16 -0
  83. data/test/fixtures/mailbox +414 -0
  84. data/test/fixtures/mailbox.zip +0 -0
  85. data/test/fixtures/mailbox_without_any_from_or_sender +10 -0
  86. data/test/fixtures/mailbox_without_from +11 -0
  87. data/test/fixtures/mailbox_without_return_path +12 -0
  88. data/test/fixtures/marked_as_iso_8859_1_but_it_is_utf_8.txt +33 -0
  89. data/test/fixtures/marked_as_utf_8_but_it_is_iso_8859_1.txt +56 -0
  90. data/test/fixtures/raw_attack_email_with_zero_length_whitespace +29 -0
  91. data/test/fixtures/raw_base64_decoded_string +0 -0
  92. data/test/fixtures/raw_base64_email +83 -0
  93. data/test/fixtures/raw_base64_encoded_string +1 -0
  94. data/test/fixtures/raw_email +14 -0
  95. data/test/fixtures/raw_email10 +20 -0
  96. data/test/fixtures/raw_email11 +34 -0
  97. data/test/fixtures/raw_email12 +32 -0
  98. data/test/fixtures/raw_email13 +29 -0
  99. data/test/fixtures/raw_email2 +114 -0
  100. data/test/fixtures/raw_email3 +70 -0
  101. data/test/fixtures/raw_email4 +59 -0
  102. data/test/fixtures/raw_email5 +19 -0
  103. data/test/fixtures/raw_email6 +20 -0
  104. data/test/fixtures/raw_email7 +66 -0
  105. data/test/fixtures/raw_email8 +47 -0
  106. data/test/fixtures/raw_email9 +28 -0
  107. data/test/fixtures/raw_email_bad_time +62 -0
  108. data/test/fixtures/raw_email_double_at_in_header +14 -0
  109. data/test/fixtures/raw_email_multiple_from +30 -0
  110. data/test/fixtures/raw_email_only_attachment +17 -0
  111. data/test/fixtures/raw_email_quoted_with_0d0a +14 -0
  112. data/test/fixtures/raw_email_reply +32 -0
  113. data/test/fixtures/raw_email_simple +11 -0
  114. data/test/fixtures/raw_email_string_in_date_field +17 -0
  115. data/test/fixtures/raw_email_trailing_dot +21 -0
  116. data/test/fixtures/raw_email_with_bad_date +48 -0
  117. data/test/fixtures/raw_email_with_illegal_boundary +58 -0
  118. data/test/fixtures/raw_email_with_mimepart_without_content_type +94 -0
  119. data/test/fixtures/raw_email_with_multipart_mixed_quoted_boundary +50 -0
  120. data/test/fixtures/raw_email_with_nested_attachment +100 -0
  121. data/test/fixtures/raw_email_with_partially_quoted_subject +14 -0
  122. data/test/fixtures/raw_email_with_quoted_attachment_filename +60 -0
  123. data/test/fixtures/raw_email_with_quoted_illegal_boundary +58 -0
  124. data/test/fixtures/raw_email_with_wrong_splitted_multibyte_encoded_word_subject +15 -0
  125. data/test/fixtures/the_only_part_is_a_word_document.txt +425 -0
  126. data/test/fixtures/unquoted_filename_in_attachment +177 -0
  127. data/test/kcode.rb +14 -0
  128. data/test/temp_test_one.rb +46 -0
  129. data/test/test_address.rb +1216 -0
  130. data/test/test_attachments.rb +133 -0
  131. data/test/test_base64.rb +64 -0
  132. data/test/test_encode.rb +139 -0
  133. data/test/test_header.rb +1021 -0
  134. data/test/test_helper.rb +9 -0
  135. data/test/test_mail.rb +756 -0
  136. data/test/test_mbox.rb +184 -0
  137. data/test/test_port.rb +440 -0
  138. data/test/test_quote.rb +107 -0
  139. data/test/test_scanner.rb +209 -0
  140. data/test/test_utils.rb +36 -0
  141. data/tmail_es.gemspec +35 -0
  142. metadata +257 -0
@@ -0,0 +1,244 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is mozilla.org code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 1998
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Mark Pilgrim - port to Python
11
+ #
12
+ # This library is free software; you can redistribute it and/or
13
+ # modify it under the terms of the GNU Lesser General Public
14
+ # License as published by the Free Software Foundation; either
15
+ # version 2.1 of the License, or (at your option) any later version.
16
+ #
17
+ # This library is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
+ # Lesser General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public
23
+ # License along with this library; if not, write to the Free Software
24
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25
+ # 02110-1301 USA
26
+ ######################### END LICENSE BLOCK #########################
27
+
28
module CharDet
  # State-machine models for the escape-sequence based encodings
  # (HZ-GB-2312, ISO-2022-CN, ISO-2022-JP, ISO-2022-KR).
  #
  # Every model bundles:
  #   'classTable'   - 256 entries, one character class per byte value
  #   'classFactor'  - the number stored alongside the class table
  #                    (presumably the number of distinct classes; confirm
  #                    against CodingStateMachine)
  #   'stateTable'   - flat transition table mixing EStart/EError/EItsMe
  #                    with numeric intermediate states
  #   'charLenTable' - one entry per class, all zero for these encodings
  #   'name'         - the charset name reported for the model

  # --- HZ-GB-2312 ---------------------------------------------------------

  HZ_cls = [
    1,0,0,0,0,0,0,0,  # 0x00 .. 0x07
    0,0,0,0,0,0,0,0,  # 0x08 .. 0x0f
    0,0,0,0,0,0,0,0,  # 0x10 .. 0x17
    0,0,0,1,0,0,0,0,  # 0x18 .. 0x1f
    0,0,0,0,0,0,0,0,  # 0x20 .. 0x27
    0,0,0,0,0,0,0,0,  # 0x28 .. 0x2f
    0,0,0,0,0,0,0,0,  # 0x30 .. 0x37
    0,0,0,0,0,0,0,0,  # 0x38 .. 0x3f
    0,0,0,0,0,0,0,0,  # 0x40 .. 0x47
    0,0,0,0,0,0,0,0,  # 0x48 .. 0x4f
    0,0,0,0,0,0,0,0,  # 0x50 .. 0x57
    0,0,0,0,0,0,0,0,  # 0x58 .. 0x5f
    0,0,0,0,0,0,0,0,  # 0x60 .. 0x67
    0,0,0,0,0,0,0,0,  # 0x68 .. 0x6f
    0,0,0,0,0,0,0,0,  # 0x70 .. 0x77
    0,0,0,4,0,5,2,0   # 0x78 .. 0x7f
  ] + Array.new(128, 1) # 0x80 .. 0xff are all class 1

  HZ_st = [
    EStart, EError, 3,      EStart, EStart, EStart, EError, EError, # 00-07
    EError, EError, EError, EError, EItsMe, EItsMe, EItsMe, EItsMe, # 08-0f
    EItsMe, EItsMe, EError, EError, EStart, EStart, 4,      EError, # 10-17
    5,      EError, 6,      EError, 5,      5,      4,      EError, # 18-1f
    4,      EError, 4,      4,      4,      EError, 4,      EError, # 20-27
    4,      EItsMe, EStart, EStart, EStart, EStart, EStart, EStart  # 28-2f
  ]

  HZCharLenTable = Array.new(6, 0)

  HZSMModel = {
    'classTable'   => HZ_cls,
    'classFactor'  => 6,
    'stateTable'   => HZ_st,
    'charLenTable' => HZCharLenTable,
    'name'         => "HZ-GB-2312"
  }

  # --- ISO-2022-CN --------------------------------------------------------

  ISO2022CN_cls = [
    2,0,0,0,0,0,0,0,  # 0x00 .. 0x07
    0,0,0,0,0,0,0,0,  # 0x08 .. 0x0f
    0,0,0,0,0,0,0,0,  # 0x10 .. 0x17
    0,0,0,1,0,0,0,0,  # 0x18 .. 0x1f
    0,0,0,0,0,0,0,0,  # 0x20 .. 0x27
    0,3,0,0,0,0,0,0,  # 0x28 .. 0x2f
    0,0,0,0,0,0,0,0,  # 0x30 .. 0x37
    0,0,0,0,0,0,0,0,  # 0x38 .. 0x3f
    0,0,0,4,0,0,0,0,  # 0x40 .. 0x47
    0,0,0,0,0,0,0,0,  # 0x48 .. 0x4f
    0,0,0,0,0,0,0,0,  # 0x50 .. 0x57
    0,0,0,0,0,0,0,0,  # 0x58 .. 0x5f
    0,0,0,0,0,0,0,0,  # 0x60 .. 0x67
    0,0,0,0,0,0,0,0,  # 0x68 .. 0x6f
    0,0,0,0,0,0,0,0,  # 0x70 .. 0x77
    0,0,0,0,0,0,0,0   # 0x78 .. 0x7f
  ] + Array.new(128, 2) # 0x80 .. 0xff are all class 2

  ISO2022CN_st = [
    EStart, 3,      EError, EStart, EStart, EStart, EStart, EStart, # 00-07
    EStart, EError, EError, EError, EError, EError, EError, EError, # 08-0f
    EError, EError, EItsMe, EItsMe, EItsMe, EItsMe, EItsMe, EItsMe, # 10-17
    EItsMe, EItsMe, EItsMe, EError, EError, EError, 4,      EError, # 18-1f
    EError, EError, EError, EItsMe, EError, EError, EError, EError, # 20-27
    5,      6,      EError, EError, EError, EError, EError, EError, # 28-2f
    EError, EError, EError, EItsMe, EError, EError, EError, EError, # 30-37
    EError, EError, EError, EError, EError, EItsMe, EError, EStart  # 38-3f
  ]

  ISO2022CNCharLenTable = Array.new(9, 0)

  ISO2022CNSMModel = {
    'classTable'   => ISO2022CN_cls,
    'classFactor'  => 9,
    'stateTable'   => ISO2022CN_st,
    'charLenTable' => ISO2022CNCharLenTable,
    'name'         => "ISO-2022-CN"
  }

  # --- ISO-2022-JP --------------------------------------------------------

  ISO2022JP_cls = [
    2,0,0,0,0,0,0,0,  # 0x00 .. 0x07
    0,0,0,0,0,0,2,2,  # 0x08 .. 0x0f
    0,0,0,0,0,0,0,0,  # 0x10 .. 0x17
    0,0,0,1,0,0,0,0,  # 0x18 .. 0x1f
    0,0,0,0,7,0,0,0,  # 0x20 .. 0x27
    3,0,0,0,0,0,0,0,  # 0x28 .. 0x2f
    0,0,0,0,0,0,0,0,  # 0x30 .. 0x37
    0,0,0,0,0,0,0,0,  # 0x38 .. 0x3f
    6,0,4,0,8,0,0,0,  # 0x40 .. 0x47
    0,9,5,0,0,0,0,0,  # 0x48 .. 0x4f
    0,0,0,0,0,0,0,0,  # 0x50 .. 0x57
    0,0,0,0,0,0,0,0,  # 0x58 .. 0x5f
    0,0,0,0,0,0,0,0,  # 0x60 .. 0x67
    0,0,0,0,0,0,0,0,  # 0x68 .. 0x6f
    0,0,0,0,0,0,0,0,  # 0x70 .. 0x77
    0,0,0,0,0,0,0,0   # 0x78 .. 0x7f
  ] + Array.new(128, 2) # 0x80 .. 0xff are all class 2

  ISO2022JP_st = [
    EStart, 3,      EError, EStart, EStart, EStart, EStart, EStart, # 00-07
    EStart, EStart, EError, EError, EError, EError, EError, EError, # 08-0f
    EError, EError, EError, EError, EItsMe, EItsMe, EItsMe, EItsMe, # 10-17
    EItsMe, EItsMe, EItsMe, EItsMe, EItsMe, EItsMe, EError, EError, # 18-1f
    EError, 5,      EError, EError, EError, 4,      EError, EError, # 20-27
    EError, EError, EError, 6,      EItsMe, EError, EItsMe, EError, # 28-2f
    EError, EError, EError, EError, EError, EError, EItsMe, EItsMe, # 30-37
    EError, EError, EError, EItsMe, EError, EError, EError, EError, # 38-3f
    EError, EError, EError, EError, EItsMe, EError, EStart, EStart  # 40-47
  ]

  ISO2022JPCharLenTable = Array.new(10, 0)

  ISO2022JPSMModel = {
    'classTable'   => ISO2022JP_cls,
    'classFactor'  => 10,
    'stateTable'   => ISO2022JP_st,
    'charLenTable' => ISO2022JPCharLenTable,
    'name'         => "ISO-2022-JP"
  }

  # --- ISO-2022-KR --------------------------------------------------------

  ISO2022KR_cls = [
    2,0,0,0,0,0,0,0,  # 0x00 .. 0x07
    0,0,0,0,0,0,0,0,  # 0x08 .. 0x0f
    0,0,0,0,0,0,0,0,  # 0x10 .. 0x17
    0,0,0,1,0,0,0,0,  # 0x18 .. 0x1f
    0,0,0,0,3,0,0,0,  # 0x20 .. 0x27
    0,4,0,0,0,0,0,0,  # 0x28 .. 0x2f
    0,0,0,0,0,0,0,0,  # 0x30 .. 0x37
    0,0,0,0,0,0,0,0,  # 0x38 .. 0x3f
    0,0,0,5,0,0,0,0,  # 0x40 .. 0x47
    0,0,0,0,0,0,0,0,  # 0x48 .. 0x4f
    0,0,0,0,0,0,0,0,  # 0x50 .. 0x57
    0,0,0,0,0,0,0,0,  # 0x58 .. 0x5f
    0,0,0,0,0,0,0,0,  # 0x60 .. 0x67
    0,0,0,0,0,0,0,0,  # 0x68 .. 0x6f
    0,0,0,0,0,0,0,0,  # 0x70 .. 0x77
    0,0,0,0,0,0,0,0   # 0x78 .. 0x7f
  ] + Array.new(128, 2) # 0x80 .. 0xff are all class 2

  ISO2022KR_st = [
    EStart, 3,      EError, EStart, EStart, EStart, EError, EError, # 00-07
    EError, EError, EError, EError, EItsMe, EItsMe, EItsMe, EItsMe, # 08-0f
    EItsMe, EItsMe, EError, EError, EError, 4,      EError, EError, # 10-17
    EError, EError, EError, EError, 5,      EError, EError, EError, # 18-1f
    EError, EError, EError, EItsMe, EStart, EStart, EStart, EStart  # 20-27
  ]

  ISO2022KRCharLenTable = Array.new(6, 0)

  ISO2022KRSMModel = {
    'classTable'   => ISO2022KR_cls,
    'classFactor'  => 6,
    'stateTable'   => ISO2022KR_st,
    'charLenTable' => ISO2022KRCharLenTable,
    'name'         => "ISO-2022-KR"
  }
end
@@ -0,0 +1,88 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is mozilla.org code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 1998
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ #
13
+ # This library is free software; you can redistribute it and/or
14
+ # modify it under the terms of the GNU Lesser General Public
15
+ # License as published by the Free Software Foundation; either
16
+ # version 2.1 of the License, or (at your option) any later version.
17
+ #
18
+ # This library is distributed in the hope that it will be useful,
19
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
+ # Lesser General Public License for more details.
22
+ #
23
+ # You should have received a copy of the GNU Lesser General Public
24
+ # License along with this library; if not, write to the Free Software
25
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
+ # 02110-1301 USA
27
+ ######################### END LICENSE BLOCK #########################
28
+
29
module CharDet
  # Charset prober for EUC-JP.
  #
  # Drives a coding state machine built from EUCJPSMModel over the input and,
  # each time the machine returns to EStart, feeds a two-element window (the
  # current element and the one before it) to both a context analyzer and a
  # distribution analyzer. The reported confidence is the larger of the two
  # analyzers' confidences.
  class EUCJPProber < MultiByteCharSetProber
    # Builds the state machine and both analyzers, then resets the prober.
    def initialize
      super()
      @_mCodingSM = CodingStateMachine.new(EUCJPSMModel)
      @_mDistributionAnalyzer = EUCJPDistributionAnalysis.new()
      @_mContextAnalyzer = EUCJPContextAnalysis.new()
      reset
    end

    # Resets the inherited prober state and the context analyzer.
    def reset
      super()
      @_mContextAnalyzer.reset()
    end

    # Returns the name of the charset this prober detects.
    def get_charset_name
      return "EUC-JP"
    end

    # Feeds one chunk of input to the prober.
    #
    # aBuf:: the chunk to analyse; it is walked one element at a time via
    #        aBuf[i..i].
    #        NOTE(review): on Ruby >= 1.9 String#length / String#[] operate on
    #        characters, not bytes - confirm callers pass binary strings.
    #
    # Returns the prober state after the chunk (the value of get_state()).
    #
    # An empty chunk is a no-op for the carried-over last element: the
    # original unconditionally evaluated aBuf[-1..-1], which is nil for an
    # empty string, and stored that into @_mLastChar[0].
    def feed(aBuf)
      aLen = aBuf.length
      (0...aLen).each do |i|
        codingState = @_mCodingSM.next_state(aBuf[i..i])
        if codingState == EError
          $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
          @_mState = ENotMe
          break
        elsif codingState == EItsMe
          @_mState = EFoundIt
          break
        elsif codingState == EStart
          charLen = @_mCodingSM.get_current_charlen()
          if i == 0
            # First element of this chunk: pair it with the element carried
            # over from the previous chunk in @_mLastChar[0].
            @_mLastChar[1] = aBuf[0..0]
            @_mContextAnalyzer.feed(@_mLastChar, charLen)
            @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
          else
            # Exclusive range: exactly the previous and the current element.
            window = aBuf[i-1...i+1]
            @_mContextAnalyzer.feed(window, charLen)
            @_mDistributionAnalyzer.feed(window, charLen)
          end
        end
      end

      # Remember the final element for the next chunk. Skipped for an empty
      # chunk so the previously carried element is not replaced by nil.
      @_mLastChar[0] = aBuf[aLen-1..aLen-1] if aLen > 0

      if get_state() == EDetecting
        # Shortcut: stop detecting once the context analyzer has seen enough
        # data and the confidence exceeds the threshold.
        if @_mContextAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
          @_mState = EFoundIt
        end
      end

      return get_state()
    end

    # Returns the higher of the context and distribution confidences.
    def get_confidence
      return [
        @_mContextAnalyzer.get_confidence,
        @_mDistributionAnalyzer.get_confidence
      ].max
    end
  end
end