tmail_es 1.2.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGES +83 -0
  3. data/LICENSE +21 -0
  4. data/NOTES +7 -0
  5. data/README +182 -0
  6. data/Rakefile +2 -0
  7. data/ext/Makefile +20 -0
  8. data/ext/tmailscanner/tmail/MANIFEST +4 -0
  9. data/ext/tmailscanner/tmail/depend +1 -0
  10. data/ext/tmailscanner/tmail/extconf.rb +33 -0
  11. data/ext/tmailscanner/tmail/tmailscanner.c +614 -0
  12. data/lib/tmail/Makefile +18 -0
  13. data/lib/tmail/address.rb +392 -0
  14. data/lib/tmail/attachments.rb +65 -0
  15. data/lib/tmail/base64.rb +46 -0
  16. data/lib/tmail/compat.rb +41 -0
  17. data/lib/tmail/config.rb +67 -0
  18. data/lib/tmail/core_extensions.rb +63 -0
  19. data/lib/tmail/encode.rb +590 -0
  20. data/lib/tmail/header.rb +962 -0
  21. data/lib/tmail/index.rb +9 -0
  22. data/lib/tmail/interface.rb +1162 -0
  23. data/lib/tmail/loader.rb +3 -0
  24. data/lib/tmail/mail.rb +578 -0
  25. data/lib/tmail/mailbox.rb +496 -0
  26. data/lib/tmail/main.rb +6 -0
  27. data/lib/tmail/mbox.rb +3 -0
  28. data/lib/tmail/net.rb +250 -0
  29. data/lib/tmail/obsolete.rb +132 -0
  30. data/lib/tmail/parser.rb +1060 -0
  31. data/lib/tmail/parser.y +416 -0
  32. data/lib/tmail/port.rb +379 -0
  33. data/lib/tmail/quoting.rb +164 -0
  34. data/lib/tmail/require_arch.rb +58 -0
  35. data/lib/tmail/scanner.rb +49 -0
  36. data/lib/tmail/scanner_r.rb +261 -0
  37. data/lib/tmail/stringio.rb +280 -0
  38. data/lib/tmail/utils.rb +361 -0
  39. data/lib/tmail/vendor/rchardet-1.3/COPYING +504 -0
  40. data/lib/tmail/vendor/rchardet-1.3/README +12 -0
  41. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/big5freq.rb +927 -0
  42. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/big5prober.rb +42 -0
  43. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb +238 -0
  44. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/charsetgroupprober.rb +112 -0
  45. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/charsetprober.rb +75 -0
  46. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/codingstatemachine.rb +64 -0
  47. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/constants.rb +42 -0
  48. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/escprober.rb +89 -0
  49. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/escsm.rb +244 -0
  50. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb +88 -0
  51. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euckrfreq.rb +596 -0
  52. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euckrprober.rb +42 -0
  53. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euctwfreq.rb +430 -0
  54. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euctwprober.rb +42 -0
  55. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312freq.rb +474 -0
  56. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312prober.rb +42 -0
  57. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/hebrewprober.rb +289 -0
  58. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/jisfreq.rb +570 -0
  59. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb +229 -0
  60. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langbulgarianmodel.rb +229 -0
  61. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langcyrillicmodel.rb +330 -0
  62. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langgreekmodel.rb +227 -0
  63. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langhebrewmodel.rb +202 -0
  64. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langhungarianmodel.rb +226 -0
  65. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langthaimodel.rb +201 -0
  66. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/latin1prober.rb +147 -0
  67. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb +89 -0
  68. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcsgroupprober.rb +45 -0
  69. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcssm.rb +542 -0
  70. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb +124 -0
  71. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcsgroupprober.rb +56 -0
  72. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sjisprober.rb +88 -0
  73. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb +167 -0
  74. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb +87 -0
  75. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet.rb +67 -0
  76. data/lib/tmail/version.rb +40 -0
  77. data/lib/tmail.rb +6 -0
  78. data/setup.rb +1482 -0
  79. data/test/extctrl.rb +6 -0
  80. data/test/fixtures/apple_unquoted_content_type +44 -0
  81. data/test/fixtures/inline_attachment.txt +2095 -0
  82. data/test/fixtures/iso_8859_1_email_without_encoding_and_message_id.txt +16 -0
  83. data/test/fixtures/mailbox +414 -0
  84. data/test/fixtures/mailbox.zip +0 -0
  85. data/test/fixtures/mailbox_without_any_from_or_sender +10 -0
  86. data/test/fixtures/mailbox_without_from +11 -0
  87. data/test/fixtures/mailbox_without_return_path +12 -0
  88. data/test/fixtures/marked_as_iso_8859_1_but_it_is_utf_8.txt +33 -0
  89. data/test/fixtures/marked_as_utf_8_but_it_is_iso_8859_1.txt +56 -0
  90. data/test/fixtures/raw_attack_email_with_zero_length_whitespace +29 -0
  91. data/test/fixtures/raw_base64_decoded_string +0 -0
  92. data/test/fixtures/raw_base64_email +83 -0
  93. data/test/fixtures/raw_base64_encoded_string +1 -0
  94. data/test/fixtures/raw_email +14 -0
  95. data/test/fixtures/raw_email10 +20 -0
  96. data/test/fixtures/raw_email11 +34 -0
  97. data/test/fixtures/raw_email12 +32 -0
  98. data/test/fixtures/raw_email13 +29 -0
  99. data/test/fixtures/raw_email2 +114 -0
  100. data/test/fixtures/raw_email3 +70 -0
  101. data/test/fixtures/raw_email4 +59 -0
  102. data/test/fixtures/raw_email5 +19 -0
  103. data/test/fixtures/raw_email6 +20 -0
  104. data/test/fixtures/raw_email7 +66 -0
  105. data/test/fixtures/raw_email8 +47 -0
  106. data/test/fixtures/raw_email9 +28 -0
  107. data/test/fixtures/raw_email_bad_time +62 -0
  108. data/test/fixtures/raw_email_double_at_in_header +14 -0
  109. data/test/fixtures/raw_email_multiple_from +30 -0
  110. data/test/fixtures/raw_email_only_attachment +17 -0
  111. data/test/fixtures/raw_email_quoted_with_0d0a +14 -0
  112. data/test/fixtures/raw_email_reply +32 -0
  113. data/test/fixtures/raw_email_simple +11 -0
  114. data/test/fixtures/raw_email_string_in_date_field +17 -0
  115. data/test/fixtures/raw_email_trailing_dot +21 -0
  116. data/test/fixtures/raw_email_with_bad_date +48 -0
  117. data/test/fixtures/raw_email_with_illegal_boundary +58 -0
  118. data/test/fixtures/raw_email_with_mimepart_without_content_type +94 -0
  119. data/test/fixtures/raw_email_with_multipart_mixed_quoted_boundary +50 -0
  120. data/test/fixtures/raw_email_with_nested_attachment +100 -0
  121. data/test/fixtures/raw_email_with_partially_quoted_subject +14 -0
  122. data/test/fixtures/raw_email_with_quoted_attachment_filename +60 -0
  123. data/test/fixtures/raw_email_with_quoted_illegal_boundary +58 -0
  124. data/test/fixtures/raw_email_with_wrong_splitted_multibyte_encoded_word_subject +15 -0
  125. data/test/fixtures/the_only_part_is_a_word_document.txt +425 -0
  126. data/test/fixtures/unquoted_filename_in_attachment +177 -0
  127. data/test/kcode.rb +14 -0
  128. data/test/temp_test_one.rb +46 -0
  129. data/test/test_address.rb +1216 -0
  130. data/test/test_attachments.rb +133 -0
  131. data/test/test_base64.rb +64 -0
  132. data/test/test_encode.rb +139 -0
  133. data/test/test_header.rb +1021 -0
  134. data/test/test_helper.rb +9 -0
  135. data/test/test_mail.rb +756 -0
  136. data/test/test_mbox.rb +184 -0
  137. data/test/test_port.rb +440 -0
  138. data/test/test_quote.rb +107 -0
  139. data/test/test_scanner.rb +209 -0
  140. data/test/test_utils.rb +36 -0
  141. data/tmail_es.gemspec +35 -0
  142. metadata +257 -0
@@ -0,0 +1,244 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is mozilla.org code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 1998
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Mark Pilgrim - port to Python
11
+ #
12
+ # This library is free software; you can redistribute it and/or
13
+ # modify it under the terms of the GNU Lesser General Public
14
+ # License as published by the Free Software Foundation; either
15
+ # version 2.1 of the License, or (at your option) any later version.
16
+ #
17
+ # This library is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
+ # Lesser General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public
23
+ # License along with this library; if not, write to the Free Software
24
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25
+ # 02110-1301 USA
26
+ ######################### END LICENSE BLOCK #########################
27
+
28
+ module CharDet # State-machine model tables for HZ-GB-2312, ISO-2022-CN, ISO-2022-JP and ISO-2022-KR.
29
+ HZ_cls = [ # 256 entries, one per byte value 0x00-0xff (see row comments); every value is a class id below the HZ classFactor (6).
30
+ 1,0,0,0,0,0,0,0, # 00 - 07
31
+ 0,0,0,0,0,0,0,0, # 08 - 0f
32
+ 0,0,0,0,0,0,0,0, # 10 - 17
33
+ 0,0,0,1,0,0,0,0, # 18 - 1f
34
+ 0,0,0,0,0,0,0,0, # 20 - 27
35
+ 0,0,0,0,0,0,0,0, # 28 - 2f
36
+ 0,0,0,0,0,0,0,0, # 30 - 37
37
+ 0,0,0,0,0,0,0,0, # 38 - 3f
38
+ 0,0,0,0,0,0,0,0, # 40 - 47
39
+ 0,0,0,0,0,0,0,0, # 48 - 4f
40
+ 0,0,0,0,0,0,0,0, # 50 - 57
41
+ 0,0,0,0,0,0,0,0, # 58 - 5f
42
+ 0,0,0,0,0,0,0,0, # 60 - 67
43
+ 0,0,0,0,0,0,0,0, # 68 - 6f
44
+ 0,0,0,0,0,0,0,0, # 70 - 77
45
+ 0,0,0,4,0,5,2,0, # 78 - 7f
46
+ 1,1,1,1,1,1,1,1, # 80 - 87
47
+ 1,1,1,1,1,1,1,1, # 88 - 8f
48
+ 1,1,1,1,1,1,1,1, # 90 - 97
49
+ 1,1,1,1,1,1,1,1, # 98 - 9f
50
+ 1,1,1,1,1,1,1,1, # a0 - a7
51
+ 1,1,1,1,1,1,1,1, # a8 - af
52
+ 1,1,1,1,1,1,1,1, # b0 - b7
53
+ 1,1,1,1,1,1,1,1, # b8 - bf
54
+ 1,1,1,1,1,1,1,1, # c0 - c7
55
+ 1,1,1,1,1,1,1,1, # c8 - cf
56
+ 1,1,1,1,1,1,1,1, # d0 - d7
57
+ 1,1,1,1,1,1,1,1, # d8 - df
58
+ 1,1,1,1,1,1,1,1, # e0 - e7
59
+ 1,1,1,1,1,1,1,1, # e8 - ef
60
+ 1,1,1,1,1,1,1,1, # f0 - f7
61
+ 1,1,1,1,1,1,1,1, # f8 - ff
62
+ ]
63
+ # HZ state table: 48 entries mixing EStart/EError/EItsMe with numeric state ids; presumably indexed as state * classFactor + class (TODO confirm in the consuming state machine).
64
+ HZ_st = [
65
+ EStart,EError, 3,EStart,EStart,EStart,EError,EError,# 00-07
66
+ EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,# 08-0f
67
+ EItsMe,EItsMe,EError,EError,EStart,EStart, 4,EError,# 10-17
68
+ 5,EError, 6,EError, 5, 5, 4,EError,# 18-1f
69
+ 4,EError, 4, 4, 4,EError, 4,EError,# 20-27
70
+ 4,EItsMe,EStart,EStart,EStart,EStart,EStart,EStart,# 28-2f
71
+ ]
72
+ # Six entries, matching the HZ classFactor of 6; every entry is 0.
73
+ HZCharLenTable = [0, 0, 0, 0, 0, 0]
74
+ # Bundles the HZ tables, the class count and the charset name under string keys.
75
+ HZSMModel = {'classTable' => HZ_cls,
76
+ 'classFactor' => 6,
77
+ 'stateTable' => HZ_st,
78
+ 'charLenTable' => HZCharLenTable,
79
+ 'name' => "HZ-GB-2312"
80
+ }
81
+ # ISO-2022-CN class table: 256 entries, one per byte value 0x00-0xff (see row comments); only class ids 0-4 appear.
82
+ ISO2022CN_cls = [
83
+ 2,0,0,0,0,0,0,0, # 00 - 07
84
+ 0,0,0,0,0,0,0,0, # 08 - 0f
85
+ 0,0,0,0,0,0,0,0, # 10 - 17
86
+ 0,0,0,1,0,0,0,0, # 18 - 1f
87
+ 0,0,0,0,0,0,0,0, # 20 - 27
88
+ 0,3,0,0,0,0,0,0, # 28 - 2f
89
+ 0,0,0,0,0,0,0,0, # 30 - 37
90
+ 0,0,0,0,0,0,0,0, # 38 - 3f
91
+ 0,0,0,4,0,0,0,0, # 40 - 47
92
+ 0,0,0,0,0,0,0,0, # 48 - 4f
93
+ 0,0,0,0,0,0,0,0, # 50 - 57
94
+ 0,0,0,0,0,0,0,0, # 58 - 5f
95
+ 0,0,0,0,0,0,0,0, # 60 - 67
96
+ 0,0,0,0,0,0,0,0, # 68 - 6f
97
+ 0,0,0,0,0,0,0,0, # 70 - 77
98
+ 0,0,0,0,0,0,0,0, # 78 - 7f
99
+ 2,2,2,2,2,2,2,2, # 80 - 87
100
+ 2,2,2,2,2,2,2,2, # 88 - 8f
101
+ 2,2,2,2,2,2,2,2, # 90 - 97
102
+ 2,2,2,2,2,2,2,2, # 98 - 9f
103
+ 2,2,2,2,2,2,2,2, # a0 - a7
104
+ 2,2,2,2,2,2,2,2, # a8 - af
105
+ 2,2,2,2,2,2,2,2, # b0 - b7
106
+ 2,2,2,2,2,2,2,2, # b8 - bf
107
+ 2,2,2,2,2,2,2,2, # c0 - c7
108
+ 2,2,2,2,2,2,2,2, # c8 - cf
109
+ 2,2,2,2,2,2,2,2, # d0 - d7
110
+ 2,2,2,2,2,2,2,2, # d8 - df
111
+ 2,2,2,2,2,2,2,2, # e0 - e7
112
+ 2,2,2,2,2,2,2,2, # e8 - ef
113
+ 2,2,2,2,2,2,2,2, # f0 - f7
114
+ 2,2,2,2,2,2,2,2, # f8 - ff
115
+ ]
116
+ # ISO-2022-CN state table: 64 entries mixing EStart/EError/EItsMe with numeric state ids; indexing scheme is not visible here (TODO confirm in the consuming state machine).
117
+ ISO2022CN_st = [
118
+ EStart, 3,EError,EStart,EStart,EStart,EStart,EStart,# 00-07
119
+ EStart,EError,EError,EError,EError,EError,EError,EError,# 08-0f
120
+ EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,# 10-17
121
+ EItsMe,EItsMe,EItsMe,EError,EError,EError, 4,EError,# 18-1f
122
+ EError,EError,EError,EItsMe,EError,EError,EError,EError,# 20-27
123
+ 5, 6,EError,EError,EError,EError,EError,EError,# 28-2f
124
+ EError,EError,EError,EItsMe,EError,EError,EError,EError,# 30-37
125
+ EError,EError,EError,EError,EError,EItsMe,EError,EStart,# 38-3f
126
+ ]
127
+ # Nine entries, matching the ISO-2022-CN classFactor of 9; every entry is 0.
128
+ ISO2022CNCharLenTable = [0, 0, 0, 0, 0, 0, 0, 0, 0]
129
+ # Bundles the ISO-2022-CN tables, the class count and the charset name under string keys.
130
+ ISO2022CNSMModel = {'classTable' => ISO2022CN_cls,
131
+ 'classFactor' => 9,
132
+ 'stateTable' => ISO2022CN_st,
133
+ 'charLenTable' => ISO2022CNCharLenTable,
134
+ 'name' => "ISO-2022-CN"
135
+ }
136
+ # ISO-2022-JP class table: 256 entries, one per byte value 0x00-0xff (see row comments); class ids 0-9 appear.
137
+ ISO2022JP_cls = [
138
+ 2,0,0,0,0,0,0,0, # 00 - 07
139
+ 0,0,0,0,0,0,2,2, # 08 - 0f
140
+ 0,0,0,0,0,0,0,0, # 10 - 17
141
+ 0,0,0,1,0,0,0,0, # 18 - 1f
142
+ 0,0,0,0,7,0,0,0, # 20 - 27
143
+ 3,0,0,0,0,0,0,0, # 28 - 2f
144
+ 0,0,0,0,0,0,0,0, # 30 - 37
145
+ 0,0,0,0,0,0,0,0, # 38 - 3f
146
+ 6,0,4,0,8,0,0,0, # 40 - 47
147
+ 0,9,5,0,0,0,0,0, # 48 - 4f
148
+ 0,0,0,0,0,0,0,0, # 50 - 57
149
+ 0,0,0,0,0,0,0,0, # 58 - 5f
150
+ 0,0,0,0,0,0,0,0, # 60 - 67
151
+ 0,0,0,0,0,0,0,0, # 68 - 6f
152
+ 0,0,0,0,0,0,0,0, # 70 - 77
153
+ 0,0,0,0,0,0,0,0, # 78 - 7f
154
+ 2,2,2,2,2,2,2,2, # 80 - 87
155
+ 2,2,2,2,2,2,2,2, # 88 - 8f
156
+ 2,2,2,2,2,2,2,2, # 90 - 97
157
+ 2,2,2,2,2,2,2,2, # 98 - 9f
158
+ 2,2,2,2,2,2,2,2, # a0 - a7
159
+ 2,2,2,2,2,2,2,2, # a8 - af
160
+ 2,2,2,2,2,2,2,2, # b0 - b7
161
+ 2,2,2,2,2,2,2,2, # b8 - bf
162
+ 2,2,2,2,2,2,2,2, # c0 - c7
163
+ 2,2,2,2,2,2,2,2, # c8 - cf
164
+ 2,2,2,2,2,2,2,2, # d0 - d7
165
+ 2,2,2,2,2,2,2,2, # d8 - df
166
+ 2,2,2,2,2,2,2,2, # e0 - e7
167
+ 2,2,2,2,2,2,2,2, # e8 - ef
168
+ 2,2,2,2,2,2,2,2, # f0 - f7
169
+ 2,2,2,2,2,2,2,2, # f8 - ff
170
+ ]
171
+ # ISO-2022-JP state table: 72 entries mixing EStart/EError/EItsMe with numeric state ids; indexing scheme is not visible here (TODO confirm in the consuming state machine).
172
+ ISO2022JP_st = [
173
+ EStart, 3,EError,EStart,EStart,EStart,EStart,EStart,# 00-07
174
+ EStart,EStart,EError,EError,EError,EError,EError,EError,# 08-0f
175
+ EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,# 10-17
176
+ EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,EError,# 18-1f
177
+ EError, 5,EError,EError,EError, 4,EError,EError,# 20-27
178
+ EError,EError,EError, 6,EItsMe,EError,EItsMe,EError,# 28-2f
179
+ EError,EError,EError,EError,EError,EError,EItsMe,EItsMe,# 30-37
180
+ EError,EError,EError,EItsMe,EError,EError,EError,EError,# 38-3f
181
+ EError,EError,EError,EError,EItsMe,EError,EStart,EStart,# 40-47
182
+ ]
183
+ # Ten entries, matching the ISO-2022-JP classFactor of 10; every entry is 0.
184
+ ISO2022JPCharLenTable = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
185
+ # Bundles the ISO-2022-JP tables, the class count and the charset name under string keys.
186
+ ISO2022JPSMModel = {'classTable' => ISO2022JP_cls,
187
+ 'classFactor' => 10,
188
+ 'stateTable' => ISO2022JP_st,
189
+ 'charLenTable' => ISO2022JPCharLenTable,
190
+ 'name' => "ISO-2022-JP"
191
+ }
192
+ # ISO-2022-KR class table: 256 entries, one per byte value 0x00-0xff (see row comments); class ids 0-5 appear.
193
+ ISO2022KR_cls = [
194
+ 2,0,0,0,0,0,0,0, # 00 - 07
195
+ 0,0,0,0,0,0,0,0, # 08 - 0f
196
+ 0,0,0,0,0,0,0,0, # 10 - 17
197
+ 0,0,0,1,0,0,0,0, # 18 - 1f
198
+ 0,0,0,0,3,0,0,0, # 20 - 27
199
+ 0,4,0,0,0,0,0,0, # 28 - 2f
200
+ 0,0,0,0,0,0,0,0, # 30 - 37
201
+ 0,0,0,0,0,0,0,0, # 38 - 3f
202
+ 0,0,0,5,0,0,0,0, # 40 - 47
203
+ 0,0,0,0,0,0,0,0, # 48 - 4f
204
+ 0,0,0,0,0,0,0,0, # 50 - 57
205
+ 0,0,0,0,0,0,0,0, # 58 - 5f
206
+ 0,0,0,0,0,0,0,0, # 60 - 67
207
+ 0,0,0,0,0,0,0,0, # 68 - 6f
208
+ 0,0,0,0,0,0,0,0, # 70 - 77
209
+ 0,0,0,0,0,0,0,0, # 78 - 7f
210
+ 2,2,2,2,2,2,2,2, # 80 - 87
211
+ 2,2,2,2,2,2,2,2, # 88 - 8f
212
+ 2,2,2,2,2,2,2,2, # 90 - 97
213
+ 2,2,2,2,2,2,2,2, # 98 - 9f
214
+ 2,2,2,2,2,2,2,2, # a0 - a7
215
+ 2,2,2,2,2,2,2,2, # a8 - af
216
+ 2,2,2,2,2,2,2,2, # b0 - b7
217
+ 2,2,2,2,2,2,2,2, # b8 - bf
218
+ 2,2,2,2,2,2,2,2, # c0 - c7
219
+ 2,2,2,2,2,2,2,2, # c8 - cf
220
+ 2,2,2,2,2,2,2,2, # d0 - d7
221
+ 2,2,2,2,2,2,2,2, # d8 - df
222
+ 2,2,2,2,2,2,2,2, # e0 - e7
223
+ 2,2,2,2,2,2,2,2, # e8 - ef
224
+ 2,2,2,2,2,2,2,2, # f0 - f7
225
+ 2,2,2,2,2,2,2,2, # f8 - ff
226
+ ]
227
+ # ISO-2022-KR state table: 40 entries mixing EStart/EError/EItsMe with numeric state ids; indexing scheme is not visible here (TODO confirm in the consuming state machine).
228
+ ISO2022KR_st = [
229
+ EStart, 3,EError,EStart,EStart,EStart,EError,EError,# 00-07
230
+ EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,# 08-0f
231
+ EItsMe,EItsMe,EError,EError,EError, 4,EError,EError,# 10-17
232
+ EError,EError,EError,EError, 5,EError,EError,EError,# 18-1f
233
+ EError,EError,EError,EItsMe,EStart,EStart,EStart,EStart,# 20-27
234
+ ]
235
+ # Six entries, matching the ISO-2022-KR classFactor of 6; every entry is 0.
236
+ ISO2022KRCharLenTable = [0, 0, 0, 0, 0, 0]
237
+ # Bundles the ISO-2022-KR tables, the class count and the charset name under string keys.
238
+ ISO2022KRSMModel = {'classTable' => ISO2022KR_cls,
239
+ 'classFactor' => 6,
240
+ 'stateTable' => ISO2022KR_st,
241
+ 'charLenTable' => ISO2022KRCharLenTable,
242
+ 'name' => "ISO-2022-KR"
243
+ }
244
+ end
@@ -0,0 +1,88 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is mozilla.org code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 1998
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ #
13
+ # This library is free software; you can redistribute it and/or
14
+ # modify it under the terms of the GNU Lesser General Public
15
+ # License as published by the Free Software Foundation; either
16
+ # version 2.1 of the License, or (at your option) any later version.
17
+ #
18
+ # This library is distributed in the hope that it will be useful,
19
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
+ # Lesser General Public License for more details.
22
+ #
23
+ # You should have received a copy of the GNU Lesser General Public
24
+ # License along with this library; if not, write to the Free Software
25
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
+ # 02110-1301 USA
27
+ ######################### END LICENSE BLOCK #########################
28
+
29
+ module CharDet
30
+ class EUCJPProber < MultiByteCharSetProber # EUC-JP prober: pairs a coding state machine with a context analyzer and a distribution analyzer.
31
+ def initialize # Builds the state machine (from EUCJPSMModel) and both analyzers, then calls reset.
32
+ super()
33
+ @_mCodingSM = CodingStateMachine.new(EUCJPSMModel)
34
+ @_mDistributionAnalyzer = EUCJPDistributionAnalysis.new()
35
+ @_mContextAnalyzer = EUCJPContextAnalysis.new()
36
+ reset
37
+ end
38
+ # Runs the inherited reset via super, then also resets the context analyzer.
39
+ def reset
40
+ super()
41
+ @_mContextAnalyzer.reset()
42
+ end
43
+ # Returns the fixed charset label this prober reports.
44
+ def get_charset_name
45
+ return "EUC-JP"
46
+ end
47
+ # Feeds aBuf through the state machine one element at a time, updates the analyzers, and returns get_state().
48
+ def feed(aBuf)
49
+ aLen = aBuf.length
50
+ for i in (0...aLen)
51
+ codingState = @_mCodingSM.next_state(aBuf[i..i]) # one-element slice at position i
52
+ if codingState == EError
53
+ $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
54
+ @_mState = ENotMe # state machine reported an error: mark as not this charset and stop scanning
55
+ break
56
+ elsif codingState == EItsMe
57
+ @_mState = EFoundIt # state machine reported a positive match: stop scanning
58
+ break
59
+ elsif codingState == EStart # presumably a complete character has just been consumed -- TODO confirm against CodingStateMachine
60
+ charLen = @_mCodingSM.get_current_charlen()
61
+ if i == 0 # first element of this buffer: pair it with the element saved at the end of the previous feed call
62
+ @_mLastChar[1] = aBuf[0..0]
63
+ @_mContextAnalyzer.feed(@_mLastChar, charLen)
64
+ @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
65
+ else
66
+ @_mContextAnalyzer.feed(aBuf[i-1...i+1], charLen) # two-element window ending at position i
67
+ @_mDistributionAnalyzer.feed(aBuf[i-1...i+1], charLen)
68
+ end
69
+ end
70
+ end
71
+ # Save the final element so the next feed call can pair it with that call's first element.
72
+ @_mLastChar[0] = aBuf[aLen-1..aLen-1] # NOTE(review): for an empty String/Array this slice is nil -- confirm callers never pass an empty buffer
73
+ # Shortcut: while still detecting, declare a match once the context analyzer has enough data and confidence exceeds SHORTCUT_THRESHOLD.
74
+ if get_state() == EDetecting
75
+ if @_mContextAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
76
+ @_mState = EFoundIt
77
+ end
78
+ end
79
+
80
+ return get_state()
81
+ end
82
+ # Returns the larger of the context analyzer's and the distribution analyzer's confidence.
83
+ def get_confidence
84
+ l = [@_mContextAnalyzer.get_confidence,@_mDistributionAnalyzer.get_confidence]
85
+ return l.max
86
+ end
87
+ end
88
+ end