tmail_es 1.2.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGES +83 -0
  3. data/LICENSE +21 -0
  4. data/NOTES +7 -0
  5. data/README +182 -0
  6. data/Rakefile +2 -0
  7. data/ext/Makefile +20 -0
  8. data/ext/tmailscanner/tmail/MANIFEST +4 -0
  9. data/ext/tmailscanner/tmail/depend +1 -0
  10. data/ext/tmailscanner/tmail/extconf.rb +33 -0
  11. data/ext/tmailscanner/tmail/tmailscanner.c +614 -0
  12. data/lib/tmail/Makefile +18 -0
  13. data/lib/tmail/address.rb +392 -0
  14. data/lib/tmail/attachments.rb +65 -0
  15. data/lib/tmail/base64.rb +46 -0
  16. data/lib/tmail/compat.rb +41 -0
  17. data/lib/tmail/config.rb +67 -0
  18. data/lib/tmail/core_extensions.rb +63 -0
  19. data/lib/tmail/encode.rb +590 -0
  20. data/lib/tmail/header.rb +962 -0
  21. data/lib/tmail/index.rb +9 -0
  22. data/lib/tmail/interface.rb +1162 -0
  23. data/lib/tmail/loader.rb +3 -0
  24. data/lib/tmail/mail.rb +578 -0
  25. data/lib/tmail/mailbox.rb +496 -0
  26. data/lib/tmail/main.rb +6 -0
  27. data/lib/tmail/mbox.rb +3 -0
  28. data/lib/tmail/net.rb +250 -0
  29. data/lib/tmail/obsolete.rb +132 -0
  30. data/lib/tmail/parser.rb +1060 -0
  31. data/lib/tmail/parser.y +416 -0
  32. data/lib/tmail/port.rb +379 -0
  33. data/lib/tmail/quoting.rb +164 -0
  34. data/lib/tmail/require_arch.rb +58 -0
  35. data/lib/tmail/scanner.rb +49 -0
  36. data/lib/tmail/scanner_r.rb +261 -0
  37. data/lib/tmail/stringio.rb +280 -0
  38. data/lib/tmail/utils.rb +361 -0
  39. data/lib/tmail/vendor/rchardet-1.3/COPYING +504 -0
  40. data/lib/tmail/vendor/rchardet-1.3/README +12 -0
  41. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/big5freq.rb +927 -0
  42. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/big5prober.rb +42 -0
  43. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb +238 -0
  44. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/charsetgroupprober.rb +112 -0
  45. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/charsetprober.rb +75 -0
  46. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/codingstatemachine.rb +64 -0
  47. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/constants.rb +42 -0
  48. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/escprober.rb +89 -0
  49. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/escsm.rb +244 -0
  50. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb +88 -0
  51. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euckrfreq.rb +596 -0
  52. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euckrprober.rb +42 -0
  53. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euctwfreq.rb +430 -0
  54. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euctwprober.rb +42 -0
  55. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312freq.rb +474 -0
  56. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312prober.rb +42 -0
  57. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/hebrewprober.rb +289 -0
  58. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/jisfreq.rb +570 -0
  59. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb +229 -0
  60. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langbulgarianmodel.rb +229 -0
  61. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langcyrillicmodel.rb +330 -0
  62. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langgreekmodel.rb +227 -0
  63. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langhebrewmodel.rb +202 -0
  64. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langhungarianmodel.rb +226 -0
  65. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langthaimodel.rb +201 -0
  66. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/latin1prober.rb +147 -0
  67. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb +89 -0
  68. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcsgroupprober.rb +45 -0
  69. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcssm.rb +542 -0
  70. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb +124 -0
  71. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcsgroupprober.rb +56 -0
  72. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sjisprober.rb +88 -0
  73. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb +167 -0
  74. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb +87 -0
  75. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet.rb +67 -0
  76. data/lib/tmail/version.rb +40 -0
  77. data/lib/tmail.rb +6 -0
  78. data/setup.rb +1482 -0
  79. data/test/extctrl.rb +6 -0
  80. data/test/fixtures/apple_unquoted_content_type +44 -0
  81. data/test/fixtures/inline_attachment.txt +2095 -0
  82. data/test/fixtures/iso_8859_1_email_without_encoding_and_message_id.txt +16 -0
  83. data/test/fixtures/mailbox +414 -0
  84. data/test/fixtures/mailbox.zip +0 -0
  85. data/test/fixtures/mailbox_without_any_from_or_sender +10 -0
  86. data/test/fixtures/mailbox_without_from +11 -0
  87. data/test/fixtures/mailbox_without_return_path +12 -0
  88. data/test/fixtures/marked_as_iso_8859_1_but_it_is_utf_8.txt +33 -0
  89. data/test/fixtures/marked_as_utf_8_but_it_is_iso_8859_1.txt +56 -0
  90. data/test/fixtures/raw_attack_email_with_zero_length_whitespace +29 -0
  91. data/test/fixtures/raw_base64_decoded_string +0 -0
  92. data/test/fixtures/raw_base64_email +83 -0
  93. data/test/fixtures/raw_base64_encoded_string +1 -0
  94. data/test/fixtures/raw_email +14 -0
  95. data/test/fixtures/raw_email10 +20 -0
  96. data/test/fixtures/raw_email11 +34 -0
  97. data/test/fixtures/raw_email12 +32 -0
  98. data/test/fixtures/raw_email13 +29 -0
  99. data/test/fixtures/raw_email2 +114 -0
  100. data/test/fixtures/raw_email3 +70 -0
  101. data/test/fixtures/raw_email4 +59 -0
  102. data/test/fixtures/raw_email5 +19 -0
  103. data/test/fixtures/raw_email6 +20 -0
  104. data/test/fixtures/raw_email7 +66 -0
  105. data/test/fixtures/raw_email8 +47 -0
  106. data/test/fixtures/raw_email9 +28 -0
  107. data/test/fixtures/raw_email_bad_time +62 -0
  108. data/test/fixtures/raw_email_double_at_in_header +14 -0
  109. data/test/fixtures/raw_email_multiple_from +30 -0
  110. data/test/fixtures/raw_email_only_attachment +17 -0
  111. data/test/fixtures/raw_email_quoted_with_0d0a +14 -0
  112. data/test/fixtures/raw_email_reply +32 -0
  113. data/test/fixtures/raw_email_simple +11 -0
  114. data/test/fixtures/raw_email_string_in_date_field +17 -0
  115. data/test/fixtures/raw_email_trailing_dot +21 -0
  116. data/test/fixtures/raw_email_with_bad_date +48 -0
  117. data/test/fixtures/raw_email_with_illegal_boundary +58 -0
  118. data/test/fixtures/raw_email_with_mimepart_without_content_type +94 -0
  119. data/test/fixtures/raw_email_with_multipart_mixed_quoted_boundary +50 -0
  120. data/test/fixtures/raw_email_with_nested_attachment +100 -0
  121. data/test/fixtures/raw_email_with_partially_quoted_subject +14 -0
  122. data/test/fixtures/raw_email_with_quoted_attachment_filename +60 -0
  123. data/test/fixtures/raw_email_with_quoted_illegal_boundary +58 -0
  124. data/test/fixtures/raw_email_with_wrong_splitted_multibyte_encoded_word_subject +15 -0
  125. data/test/fixtures/the_only_part_is_a_word_document.txt +425 -0
  126. data/test/fixtures/unquoted_filename_in_attachment +177 -0
  127. data/test/kcode.rb +14 -0
  128. data/test/temp_test_one.rb +46 -0
  129. data/test/test_address.rb +1216 -0
  130. data/test/test_attachments.rb +133 -0
  131. data/test/test_base64.rb +64 -0
  132. data/test/test_encode.rb +139 -0
  133. data/test/test_header.rb +1021 -0
  134. data/test/test_helper.rb +9 -0
  135. data/test/test_mail.rb +756 -0
  136. data/test/test_mbox.rb +184 -0
  137. data/test/test_port.rb +440 -0
  138. data/test/test_quote.rb +107 -0
  139. data/test/test_scanner.rb +209 -0
  140. data/test/test_utils.rb +36 -0
  141. data/tmail_es.gemspec +35 -0
  142. metadata +257 -0
@@ -0,0 +1,542 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is mozilla.org code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 1998
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ #
13
+ # This library is free software; you can redistribute it and/or
14
+ # modify it under the terms of the GNU Lesser General Public
15
+ # License as published by the Free Software Foundation; either
16
+ # version 2.1 of the License, or (at your option) any later version.
17
+ #
18
+ # This library is distributed in the hope that it will be useful,
19
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
+ # Lesser General Public License for more details.
22
+ #
23
+ # You should have received a copy of the GNU Lesser General Public
24
+ # License along with this library; if not, write to the Free Software
25
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
+ # 02110-1301 USA
27
+ ######################### END LICENSE BLOCK #########################
28
+
29
+ module CharDet
30
+ # BIG5
31
+
32
+ BIG5_cls = [
33
+ 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
34
+ 1,1,1,1,1,1,0,0, # 08 - 0f
35
+ 1,1,1,1,1,1,1,1, # 10 - 17
36
+ 1,1,1,0,1,1,1,1, # 18 - 1f
37
+ 1,1,1,1,1,1,1,1, # 20 - 27
38
+ 1,1,1,1,1,1,1,1, # 28 - 2f
39
+ 1,1,1,1,1,1,1,1, # 30 - 37
40
+ 1,1,1,1,1,1,1,1, # 38 - 3f
41
+ 2,2,2,2,2,2,2,2, # 40 - 47
42
+ 2,2,2,2,2,2,2,2, # 48 - 4f
43
+ 2,2,2,2,2,2,2,2, # 50 - 57
44
+ 2,2,2,2,2,2,2,2, # 58 - 5f
45
+ 2,2,2,2,2,2,2,2, # 60 - 67
46
+ 2,2,2,2,2,2,2,2, # 68 - 6f
47
+ 2,2,2,2,2,2,2,2, # 70 - 77
48
+ 2,2,2,2,2,2,2,1, # 78 - 7f
49
+ 4,4,4,4,4,4,4,4, # 80 - 87
50
+ 4,4,4,4,4,4,4,4, # 88 - 8f
51
+ 4,4,4,4,4,4,4,4, # 90 - 97
52
+ 4,4,4,4,4,4,4,4, # 98 - 9f
53
+ 4,3,3,3,3,3,3,3, # a0 - a7
54
+ 3,3,3,3,3,3,3,3, # a8 - af
55
+ 3,3,3,3,3,3,3,3, # b0 - b7
56
+ 3,3,3,3,3,3,3,3, # b8 - bf
57
+ 3,3,3,3,3,3,3,3, # c0 - c7
58
+ 3,3,3,3,3,3,3,3, # c8 - cf
59
+ 3,3,3,3,3,3,3,3, # d0 - d7
60
+ 3,3,3,3,3,3,3,3, # d8 - df
61
+ 3,3,3,3,3,3,3,3, # e0 - e7
62
+ 3,3,3,3,3,3,3,3, # e8 - ef
63
+ 3,3,3,3,3,3,3,3, # f0 - f7
64
+ 3,3,3,3,3,3,3,0 # f8 - ff
65
+ ]
66
+
67
+ BIG5_st = [
68
+ EError,EStart,EStart, 3,EError,EError,EError,EError,#00-07
69
+ EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,#08-0f
70
+ EError,EStart,EStart,EStart,EStart,EStart,EStart,EStart #10-17
71
+ ]
72
+
73
+ Big5CharLenTable = [0, 1, 1, 2, 0]
74
+
75
+ Big5SMModel = {'classTable' => BIG5_cls,
76
+ 'classFactor' => 5,
77
+ 'stateTable' => BIG5_st,
78
+ 'charLenTable' => Big5CharLenTable,
79
+ 'name' => 'Big5'
80
+ }
81
+
82
+ # EUC-JP
83
+
84
+ EUCJP_cls = [
85
+ 4,4,4,4,4,4,4,4, # 00 - 07
86
+ 4,4,4,4,4,4,5,5, # 08 - 0f
87
+ 4,4,4,4,4,4,4,4, # 10 - 17
88
+ 4,4,4,5,4,4,4,4, # 18 - 1f
89
+ 4,4,4,4,4,4,4,4, # 20 - 27
90
+ 4,4,4,4,4,4,4,4, # 28 - 2f
91
+ 4,4,4,4,4,4,4,4, # 30 - 37
92
+ 4,4,4,4,4,4,4,4, # 38 - 3f
93
+ 4,4,4,4,4,4,4,4, # 40 - 47
94
+ 4,4,4,4,4,4,4,4, # 48 - 4f
95
+ 4,4,4,4,4,4,4,4, # 50 - 57
96
+ 4,4,4,4,4,4,4,4, # 58 - 5f
97
+ 4,4,4,4,4,4,4,4, # 60 - 67
98
+ 4,4,4,4,4,4,4,4, # 68 - 6f
99
+ 4,4,4,4,4,4,4,4, # 70 - 77
100
+ 4,4,4,4,4,4,4,4, # 78 - 7f
101
+ 5,5,5,5,5,5,5,5, # 80 - 87
102
+ 5,5,5,5,5,5,1,3, # 88 - 8f
103
+ 5,5,5,5,5,5,5,5, # 90 - 97
104
+ 5,5,5,5,5,5,5,5, # 98 - 9f
105
+ 5,2,2,2,2,2,2,2, # a0 - a7
106
+ 2,2,2,2,2,2,2,2, # a8 - af
107
+ 2,2,2,2,2,2,2,2, # b0 - b7
108
+ 2,2,2,2,2,2,2,2, # b8 - bf
109
+ 2,2,2,2,2,2,2,2, # c0 - c7
110
+ 2,2,2,2,2,2,2,2, # c8 - cf
111
+ 2,2,2,2,2,2,2,2, # d0 - d7
112
+ 2,2,2,2,2,2,2,2, # d8 - df
113
+ 0,0,0,0,0,0,0,0, # e0 - e7
114
+ 0,0,0,0,0,0,0,0, # e8 - ef
115
+ 0,0,0,0,0,0,0,0, # f0 - f7
116
+ 0,0,0,0,0,0,0,5 # f8 - ff
117
+ ]
118
+
119
+ EUCJP_st = [
120
+ 3, 4, 3, 5,EStart,EError,EError,EError,#00-07
121
+ EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
122
+ EItsMe,EItsMe,EStart,EError,EStart,EError,EError,EError,#10-17
123
+ EError,EError,EStart,EError,EError,EError, 3,EError,#18-1f
124
+ 3,EError,EError,EError,EStart,EStart,EStart,EStart #20-27
125
+ ]
126
+
127
+ EUCJPCharLenTable = [2, 2, 2, 3, 1, 0]
128
+
129
+ EUCJPSMModel = {'classTable' => EUCJP_cls,
130
+ 'classFactor' => 6,
131
+ 'stateTable' => EUCJP_st,
132
+ 'charLenTable' => EUCJPCharLenTable,
133
+ 'name' => 'EUC-JP'
134
+ }
135
+
136
+ # EUC-KR
137
+
138
+ EUCKR_cls = [
139
+ 1,1,1,1,1,1,1,1, # 00 - 07
140
+ 1,1,1,1,1,1,0,0, # 08 - 0f
141
+ 1,1,1,1,1,1,1,1, # 10 - 17
142
+ 1,1,1,0,1,1,1,1, # 18 - 1f
143
+ 1,1,1,1,1,1,1,1, # 20 - 27
144
+ 1,1,1,1,1,1,1,1, # 28 - 2f
145
+ 1,1,1,1,1,1,1,1, # 30 - 37
146
+ 1,1,1,1,1,1,1,1, # 38 - 3f
147
+ 1,1,1,1,1,1,1,1, # 40 - 47
148
+ 1,1,1,1,1,1,1,1, # 48 - 4f
149
+ 1,1,1,1,1,1,1,1, # 50 - 57
150
+ 1,1,1,1,1,1,1,1, # 58 - 5f
151
+ 1,1,1,1,1,1,1,1, # 60 - 67
152
+ 1,1,1,1,1,1,1,1, # 68 - 6f
153
+ 1,1,1,1,1,1,1,1, # 70 - 77
154
+ 1,1,1,1,1,1,1,1, # 78 - 7f
155
+ 0,0,0,0,0,0,0,0, # 80 - 87
156
+ 0,0,0,0,0,0,0,0, # 88 - 8f
157
+ 0,0,0,0,0,0,0,0, # 90 - 97
158
+ 0,0,0,0,0,0,0,0, # 98 - 9f
159
+ 0,2,2,2,2,2,2,2, # a0 - a7
160
+ 2,2,2,2,2,3,3,3, # a8 - af
161
+ 2,2,2,2,2,2,2,2, # b0 - b7
162
+ 2,2,2,2,2,2,2,2, # b8 - bf
163
+ 2,2,2,2,2,2,2,2, # c0 - c7
164
+ 2,3,2,2,2,2,2,2, # c8 - cf
165
+ 2,2,2,2,2,2,2,2, # d0 - d7
166
+ 2,2,2,2,2,2,2,2, # d8 - df
167
+ 2,2,2,2,2,2,2,2, # e0 - e7
168
+ 2,2,2,2,2,2,2,2, # e8 - ef
169
+ 2,2,2,2,2,2,2,2, # f0 - f7
170
+ 2,2,2,2,2,2,2,0 # f8 - ff
171
+ ]
172
+
173
+ EUCKR_st = [
174
+ EError,EStart, 3,EError,EError,EError,EError,EError,#00-07
175
+ EItsMe,EItsMe,EItsMe,EItsMe,EError,EError,EStart,EStart#08-0f
176
+ ]
177
+
178
+ EUCKRCharLenTable = [0, 1, 2, 0]
179
+
180
+ EUCKRSMModel = {'classTable' => EUCKR_cls,
181
+ 'classFactor' => 4,
182
+ 'stateTable' => EUCKR_st,
183
+ 'charLenTable' => EUCKRCharLenTable,
184
+ 'name' => 'EUC-KR'
185
+ }
186
+
187
+ # EUC-TW
188
+
189
+ EUCTW_cls = [
190
+ 2,2,2,2,2,2,2,2, # 00 - 07
191
+ 2,2,2,2,2,2,0,0, # 08 - 0f
192
+ 2,2,2,2,2,2,2,2, # 10 - 17
193
+ 2,2,2,0,2,2,2,2, # 18 - 1f
194
+ 2,2,2,2,2,2,2,2, # 20 - 27
195
+ 2,2,2,2,2,2,2,2, # 28 - 2f
196
+ 2,2,2,2,2,2,2,2, # 30 - 37
197
+ 2,2,2,2,2,2,2,2, # 38 - 3f
198
+ 2,2,2,2,2,2,2,2, # 40 - 47
199
+ 2,2,2,2,2,2,2,2, # 48 - 4f
200
+ 2,2,2,2,2,2,2,2, # 50 - 57
201
+ 2,2,2,2,2,2,2,2, # 58 - 5f
202
+ 2,2,2,2,2,2,2,2, # 60 - 67
203
+ 2,2,2,2,2,2,2,2, # 68 - 6f
204
+ 2,2,2,2,2,2,2,2, # 70 - 77
205
+ 2,2,2,2,2,2,2,2, # 78 - 7f
206
+ 0,0,0,0,0,0,0,0, # 80 - 87
207
+ 0,0,0,0,0,0,6,0, # 88 - 8f
208
+ 0,0,0,0,0,0,0,0, # 90 - 97
209
+ 0,0,0,0,0,0,0,0, # 98 - 9f
210
+ 0,3,4,4,4,4,4,4, # a0 - a7
211
+ 5,5,1,1,1,1,1,1, # a8 - af
212
+ 1,1,1,1,1,1,1,1, # b0 - b7
213
+ 1,1,1,1,1,1,1,1, # b8 - bf
214
+ 1,1,3,1,3,3,3,3, # c0 - c7
215
+ 3,3,3,3,3,3,3,3, # c8 - cf
216
+ 3,3,3,3,3,3,3,3, # d0 - d7
217
+ 3,3,3,3,3,3,3,3, # d8 - df
218
+ 3,3,3,3,3,3,3,3, # e0 - e7
219
+ 3,3,3,3,3,3,3,3, # e8 - ef
220
+ 3,3,3,3,3,3,3,3, # f0 - f7
221
+ 3,3,3,3,3,3,3,0 # f8 - ff
222
+ ]
223
+
224
+ EUCTW_st = [
225
+ EError,EError,EStart, 3, 3, 3, 4,EError,#00-07
226
+ EError,EError,EError,EError,EError,EError,EItsMe,EItsMe,#08-0f
227
+ EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,EStart,EError,#10-17
228
+ EStart,EStart,EStart,EError,EError,EError,EError,EError,#18-1f
229
+ 5,EError,EError,EError,EStart,EError,EStart,EStart,#20-27
230
+ EStart,EError,EStart,EStart,EStart,EStart,EStart,EStart #28-2f
231
+ ]
232
+
233
+ EUCTWCharLenTable = [0, 0, 1, 2, 2, 2, 3]
234
+
235
+ EUCTWSMModel = {'classTable' => EUCTW_cls,
236
+ 'classFactor' => 7,
237
+ 'stateTable' => EUCTW_st,
238
+ 'charLenTable' => EUCTWCharLenTable,
239
+ 'name' => 'x-euc-tw'
240
+ }
241
+
242
+ # GB2312
243
+
244
+ GB2312_cls = [
245
+ 1,1,1,1,1,1,1,1, # 00 - 07
246
+ 1,1,1,1,1,1,0,0, # 08 - 0f
247
+ 1,1,1,1,1,1,1,1, # 10 - 17
248
+ 1,1,1,0,1,1,1,1, # 18 - 1f
249
+ 1,1,1,1,1,1,1,1, # 20 - 27
250
+ 1,1,1,1,1,1,1,1, # 28 - 2f
251
+ 3,3,3,3,3,3,3,3, # 30 - 37
252
+ 3,3,1,1,1,1,1,1, # 38 - 3f
253
+ 2,2,2,2,2,2,2,2, # 40 - 47
254
+ 2,2,2,2,2,2,2,2, # 48 - 4f
255
+ 2,2,2,2,2,2,2,2, # 50 - 57
256
+ 2,2,2,2,2,2,2,2, # 58 - 5f
257
+ 2,2,2,2,2,2,2,2, # 60 - 67
258
+ 2,2,2,2,2,2,2,2, # 68 - 6f
259
+ 2,2,2,2,2,2,2,2, # 70 - 77
260
+ 2,2,2,2,2,2,2,4, # 78 - 7f
261
+ 5,6,6,6,6,6,6,6, # 80 - 87
262
+ 6,6,6,6,6,6,6,6, # 88 - 8f
263
+ 6,6,6,6,6,6,6,6, # 90 - 97
264
+ 6,6,6,6,6,6,6,6, # 98 - 9f
265
+ 6,6,6,6,6,6,6,6, # a0 - a7
266
+ 6,6,6,6,6,6,6,6, # a8 - af
267
+ 6,6,6,6,6,6,6,6, # b0 - b7
268
+ 6,6,6,6,6,6,6,6, # b8 - bf
269
+ 6,6,6,6,6,6,6,6, # c0 - c7
270
+ 6,6,6,6,6,6,6,6, # c8 - cf
271
+ 6,6,6,6,6,6,6,6, # d0 - d7
272
+ 6,6,6,6,6,6,6,6, # d8 - df
273
+ 6,6,6,6,6,6,6,6, # e0 - e7
274
+ 6,6,6,6,6,6,6,6, # e8 - ef
275
+ 6,6,6,6,6,6,6,6, # f0 - f7
276
+ 6,6,6,6,6,6,6,0 # f8 - ff
277
+ ]
278
+
279
+ GB2312_st = [
280
+ EError,EStart,EStart,EStart,EStart,EStart, 3,EError,#00-07
281
+ EError,EError,EError,EError,EError,EError,EItsMe,EItsMe,#08-0f
282
+ EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,EError,EStart,#10-17
283
+ 4,EError,EStart,EStart,EError,EError,EError,EError,#18-1f
284
+ EError,EError, 5,EError,EError,EError,EItsMe,EError,#20-27
285
+ EError,EError,EStart,EStart,EStart,EStart,EStart,EStart#28-2f
286
+ ]
287
+
288
+ # To be accurate, the length of class 6 can be either 2 or 4.
289
+ # But it is not necessary to discriminate between the two since
290
+ # it is used for frequency analysis only, and we are validating
291
+ # each code range there as well. So it is safe to set it to be
292
+ # 2 here.
293
+ GB2312CharLenTable = [0, 1, 1, 1, 1, 1, 2]
294
+
295
+ GB2312SMModel = {'classTable' => GB2312_cls,
296
+ 'classFactor' => 7,
297
+ 'stateTable' => GB2312_st,
298
+ 'charLenTable' => GB2312CharLenTable,
299
+ 'name' => 'GB2312'
300
+ }
301
+
302
+ # Shift_JIS
303
+
304
+ SJIS_cls = [
305
+ 1,1,1,1,1,1,1,1, # 00 - 07
306
+ 1,1,1,1,1,1,0,0, # 08 - 0f
307
+ 1,1,1,1,1,1,1,1, # 10 - 17
308
+ 1,1,1,0,1,1,1,1, # 18 - 1f
309
+ 1,1,1,1,1,1,1,1, # 20 - 27
310
+ 1,1,1,1,1,1,1,1, # 28 - 2f
311
+ 1,1,1,1,1,1,1,1, # 30 - 37
312
+ 1,1,1,1,1,1,1,1, # 38 - 3f
313
+ 2,2,2,2,2,2,2,2, # 40 - 47
314
+ 2,2,2,2,2,2,2,2, # 48 - 4f
315
+ 2,2,2,2,2,2,2,2, # 50 - 57
316
+ 2,2,2,2,2,2,2,2, # 58 - 5f
317
+ 2,2,2,2,2,2,2,2, # 60 - 67
318
+ 2,2,2,2,2,2,2,2, # 68 - 6f
319
+ 2,2,2,2,2,2,2,2, # 70 - 77
320
+ 2,2,2,2,2,2,2,1, # 78 - 7f
321
+ 3,3,3,3,3,3,3,3, # 80 - 87
322
+ 3,3,3,3,3,3,3,3, # 88 - 8f
323
+ 3,3,3,3,3,3,3,3, # 90 - 97
324
+ 3,3,3,3,3,3,3,3, # 98 - 9f
325
+ #0xa0 is illegal in sjis encoding, but some pages do
326
+ #contain such a byte. We need to be more error forgiving.
327
+ 2,2,2,2,2,2,2,2, # a0 - a7
328
+ 2,2,2,2,2,2,2,2, # a8 - af
329
+ 2,2,2,2,2,2,2,2, # b0 - b7
330
+ 2,2,2,2,2,2,2,2, # b8 - bf
331
+ 2,2,2,2,2,2,2,2, # c0 - c7
332
+ 2,2,2,2,2,2,2,2, # c8 - cf
333
+ 2,2,2,2,2,2,2,2, # d0 - d7
334
+ 2,2,2,2,2,2,2,2, # d8 - df
335
+ 3,3,3,3,3,3,3,3, # e0 - e7
336
+ 3,3,3,3,3,4,4,4, # e8 - ef
337
+ 4,4,4,4,4,4,4,4, # f0 - f7
338
+ 4,4,4,4,4,0,0,0 # f8 - ff
339
+ ]
340
+
341
+ SJIS_st = [
342
+ EError,EStart,EStart, 3,EError,EError,EError,EError,#00-07
343
+ EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
344
+ EItsMe,EItsMe,EError,EError,EStart,EStart,EStart,EStart#10-17
345
+ ]
346
+
347
+ SJISCharLenTable = [0, 1, 1, 2, 0, 0]
348
+
349
+ SJISSMModel = {'classTable' => SJIS_cls,
350
+ 'classFactor' => 6,
351
+ 'stateTable' => SJIS_st,
352
+ 'charLenTable' => SJISCharLenTable,
353
+ 'name' => 'Shift_JIS'
354
+ }
355
+
356
+ # UCS2-BE
357
+
358
+ UCS2BE_cls = [
359
+ 0,0,0,0,0,0,0,0, # 00 - 07
360
+ 0,0,1,0,0,2,0,0, # 08 - 0f
361
+ 0,0,0,0,0,0,0,0, # 10 - 17
362
+ 0,0,0,3,0,0,0,0, # 18 - 1f
363
+ 0,0,0,0,0,0,0,0, # 20 - 27
364
+ 0,3,3,3,3,3,0,0, # 28 - 2f
365
+ 0,0,0,0,0,0,0,0, # 30 - 37
366
+ 0,0,0,0,0,0,0,0, # 38 - 3f
367
+ 0,0,0,0,0,0,0,0, # 40 - 47
368
+ 0,0,0,0,0,0,0,0, # 48 - 4f
369
+ 0,0,0,0,0,0,0,0, # 50 - 57
370
+ 0,0,0,0,0,0,0,0, # 58 - 5f
371
+ 0,0,0,0,0,0,0,0, # 60 - 67
372
+ 0,0,0,0,0,0,0,0, # 68 - 6f
373
+ 0,0,0,0,0,0,0,0, # 70 - 77
374
+ 0,0,0,0,0,0,0,0, # 78 - 7f
375
+ 0,0,0,0,0,0,0,0, # 80 - 87
376
+ 0,0,0,0,0,0,0,0, # 88 - 8f
377
+ 0,0,0,0,0,0,0,0, # 90 - 97
378
+ 0,0,0,0,0,0,0,0, # 98 - 9f
379
+ 0,0,0,0,0,0,0,0, # a0 - a7
380
+ 0,0,0,0,0,0,0,0, # a8 - af
381
+ 0,0,0,0,0,0,0,0, # b0 - b7
382
+ 0,0,0,0,0,0,0,0, # b8 - bf
383
+ 0,0,0,0,0,0,0,0, # c0 - c7
384
+ 0,0,0,0,0,0,0,0, # c8 - cf
385
+ 0,0,0,0,0,0,0,0, # d0 - d7
386
+ 0,0,0,0,0,0,0,0, # d8 - df
387
+ 0,0,0,0,0,0,0,0, # e0 - e7
388
+ 0,0,0,0,0,0,0,0, # e8 - ef
389
+ 0,0,0,0,0,0,0,0, # f0 - f7
390
+ 0,0,0,0,0,0,4,5 # f8 - ff
391
+ ]
392
+
393
+ UCS2BE_st = [
394
+ 5, 7, 7,EError, 4, 3,EError,EError,#00-07
395
+ EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
396
+ EItsMe,EItsMe, 6, 6, 6, 6,EError,EError,#10-17
397
+ 6, 6, 6, 6, 6,EItsMe, 6, 6,#18-1f
398
+ 6, 6, 6, 6, 5, 7, 7,EError,#20-27
399
+ 5, 8, 6, 6,EError, 6, 6, 6,#28-2f
400
+ 6, 6, 6, 6,EError,EError,EStart,EStart#30-37
401
+ ]
402
+
403
+ UCS2BECharLenTable = [2, 2, 2, 0, 2, 2]
404
+
405
+ UCS2BESMModel = {'classTable' => UCS2BE_cls,
406
+ 'classFactor' => 6,
407
+ 'stateTable' => UCS2BE_st,
408
+ 'charLenTable' => UCS2BECharLenTable,
409
+ 'name' => 'UTF-16BE'
410
+ }
411
+
412
+ # UCS2-LE
413
+
414
+ UCS2LE_cls = [
415
+ 0,0,0,0,0,0,0,0, # 00 - 07
416
+ 0,0,1,0,0,2,0,0, # 08 - 0f
417
+ 0,0,0,0,0,0,0,0, # 10 - 17
418
+ 0,0,0,3,0,0,0,0, # 18 - 1f
419
+ 0,0,0,0,0,0,0,0, # 20 - 27
420
+ 0,3,3,3,3,3,0,0, # 28 - 2f
421
+ 0,0,0,0,0,0,0,0, # 30 - 37
422
+ 0,0,0,0,0,0,0,0, # 38 - 3f
423
+ 0,0,0,0,0,0,0,0, # 40 - 47
424
+ 0,0,0,0,0,0,0,0, # 48 - 4f
425
+ 0,0,0,0,0,0,0,0, # 50 - 57
426
+ 0,0,0,0,0,0,0,0, # 58 - 5f
427
+ 0,0,0,0,0,0,0,0, # 60 - 67
428
+ 0,0,0,0,0,0,0,0, # 68 - 6f
429
+ 0,0,0,0,0,0,0,0, # 70 - 77
430
+ 0,0,0,0,0,0,0,0, # 78 - 7f
431
+ 0,0,0,0,0,0,0,0, # 80 - 87
432
+ 0,0,0,0,0,0,0,0, # 88 - 8f
433
+ 0,0,0,0,0,0,0,0, # 90 - 97
434
+ 0,0,0,0,0,0,0,0, # 98 - 9f
435
+ 0,0,0,0,0,0,0,0, # a0 - a7
436
+ 0,0,0,0,0,0,0,0, # a8 - af
437
+ 0,0,0,0,0,0,0,0, # b0 - b7
438
+ 0,0,0,0,0,0,0,0, # b8 - bf
439
+ 0,0,0,0,0,0,0,0, # c0 - c7
440
+ 0,0,0,0,0,0,0,0, # c8 - cf
441
+ 0,0,0,0,0,0,0,0, # d0 - d7
442
+ 0,0,0,0,0,0,0,0, # d8 - df
443
+ 0,0,0,0,0,0,0,0, # e0 - e7
444
+ 0,0,0,0,0,0,0,0, # e8 - ef
445
+ 0,0,0,0,0,0,0,0, # f0 - f7
446
+ 0,0,0,0,0,0,4,5 # f8 - ff
447
+ ]
448
+
449
+ UCS2LE_st = [
450
+ 6, 6, 7, 6, 4, 3,EError,EError,#00-07
451
+ EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
452
+ EItsMe,EItsMe, 5, 5, 5,EError,EItsMe,EError,#10-17
453
+ 5, 5, 5,EError, 5,EError, 6, 6,#18-1f
454
+ 7, 6, 8, 8, 5, 5, 5,EError,#20-27
455
+ 5, 5, 5,EError,EError,EError, 5, 5,#28-2f
456
+ 5, 5, 5,EError, 5,EError,EStart,EStart#30-37
457
+ ]
458
+
459
+ UCS2LECharLenTable = [2, 2, 2, 2, 2, 2]
460
+
461
+ UCS2LESMModel = {'classTable' => UCS2LE_cls,
462
+ 'classFactor' => 6,
463
+ 'stateTable' => UCS2LE_st,
464
+ 'charLenTable' => UCS2LECharLenTable,
465
+ 'name' => 'UTF-16LE'
466
+ }
467
+
468
+ # UTF-8
469
+
470
+ UTF8_cls = [
471
+ 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
472
+ 1,1,1,1,1,1,0,0, # 08 - 0f
473
+ 1,1,1,1,1,1,1,1, # 10 - 17
474
+ 1,1,1,0,1,1,1,1, # 18 - 1f
475
+ 1,1,1,1,1,1,1,1, # 20 - 27
476
+ 1,1,1,1,1,1,1,1, # 28 - 2f
477
+ 1,1,1,1,1,1,1,1, # 30 - 37
478
+ 1,1,1,1,1,1,1,1, # 38 - 3f
479
+ 1,1,1,1,1,1,1,1, # 40 - 47
480
+ 1,1,1,1,1,1,1,1, # 48 - 4f
481
+ 1,1,1,1,1,1,1,1, # 50 - 57
482
+ 1,1,1,1,1,1,1,1, # 58 - 5f
483
+ 1,1,1,1,1,1,1,1, # 60 - 67
484
+ 1,1,1,1,1,1,1,1, # 68 - 6f
485
+ 1,1,1,1,1,1,1,1, # 70 - 77
486
+ 1,1,1,1,1,1,1,1, # 78 - 7f
487
+ 2,2,2,2,3,3,3,3, # 80 - 87
488
+ 4,4,4,4,4,4,4,4, # 88 - 8f
489
+ 4,4,4,4,4,4,4,4, # 90 - 97
490
+ 4,4,4,4,4,4,4,4, # 98 - 9f
491
+ 5,5,5,5,5,5,5,5, # a0 - a7
492
+ 5,5,5,5,5,5,5,5, # a8 - af
493
+ 5,5,5,5,5,5,5,5, # b0 - b7
494
+ 5,5,5,5,5,5,5,5, # b8 - bf
495
+ 0,0,6,6,6,6,6,6, # c0 - c7
496
+ 6,6,6,6,6,6,6,6, # c8 - cf
497
+ 6,6,6,6,6,6,6,6, # d0 - d7
498
+ 6,6,6,6,6,6,6,6, # d8 - df
499
+ 7,8,8,8,8,8,8,8, # e0 - e7
500
+ 8,8,8,8,8,9,8,8, # e8 - ef
501
+ 10,11,11,11,11,11,11,11, # f0 - f7
502
+ 12,13,13,13,14,15,0,0 # f8 - ff
503
+ ]
504
+
505
+ UTF8_st = [
506
+ EError,EStart,EError,EError,EError,EError, 12, 10,#00-07
507
+ 9, 11, 8, 7, 6, 5, 4, 3,#08-0f
508
+ EError,EError,EError,EError,EError,EError,EError,EError,#10-17
509
+ EError,EError,EError,EError,EError,EError,EError,EError,#18-1f
510
+ EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,#20-27
511
+ EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,#28-2f
512
+ EError,EError, 5, 5, 5, 5,EError,EError,#30-37
513
+ EError,EError,EError,EError,EError,EError,EError,EError,#38-3f
514
+ EError,EError,EError, 5, 5, 5,EError,EError,#40-47
515
+ EError,EError,EError,EError,EError,EError,EError,EError,#48-4f
516
+ EError,EError, 7, 7, 7, 7,EError,EError,#50-57
517
+ EError,EError,EError,EError,EError,EError,EError,EError,#58-5f
518
+ EError,EError,EError,EError, 7, 7,EError,EError,#60-67
519
+ EError,EError,EError,EError,EError,EError,EError,EError,#68-6f
520
+ EError,EError, 9, 9, 9, 9,EError,EError,#70-77
521
+ EError,EError,EError,EError,EError,EError,EError,EError,#78-7f
522
+ EError,EError,EError,EError,EError, 9,EError,EError,#80-87
523
+ EError,EError,EError,EError,EError,EError,EError,EError,#88-8f
524
+ EError,EError, 12, 12, 12, 12,EError,EError,#90-97
525
+ EError,EError,EError,EError,EError,EError,EError,EError,#98-9f
526
+ EError,EError,EError,EError,EError, 12,EError,EError,#a0-a7
527
+ EError,EError,EError,EError,EError,EError,EError,EError,#a8-af
528
+ EError,EError, 12, 12, 12,EError,EError,EError,#b0-b7
529
+ EError,EError,EError,EError,EError,EError,EError,EError,#b8-bf
530
+ EError,EError,EStart,EStart,EStart,EStart,EError,EError,#c0-c7
531
+ EError,EError,EError,EError,EError,EError,EError,EError#c8-cf
532
+ ]
533
+
534
+ UTF8CharLenTable = [0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6]
535
+
536
+ UTF8SMModel = {'classTable' => UTF8_cls,
537
+ 'classFactor' => 16,
538
+ 'stateTable' => UTF8_st,
539
+ 'charLenTable' => UTF8CharLenTable,
540
+ 'name' => 'UTF-8'
541
+ }
542
+ end
@@ -0,0 +1,124 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is Mozilla Universal charset detector code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 2001
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ # Shy Shalom - original C code
13
+ #
14
+ # This library is free software; you can redistribute it and/or
15
+ # modify it under the terms of the GNU Lesser General Public
16
+ # License as published by the Free Software Foundation; either
17
+ # version 2.1 of the License, or (at your option) any later version.
18
+ #
19
+ # This library is distributed in the hope that it will be useful,
20
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22
+ # Lesser General Public License for more details.
23
+ #
24
+ # You should have received a copy of the GNU Lesser General Public
25
+ # License along with this library; if not, write to the Free Software
26
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27
+ # 02110-1301 USA
28
+ ######################### END LICENSE BLOCK #########################
29
+
30
module CharDet
  # Character orders below this value take part in pair (sequence) statistics;
  # it is also the row width used to index the model's precedence matrix.
  SAMPLE_SIZE = 64
  # Number of counted sequences that must be exceeded before feed() is
  # allowed to shortcut to a final verdict.
  SB_ENOUGH_REL_THRESHOLD = 1024
  # Confidence above which feed() declares this charset found.
  POSITIVE_SHORTCUT_THRESHOLD = 0.95
  # Confidence below which feed() rules this charset out.
  NEGATIVE_SHORTCUT_THRESHOLD = 0.05
  # Character orders below this value are counted in the total-character tally.
  SYMBOL_CAT_ORDER = 250
  # Number of sequence categories tracked in the per-category counters.
  NUMBER_OF_SEQ_CAT = 4
  # Index of the counter whose share of all sequences drives get_confidence.
  POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
  #NEGATIVE_CAT = 0

  # Prober that scores a byte stream against a single-byte language model.
  #
  # The model is a Hash read through the keys 'charToOrderMap',
  # 'precedenceMatrix', 'mTypicalPositiveRatio', 'keepEnglishLetter' and
  # 'charsetName'. State handling (get_state, the state constants and
  # filter_without_english_letters) comes from outside this class --
  # presumably CharSetProber; verify against that file.
  class SingleByteCharSetProber < CharSetProber
    # model      - language model Hash (see class comment for the keys used).
    # reversed   - when true, each character pair is looked up in the
    #              precedence matrix with its two orders swapped.
    # nameProber - optional prober whose get_charset_name is reported
    #              instead of the model's own 'charsetName'.
    def initialize(model, reversed=false, nameProber=nil)
      super()
      @_mModel = model
      @_mReversed = reversed
      @_mNameProber = nameProber
      reset()
    end

    # Clears all statistics so the prober can be fed a fresh stream.
    def reset
      super()
      @_mLastOrder = 255 # order of the previous character; 255 is outside the sampling range
      @_mSeqCounters = Array.new(NUMBER_OF_SEQ_CAT, 0)
      @_mTotalSeqs = 0
      @_mTotalChar = 0
      @_mFreqChar = 0 # characters that fall in our sampling range
    end

    # Returns the charset name, delegating to the name prober when one was given.
    def get_charset_name
      return @_mNameProber.get_charset_name() if @_mNameProber

      @_mModel['charsetName']
    end

    # Feeds a chunk of bytes into the statistics and returns the prober state.
    #
    # aBuf - String of raw bytes to analyse.
    #
    # Once more than SB_ENOUGH_REL_THRESHOLD sequences have been seen while
    # still detecting, the state is set to EFoundIt or ENotMe if the
    # confidence crosses the positive or negative shortcut threshold.
    def feed(aBuf)
      unless @_mModel['keepEnglishLetter']
        aBuf = filter_without_english_letters(aBuf)
      end

      # An empty buffer carries no new evidence. The original `if not aLen`
      # guard never fired because 0 is truthy in Ruby.
      return get_state() if aBuf.length.zero?

      char_to_order = @_mModel['charToOrderMap']
      precedence = @_mModel['precedenceMatrix']

      aBuf.each_byte do |byte|
        # each_byte already yields the integer byte value. Going through
        # byte.chr[0] yields a String on Ruby >= 1.9 and breaks the lookup.
        order = char_to_order[byte]
        @_mTotalChar += 1 if order < SYMBOL_CAT_ORDER
        if order < SAMPLE_SIZE
          @_mFreqChar += 1
          if @_mLastOrder < SAMPLE_SIZE
            @_mTotalSeqs += 1
            # Matrix cell for the (previous, current) pair; swapped when reversed.
            cell = if @_mReversed
                     (order * SAMPLE_SIZE) + @_mLastOrder
                   else
                     (@_mLastOrder * SAMPLE_SIZE) + order
                   end
            @_mSeqCounters[precedence[cell]] += 1
          end
        end
        @_mLastOrder = order
      end

      if get_state() == EDetecting && @_mTotalSeqs > SB_ENOUGH_REL_THRESHOLD
        cf = get_confidence()
        if cf > POSITIVE_SHORTCUT_THRESHOLD
          $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, we have a winner\n" if $debug
          @_mState = EFoundIt
        elsif cf < NEGATIVE_SHORTCUT_THRESHOLD
          $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, below negative shortcut threshold #{NEGATIVE_SHORTCUT_THRESHOLD}\n" if $debug
          @_mState = ENotMe
        end
      end

      get_state()
    end

    # Returns a confidence value: 0.01 when no sequences have been counted,
    # otherwise the positive-sequence ratio normalised by the model's typical
    # ratio and scaled by the share of frequent characters. Values of 1.0 or
    # more are replaced by 0.99.
    def get_confidence
      return 0.01 unless @_mTotalSeqs > 0

      positive_ratio = (1.0 * @_mSeqCounters[POSITIVE_CAT]) / @_mTotalSeqs
      r = positive_ratio / @_mModel['mTypicalPositiveRatio']
      r = r * @_mFreqChar / @_mTotalChar
      r = 0.99 if r >= 1.0
      r
    end
  end
end