tmail_es 1.2.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142)
  1. checksums.yaml +7 -0
  2. data/CHANGES +83 -0
  3. data/LICENSE +21 -0
  4. data/NOTES +7 -0
  5. data/README +182 -0
  6. data/Rakefile +2 -0
  7. data/ext/Makefile +20 -0
  8. data/ext/tmailscanner/tmail/MANIFEST +4 -0
  9. data/ext/tmailscanner/tmail/depend +1 -0
  10. data/ext/tmailscanner/tmail/extconf.rb +33 -0
  11. data/ext/tmailscanner/tmail/tmailscanner.c +614 -0
  12. data/lib/tmail/Makefile +18 -0
  13. data/lib/tmail/address.rb +392 -0
  14. data/lib/tmail/attachments.rb +65 -0
  15. data/lib/tmail/base64.rb +46 -0
  16. data/lib/tmail/compat.rb +41 -0
  17. data/lib/tmail/config.rb +67 -0
  18. data/lib/tmail/core_extensions.rb +63 -0
  19. data/lib/tmail/encode.rb +590 -0
  20. data/lib/tmail/header.rb +962 -0
  21. data/lib/tmail/index.rb +9 -0
  22. data/lib/tmail/interface.rb +1162 -0
  23. data/lib/tmail/loader.rb +3 -0
  24. data/lib/tmail/mail.rb +578 -0
  25. data/lib/tmail/mailbox.rb +496 -0
  26. data/lib/tmail/main.rb +6 -0
  27. data/lib/tmail/mbox.rb +3 -0
  28. data/lib/tmail/net.rb +250 -0
  29. data/lib/tmail/obsolete.rb +132 -0
  30. data/lib/tmail/parser.rb +1060 -0
  31. data/lib/tmail/parser.y +416 -0
  32. data/lib/tmail/port.rb +379 -0
  33. data/lib/tmail/quoting.rb +164 -0
  34. data/lib/tmail/require_arch.rb +58 -0
  35. data/lib/tmail/scanner.rb +49 -0
  36. data/lib/tmail/scanner_r.rb +261 -0
  37. data/lib/tmail/stringio.rb +280 -0
  38. data/lib/tmail/utils.rb +361 -0
  39. data/lib/tmail/vendor/rchardet-1.3/COPYING +504 -0
  40. data/lib/tmail/vendor/rchardet-1.3/README +12 -0
  41. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/big5freq.rb +927 -0
  42. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/big5prober.rb +42 -0
  43. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb +238 -0
  44. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/charsetgroupprober.rb +112 -0
  45. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/charsetprober.rb +75 -0
  46. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/codingstatemachine.rb +64 -0
  47. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/constants.rb +42 -0
  48. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/escprober.rb +89 -0
  49. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/escsm.rb +244 -0
  50. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb +88 -0
  51. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euckrfreq.rb +596 -0
  52. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euckrprober.rb +42 -0
  53. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euctwfreq.rb +430 -0
  54. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euctwprober.rb +42 -0
  55. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312freq.rb +474 -0
  56. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312prober.rb +42 -0
  57. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/hebrewprober.rb +289 -0
  58. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/jisfreq.rb +570 -0
  59. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb +229 -0
  60. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langbulgarianmodel.rb +229 -0
  61. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langcyrillicmodel.rb +330 -0
  62. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langgreekmodel.rb +227 -0
  63. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langhebrewmodel.rb +202 -0
  64. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langhungarianmodel.rb +226 -0
  65. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langthaimodel.rb +201 -0
  66. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/latin1prober.rb +147 -0
  67. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb +89 -0
  68. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcsgroupprober.rb +45 -0
  69. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcssm.rb +542 -0
  70. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb +124 -0
  71. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcsgroupprober.rb +56 -0
  72. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sjisprober.rb +88 -0
  73. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb +167 -0
  74. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb +87 -0
  75. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet.rb +67 -0
  76. data/lib/tmail/version.rb +40 -0
  77. data/lib/tmail.rb +6 -0
  78. data/setup.rb +1482 -0
  79. data/test/extctrl.rb +6 -0
  80. data/test/fixtures/apple_unquoted_content_type +44 -0
  81. data/test/fixtures/inline_attachment.txt +2095 -0
  82. data/test/fixtures/iso_8859_1_email_without_encoding_and_message_id.txt +16 -0
  83. data/test/fixtures/mailbox +414 -0
  84. data/test/fixtures/mailbox.zip +0 -0
  85. data/test/fixtures/mailbox_without_any_from_or_sender +10 -0
  86. data/test/fixtures/mailbox_without_from +11 -0
  87. data/test/fixtures/mailbox_without_return_path +12 -0
  88. data/test/fixtures/marked_as_iso_8859_1_but_it_is_utf_8.txt +33 -0
  89. data/test/fixtures/marked_as_utf_8_but_it_is_iso_8859_1.txt +56 -0
  90. data/test/fixtures/raw_attack_email_with_zero_length_whitespace +29 -0
  91. data/test/fixtures/raw_base64_decoded_string +0 -0
  92. data/test/fixtures/raw_base64_email +83 -0
  93. data/test/fixtures/raw_base64_encoded_string +1 -0
  94. data/test/fixtures/raw_email +14 -0
  95. data/test/fixtures/raw_email10 +20 -0
  96. data/test/fixtures/raw_email11 +34 -0
  97. data/test/fixtures/raw_email12 +32 -0
  98. data/test/fixtures/raw_email13 +29 -0
  99. data/test/fixtures/raw_email2 +114 -0
  100. data/test/fixtures/raw_email3 +70 -0
  101. data/test/fixtures/raw_email4 +59 -0
  102. data/test/fixtures/raw_email5 +19 -0
  103. data/test/fixtures/raw_email6 +20 -0
  104. data/test/fixtures/raw_email7 +66 -0
  105. data/test/fixtures/raw_email8 +47 -0
  106. data/test/fixtures/raw_email9 +28 -0
  107. data/test/fixtures/raw_email_bad_time +62 -0
  108. data/test/fixtures/raw_email_double_at_in_header +14 -0
  109. data/test/fixtures/raw_email_multiple_from +30 -0
  110. data/test/fixtures/raw_email_only_attachment +17 -0
  111. data/test/fixtures/raw_email_quoted_with_0d0a +14 -0
  112. data/test/fixtures/raw_email_reply +32 -0
  113. data/test/fixtures/raw_email_simple +11 -0
  114. data/test/fixtures/raw_email_string_in_date_field +17 -0
  115. data/test/fixtures/raw_email_trailing_dot +21 -0
  116. data/test/fixtures/raw_email_with_bad_date +48 -0
  117. data/test/fixtures/raw_email_with_illegal_boundary +58 -0
  118. data/test/fixtures/raw_email_with_mimepart_without_content_type +94 -0
  119. data/test/fixtures/raw_email_with_multipart_mixed_quoted_boundary +50 -0
  120. data/test/fixtures/raw_email_with_nested_attachment +100 -0
  121. data/test/fixtures/raw_email_with_partially_quoted_subject +14 -0
  122. data/test/fixtures/raw_email_with_quoted_attachment_filename +60 -0
  123. data/test/fixtures/raw_email_with_quoted_illegal_boundary +58 -0
  124. data/test/fixtures/raw_email_with_wrong_splitted_multibyte_encoded_word_subject +15 -0
  125. data/test/fixtures/the_only_part_is_a_word_document.txt +425 -0
  126. data/test/fixtures/unquoted_filename_in_attachment +177 -0
  127. data/test/kcode.rb +14 -0
  128. data/test/temp_test_one.rb +46 -0
  129. data/test/test_address.rb +1216 -0
  130. data/test/test_attachments.rb +133 -0
  131. data/test/test_base64.rb +64 -0
  132. data/test/test_encode.rb +139 -0
  133. data/test/test_header.rb +1021 -0
  134. data/test/test_helper.rb +9 -0
  135. data/test/test_mail.rb +756 -0
  136. data/test/test_mbox.rb +184 -0
  137. data/test/test_port.rb +440 -0
  138. data/test/test_quote.rb +107 -0
  139. data/test/test_scanner.rb +209 -0
  140. data/test/test_utils.rb +36 -0
  141. data/tmail_es.gemspec +35 -0
  142. metadata +257 -0
@@ -0,0 +1,56 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is Mozilla Universal charset detector code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 2001
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ # Shy Shalom - original C code
13
+ #
14
+ # This library is free software; you can redistribute it and/or
15
+ # modify it under the terms of the GNU Lesser General Public
16
+ # License as published by the Free Software Foundation; either
17
+ # version 2.1 of the License, or (at your option) any later version.
18
+ #
19
+ # This library is distributed in the hope that it will be useful,
20
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22
+ # Lesser General Public License for more details.
23
+ #
24
+ # You should have received a copy of the GNU Lesser General Public
25
+ # License along with this library; if not, write to the Free Software
26
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27
+ # 02110-1301 USA
28
+ ######################### END LICENSE BLOCK #########################
29
+
30
module CharDet
  # Group prober that bundles every single-byte charset prober: one
  # SingleByteCharSetProber per language model, plus a HebrewProber that is
  # wired to a "logical" and a "visual" Win1255 prober.
  class SBCSGroupProber < CharSetGroupProber
    # Builds the list of member probers in @_mProbers and resets the group.
    def initialize
      super

      language_models = [
        Win1251CyrillicModel,
        Koi8rModel,
        Latin5CyrillicModel,
        MacCyrillicModel,
        Ibm866Model,
        Ibm855Model,
        Latin7GreekModel,
        Win1253GreekModel,
        Latin5BulgarianModel,
        Win1251BulgarianModel,
        Latin2HungarianModel,
        Win1250HungarianModel,
        TIS620ThaiModel
      ]
      @_mProbers = language_models.map { |model| SingleByteCharSetProber.new(model) }

      # Hebrew uses three cooperating probers that share one HebrewProber.
      # NOTE(review): the second constructor argument (false/true) presumably
      # selects reversed/visual ordering -- confirm in SingleByteCharSetProber.
      hebrew = HebrewProber.new
      logical = SingleByteCharSetProber.new(Win1255HebrewModel, false, hebrew)
      visual = SingleByteCharSetProber.new(Win1255HebrewModel, true, hebrew)
      hebrew.set_model_probers(logical, visual)
      @_mProbers = @_mProbers + [hebrew, logical, visual]

      reset
    end
  end
end
@@ -0,0 +1,88 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is mozilla.org code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 1998
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ #
13
+ # This library is free software; you can redistribute it and/or
14
+ # modify it under the terms of the GNU Lesser General Public
15
+ # License as published by the Free Software Foundation; either
16
+ # version 2.1 of the License, or (at your option) any later version.
17
+ #
18
+ # This library is distributed in the hope that it will be useful,
19
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
+ # Lesser General Public License for more details.
22
+ #
23
+ # You should have received a copy of the GNU Lesser General Public
24
+ # License along with this library; if not, write to the Free Software
25
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
+ # 02110-1301 USA
27
+ ######################### END LICENSE BLOCK #########################
28
+
29
module CharDet
  # Prober for Shift_JIS text. Combines a coding state machine with a
  # character-distribution analyzer and a context analyzer.
  class SJISProber < MultiByteCharSetProber
    def initialize
      super()
      @_mCodingSM = CodingStateMachine.new(SJISSMModel)
      @_mDistributionAnalyzer = SJISDistributionAnalysis.new
      @_mContextAnalyzer = SJISContextAnalysis.new
      reset
    end

    # Resets the inherited state and the context analyzer.
    def reset
      super()
      @_mContextAnalyzer.reset
    end

    def get_charset_name
      "SHIFT_JIS"
    end

    # Feeds a chunk through the state machine one position at a time and
    # forwards each completed character to both analyzers.
    # Returns the prober state after the chunk has been processed.
    def feed(aBuf)
      bufLen = aBuf.length

      (0...bufLen).each do |pos|
        state = @_mCodingSM.next_state(aBuf[pos..pos])

        if state == EError
          $stderr << "#{get_charset_name} prober hit error at byte #{pos}\n" if $debug
          @_mState = ENotMe
          break
        end

        if state == EItsMe
          @_mState = EFoundIt
          break
        end

        next unless state == EStart

        charLen = @_mCodingSM.get_current_charlen
        if pos == 0
          # A character finished on the first position of this chunk, so it
          # is analysed through @_mLastChar, whose slot 0 was stored at the
          # end of the previous feed call.
          @_mLastChar[1] = aBuf[0..0]
          @_mContextAnalyzer.feed(@_mLastChar[2 - charLen..-1], charLen)
          @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
        else
          @_mContextAnalyzer.feed(aBuf[i_start(pos, charLen)...i_start(pos, charLen) + 2], charLen)
          @_mDistributionAnalyzer.feed(aBuf[pos - 1...pos + 1], charLen)
        end
      end

      # Remember the final position's content for the next chunk.
      @_mLastChar[0] = aBuf[(bufLen - 1)..(bufLen - 1)]

      if get_state == EDetecting &&
         @_mContextAnalyzer.got_enough_data && (get_confidence > SHORTCUT_THRESHOLD)
        @_mState = EFoundIt
      end

      get_state
    end

    # Confidence is the larger of the two analyzers' confidences.
    def get_confidence
      [@_mContextAnalyzer.get_confidence, @_mDistributionAnalyzer.get_confidence].max
    end

    private

    # Start index of the window handed to the context analyzer for a
    # character of +charLen+ that completed at +pos+ (i + 1 - charLen).
    def i_start(pos, charLen)
      pos + 1 - charLen
    end
  end
end
@@ -0,0 +1,167 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is Mozilla Universal charset detector code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 2001
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ # Shy Shalom - original C code
13
+ #
14
+ # This library is free software; you can redistribute it and/or
15
+ # modify it under the terms of the GNU Lesser General Public
16
+ # License as published by the Free Software Foundation; either
17
+ # version 2.1 of the License, or (at your option) any later version.
18
+ #
19
+ # This library is distributed in the hope that it will be useful,
20
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22
+ # Lesser General Public License for more details.
23
+ #
24
+ # You should have received a copy of the GNU Lesser General Public
25
+ # License along with this library; if not, write to the Free Software
26
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27
+ # 02110-1301 USA
28
+ ######################### END LICENSE BLOCK #########################
29
+
30
module CharDet
  # Confidence a prober must exceed in UniversalDetector#close for its guess
  # to be reported.
  MINIMUM_THRESHOLD = 0.20

  # Input states tracked by UniversalDetector while scanning.
  EPureAscii = 0 # neither a high-bit byte nor an escape sequence seen so far
  EEscAscii = 1  # an ESC byte or "~{" has been seen
  EHighbyte = 2  # at least one byte in 0x80..0xFF has been seen

  # Drives charset detection over one or more chunks of input.
  #
  # Usage: call #feed for each chunk, then #close, then read #result, which
  # is a Hash with the keys 'encoding' and 'confidence'.
  class UniversalDetector
    attr_accessor :result

    # Byte-order marks and the encoding each one implies. Order matters:
    # the 4-byte marks are tested before the 2-byte UTF-16 marks they begin
    # with. Built with pack so the markers are raw byte strings.
    BOMS = [
      [[0xEF, 0xBB, 0xBF], "UTF-8"],
      [[0xFF, 0xFE, 0x00, 0x00], "UTF-32LE"],
      [[0x00, 0x00, 0xFE, 0xFF], "UTF-32BE"],
      [[0xFE, 0xFF, 0x00, 0x00], "X-ISO-10646-UCS-4-3412"],
      [[0x00, 0x00, 0xFF, 0xFE], "X-ISO-10646-UCS-4-2143"],
      [[0xFF, 0xFE], "UTF-16LE"],
      [[0xFE, 0xFF], "UTF-16BE"]
    ].map { |bytes, name| [bytes.pack('C*'), name] }.freeze

    def initialize
      # /n keeps the pattern byte-oriented regardless of source encoding
      # or $KCODE; without it the high-byte escapes are not a valid pattern
      # under a multibyte source encoding.
      @_highBitDetector = /[\x80-\xFF]/n
      @_escDetector = /(\033|\~\{)/
      @_mEscCharSetProber = nil
      @_mCharSetProbers = []
      reset
    end

    # Returns the detector (and any probers already created) to its initial
    # state so it can be reused for a new document.
    def reset
      @result = {'encoding' => nil, 'confidence' => 0.0}
      @done = false
      @_mStart = true
      @_mGotData = false
      @_mInputState = EPureAscii
      @_mLastChar = ''
      @_mEscCharSetProber.reset if @_mEscCharSetProber
      @_mCharSetProbers.each { |prober| prober.reset }
    end

    # Consumes one chunk of input.
    #
    # aBuf:: String chunk; nil or empty chunks are ignored and do not count
    #        as received data.
    #
    # Once a result has been established (BOM found or a prober reported
    # EFoundIt) further chunks are ignored. Always returns nil; read #result.
    def feed(aBuf)
      return if @done
      # 0 is truthy in Ruby, so emptiness has to be tested explicitly.
      return if aBuf.nil? || aBuf.empty?

      bytes = binary_view(aBuf)

      # If the very first data starts with a BOM, the encoding is known.
      unless @_mGotData
        encoding = bom_encoding(bytes)
        @result = {'encoding' => encoding, 'confidence' => 1.0} if encoding
      end

      @_mGotData = true
      if @result['encoding'] && (@result['confidence'] > 0.0)
        @done = true
        return
      end

      if @_mInputState == EPureAscii
        if @_highBitDetector =~ bytes
          @_mInputState = EHighbyte
        elsif @_escDetector =~ (@_mLastChar + bytes)
          # The previous chunk's last byte is prepended so that "~{" split
          # across two chunks is still recognised.
          @_mInputState = EEscAscii
        end
      end

      @_mLastChar = bytes[-1..-1]

      # The probers receive the caller's buffer unchanged.
      if @_mInputState == EEscAscii
        @_mEscCharSetProber ||= EscCharSetProber.new
        if @_mEscCharSetProber.feed(aBuf) == EFoundIt
          @result = {'encoding' => @_mEscCharSetProber.get_charset_name,
                     'confidence' => @_mEscCharSetProber.get_confidence}
          @done = true
        end
      elsif @_mInputState == EHighbyte
        if @_mCharSetProbers.nil? || @_mCharSetProbers.empty?
          @_mCharSetProbers = [MBCSGroupProber.new, SBCSGroupProber.new, Latin1Prober.new]
        end
        @_mCharSetProbers.each do |prober|
          next unless prober.feed(aBuf) == EFoundIt
          @result = {'encoding' => prober.get_charset_name,
                     'confidence' => prober.get_confidence}
          @done = true
          break
        end
      end

      nil
    end

    # Finishes detection and settles #result.
    #
    # Returns the result Hash when this call decided it (pure ASCII input, or
    # a prober whose confidence exceeds MINIMUM_THRESHOLD); returns nil when
    # detection was already done, no data was received, or no prober was
    # confident enough.
    def close
      return if @done
      unless @_mGotData
        $stderr << "no data received!\n" if $debug
        return
      end
      @done = true

      if @_mInputState == EPureAscii
        @result = {'encoding' => 'ascii', 'confidence' => 1.0}
        return @result
      end

      if @_mInputState == EHighbyte
        scored = @_mCharSetProbers.map { |prober| [prober, prober.get_confidence] }
        best, confidence = scored.max { |a, b| a[1] <=> b[1] }
        if best && confidence > MINIMUM_THRESHOLD
          @result = {'encoding' => best.get_charset_name, 'confidence' => confidence}
          return @result
        end
      end

      if $debug
        $stderr << "no probers hit minimum threshhold\n"
        # The prober list is empty when input never left the escape state,
        # so the first entry must not be dereferenced blindly.
        group = @_mCharSetProbers.first
        members = group.respond_to?(:_mProbers) ? group._mProbers : nil
        (members || []).each do |prober|
          next unless prober
          $stderr << "#{prober.get_charset_name} confidence = #{prober.get_confidence}\n"
        end
      end

      nil
    end

    private

    # Returns a byte-oriented copy of +str+ where the runtime has string
    # encodings; otherwise returns +str+ itself.
    def binary_view(str)
      return str unless str.respond_to?(:force_encoding)
      str.dup.force_encoding('BINARY')
    end

    # Returns the encoding name implied by a leading BOM in +bytes+, or nil.
    def bom_encoding(bytes)
      match = BOMS.find { |marker, _name| bytes[0, marker.length] == marker }
      match && match[1]
    end
  end
end
@@ -0,0 +1,87 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is mozilla.org code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 1998
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ #
13
+ # This library is free software; you can redistribute it and/or
14
+ # modify it under the terms of the GNU Lesser General Public
15
+ # License as published by the Free Software Foundation; either
16
+ # version 2.1 of the License, or (at your option) any later version.
17
+ #
18
+ # This library is distributed in the hope that it will be useful,
19
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
+ # Lesser General Public License for more details.
22
+ #
23
+ # You should have received a copy of the GNU Lesser General Public
24
+ # License along with this library; if not, write to the Free Software
25
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
+ # 02110-1301 USA
27
+ ######################### END LICENSE BLOCK #########################
28
+
29
module CharDet
  # Factor applied to the "not UTF-8" likelihood for each multi-byte
  # character seen (see UTF8Prober#get_confidence).
  ONE_CHAR_PROB = 0.5

  # Prober for UTF-8: runs bytes through a coding state machine and counts
  # the multi-byte characters that complete successfully.
  class UTF8Prober < CharSetProber
    def initialize
      super()
      @_mCodingSM = CodingStateMachine.new(UTF8SMModel)
      reset
    end

    # Resets inherited state, the state machine and the multi-byte counter.
    def reset
      super()
      @_mCodingSM.reset
      @_mNumOfMBChar = 0
    end

    def get_charset_name
      "utf-8"
    end

    # Feeds a chunk byte by byte; stops at the first error or positive
    # identification. Returns the prober state afterwards.
    def feed(aBuf)
      aBuf.each_byte do |byte|
        state = @_mCodingSM.next_state(byte.chr)

        if state == EError
          @_mState = ENotMe
          break
        end

        if state == EItsMe
          @_mState = EFoundIt
          break
        end

        # A character just completed; count it when it spans 2+ bytes.
        if state == EStart && @_mCodingSM.get_current_charlen >= 2
          @_mNumOfMBChar += 1
        end
      end

      if get_state == EDetecting && (get_confidence > SHORTCUT_THRESHOLD)
        @_mState = EFoundIt
      end

      get_state
    end

    # Confidence grows with the number of multi-byte characters seen:
    # 1 - 0.99 * ONE_CHAR_PROB**n for n < 6, and a flat 0.99 from 6 onwards.
    def get_confidence
      return 0.99 unless @_mNumOfMBChar < 6

      unlikely = 0.99
      @_mNumOfMBChar.times { unlikely = unlikely * ONE_CHAR_PROB }
      1.0 - unlikely
    end
  end
end
@@ -0,0 +1,67 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # This library is free software; you can redistribute it and/or
3
+ # modify it under the terms of the GNU Lesser General Public
4
+ # License as published by the Free Software Foundation; either
5
+ # version 2.1 of the License, or (at your option) any later version.
6
+ #
7
+ # This library is distributed in the hope that it will be useful,
8
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
9
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10
+ # Lesser General Public License for more details.
11
+ #
12
+ # You should have received a copy of the GNU Lesser General Public
13
+ # License along with this library; if not, write to the Free Software
14
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
15
+ # 02110-1301 USA
16
+ ######################### END LICENSE BLOCK #########################
17
+
18
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__))
19
+
20
+ require 'rchardet/charsetprober'
21
+ require 'rchardet/mbcharsetprober'
22
+
23
+ require 'rchardet/big5freq'
24
+ require 'rchardet/big5prober'
25
+ require 'rchardet/chardistribution'
26
+ require 'rchardet/charsetgroupprober'
27
+
28
+ require 'rchardet/codingstatemachine'
29
+ require 'rchardet/constants'
30
+ require 'rchardet/escprober'
31
+ require 'rchardet/escsm'
32
+ require 'rchardet/eucjpprober'
33
+ require 'rchardet/euckrfreq'
34
+ require 'rchardet/euckrprober'
35
+ require 'rchardet/euctwfreq'
36
+ require 'rchardet/euctwprober'
37
+ require 'rchardet/gb2312freq'
38
+ require 'rchardet/gb2312prober'
39
+ require 'rchardet/hebrewprober'
40
+ require 'rchardet/jisfreq'
41
+ require 'rchardet/jpcntx'
42
+ require 'rchardet/langbulgarianmodel'
43
+ require 'rchardet/langcyrillicmodel'
44
+ require 'rchardet/langgreekmodel'
45
+ require 'rchardet/langhebrewmodel'
46
+ require 'rchardet/langhungarianmodel'
47
+ require 'rchardet/langthaimodel'
48
+ require 'rchardet/latin1prober'
49
+
50
+ require 'rchardet/mbcsgroupprober'
51
+ require 'rchardet/mbcssm'
52
+ require 'rchardet/sbcharsetprober'
53
+ require 'rchardet/sbcsgroupprober'
54
+ require 'rchardet/sjisprober'
55
+ require 'rchardet/universaldetector'
56
+ require 'rchardet/utf8prober'
57
+
58
module CharDet
  VERSION = "1.3"

  # One-shot detection: runs a fresh UniversalDetector over +aBuf+ and
  # returns the detector's result after it has been closed.
  def self.detect(aBuf)
    detector = UniversalDetector.new
    detector.reset
    detector.feed(aBuf)
    detector.close
    detector.result
  end
end
@@ -0,0 +1,40 @@
1
+ #
2
+ # version.rb
3
+ #
4
+ #--
5
+ # Copyright (c) 1998-2003 Minero Aoki <aamine@loveruby.net>
6
+ #
7
+ # Permission is hereby granted, free of charge, to any person obtaining
8
+ # a copy of this software and associated documentation files (the
9
+ # "Software"), to deal in the Software without restriction, including
10
+ # without limitation the rights to use, copy, modify, merge, publish,
11
+ # distribute, sublicense, and/or sell copies of the Software, and to
12
+ # permit persons to whom the Software is furnished to do so, subject to
13
+ # the following conditions:
14
+ #
15
+ # The above copyright notice and this permission notice shall be
16
+ # included in all copies or substantial portions of the Software.
17
+ #
18
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
22
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
+ #
26
+ # Note: Originally licensed under LGPL v2+. Using MIT license for Rails
27
+ # with permission of Minero Aoki.
28
+ #++
29
+
30
+ #:stopdoc:
31
module TMail
  # Version components of the library and their dotted string form.
  # NOTE(review): the package listing this file ships in is labelled
  # 1.2.7.2, while these components yield "1.2.7.1" -- confirm which is
  # intended before relying on STRING.
  module VERSION
    MAJOR = 1
    MINOR = 2
    TINY = 7
    MICRO = 1

    STRING = "#{MAJOR}.#{MINOR}.#{TINY}.#{MICRO}"
  end
end
data/lib/tmail.rb ADDED
@@ -0,0 +1,6 @@
1
+ require 'tmail/version'
2
+ require 'tmail/mail'
3
+ require 'tmail/mailbox'
4
+ require 'tmail/core_extensions'
5
+ require 'tmail/net'
6
+ require 'tmail/vendor/rchardet-1.3/lib/rchardet'