sa-tmail 1.2.5.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/CHANGES +74 -0
  2. data/LICENSE +21 -0
  3. data/NOTES +7 -0
  4. data/README +179 -0
  5. data/Rakefile +2 -0
  6. data/ext/Makefile +20 -0
  7. data/ext/tmailscanner/tmail/MANIFEST +4 -0
  8. data/ext/tmailscanner/tmail/depend +1 -0
  9. data/ext/tmailscanner/tmail/extconf.rb +34 -0
  10. data/ext/tmailscanner/tmail/tmailscanner.c +614 -0
  11. data/lib/tmail.rb +6 -0
  12. data/lib/tmail/Makefile +18 -0
  13. data/lib/tmail/address.rb +392 -0
  14. data/lib/tmail/attachments.rb +65 -0
  15. data/lib/tmail/base64.rb +46 -0
  16. data/lib/tmail/compat.rb +41 -0
  17. data/lib/tmail/config.rb +67 -0
  18. data/lib/tmail/core_extensions.rb +63 -0
  19. data/lib/tmail/encode.rb +590 -0
  20. data/lib/tmail/header.rb +962 -0
  21. data/lib/tmail/index.rb +9 -0
  22. data/lib/tmail/interface.rb +1162 -0
  23. data/lib/tmail/loader.rb +3 -0
  24. data/lib/tmail/mail.rb +578 -0
  25. data/lib/tmail/mailbox.rb +496 -0
  26. data/lib/tmail/main.rb +6 -0
  27. data/lib/tmail/mbox.rb +3 -0
  28. data/lib/tmail/net.rb +250 -0
  29. data/lib/tmail/obsolete.rb +132 -0
  30. data/lib/tmail/parser.rb +1060 -0
  31. data/lib/tmail/parser.y +416 -0
  32. data/lib/tmail/port.rb +379 -0
  33. data/lib/tmail/quoting.rb +155 -0
  34. data/lib/tmail/require_arch.rb +58 -0
  35. data/lib/tmail/scanner.rb +49 -0
  36. data/lib/tmail/scanner_r.rb +261 -0
  37. data/lib/tmail/stringio.rb +280 -0
  38. data/lib/tmail/utils.rb +361 -0
  39. data/lib/tmail/version.rb +39 -0
  40. data/lib/vendor/rchardet-1.3/COPYING +504 -0
  41. data/lib/vendor/rchardet-1.3/README +12 -0
  42. data/lib/vendor/rchardet-1.3/lib/rchardet.rb +67 -0
  43. data/lib/vendor/rchardet-1.3/lib/rchardet/big5freq.rb +927 -0
  44. data/lib/vendor/rchardet-1.3/lib/rchardet/big5prober.rb +42 -0
  45. data/lib/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb +237 -0
  46. data/lib/vendor/rchardet-1.3/lib/rchardet/charsetgroupprober.rb +112 -0
  47. data/lib/vendor/rchardet-1.3/lib/rchardet/charsetprober.rb +75 -0
  48. data/lib/vendor/rchardet-1.3/lib/rchardet/codingstatemachine.rb +64 -0
  49. data/lib/vendor/rchardet-1.3/lib/rchardet/constants.rb +42 -0
  50. data/lib/vendor/rchardet-1.3/lib/rchardet/escprober.rb +90 -0
  51. data/lib/vendor/rchardet-1.3/lib/rchardet/escsm.rb +244 -0
  52. data/lib/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb +88 -0
  53. data/lib/vendor/rchardet-1.3/lib/rchardet/euckrfreq.rb +596 -0
  54. data/lib/vendor/rchardet-1.3/lib/rchardet/euckrprober.rb +42 -0
  55. data/lib/vendor/rchardet-1.3/lib/rchardet/euctwfreq.rb +430 -0
  56. data/lib/vendor/rchardet-1.3/lib/rchardet/euctwprober.rb +42 -0
  57. data/lib/vendor/rchardet-1.3/lib/rchardet/gb2312freq.rb +474 -0
  58. data/lib/vendor/rchardet-1.3/lib/rchardet/gb2312prober.rb +42 -0
  59. data/lib/vendor/rchardet-1.3/lib/rchardet/hebrewprober.rb +289 -0
  60. data/lib/vendor/rchardet-1.3/lib/rchardet/jisfreq.rb +570 -0
  61. data/lib/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb +229 -0
  62. data/lib/vendor/rchardet-1.3/lib/rchardet/langbulgarianmodel.rb +229 -0
  63. data/lib/vendor/rchardet-1.3/lib/rchardet/langcyrillicmodel.rb +330 -0
  64. data/lib/vendor/rchardet-1.3/lib/rchardet/langgreekmodel.rb +227 -0
  65. data/lib/vendor/rchardet-1.3/lib/rchardet/langhebrewmodel.rb +202 -0
  66. data/lib/vendor/rchardet-1.3/lib/rchardet/langhungarianmodel.rb +226 -0
  67. data/lib/vendor/rchardet-1.3/lib/rchardet/langthaimodel.rb +201 -0
  68. data/lib/vendor/rchardet-1.3/lib/rchardet/latin1prober.rb +147 -0
  69. data/lib/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb +89 -0
  70. data/lib/vendor/rchardet-1.3/lib/rchardet/mbcsgroupprober.rb +47 -0
  71. data/lib/vendor/rchardet-1.3/lib/rchardet/mbcssm.rb +542 -0
  72. data/lib/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb +124 -0
  73. data/lib/vendor/rchardet-1.3/lib/rchardet/sbcsgroupprober.rb +58 -0
  74. data/lib/vendor/rchardet-1.3/lib/rchardet/sjisprober.rb +88 -0
  75. data/lib/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb +166 -0
  76. data/lib/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb +87 -0
  77. data/log/BugTrackingLog.txt +1245 -0
  78. data/log/Changelog.txt +534 -0
  79. data/log/Testlog.txt +2340 -0
  80. data/log/Todo.txt +30 -0
  81. data/meta/MANIFEST +128 -0
  82. data/meta/VERSION +1 -0
  83. data/meta/project.yaml +30 -0
  84. data/meta/unixname +1 -0
  85. data/sample/bench_base64.rb +48 -0
  86. data/sample/data/multipart +23 -0
  87. data/sample/data/normal +29 -0
  88. data/sample/data/sendtest +5 -0
  89. data/sample/data/simple +14 -0
  90. data/sample/data/test +27 -0
  91. data/sample/extract-attachements.rb +33 -0
  92. data/sample/from-check.rb +26 -0
  93. data/sample/multipart.rb +26 -0
  94. data/sample/parse-bench.rb +68 -0
  95. data/sample/parse-test.rb +19 -0
  96. data/sample/sendmail.rb +94 -0
  97. data/setup.rb +1482 -0
  98. data/site/contributing/index.html +183 -0
  99. data/site/css/clean.css +27 -0
  100. data/site/css/layout.css +31 -0
  101. data/site/css/style.css +60 -0
  102. data/site/download/index.html +61 -0
  103. data/site/img/envelope.jpg +0 -0
  104. data/site/img/mailman.gif +0 -0
  105. data/site/img/stamp-sm.jpg +0 -0
  106. data/site/img/stamp.jpg +0 -0
  107. data/site/img/stampborder.jpg +0 -0
  108. data/site/img/tfire.jpg +0 -0
  109. data/site/img/tmail.png +0 -0
  110. data/site/index.html +272 -0
  111. data/site/js/jquery.js +31 -0
  112. data/site/log/Changelog.xsl +33 -0
  113. data/site/log/changelog.xml +1677 -0
  114. data/site/outdated/BUGS +3 -0
  115. data/site/outdated/DEPENDS +1 -0
  116. data/site/outdated/Incompatibilities +89 -0
  117. data/site/outdated/Incompatibilities.ja +102 -0
  118. data/site/outdated/NEWS +9 -0
  119. data/site/outdated/README.ja +73 -0
  120. data/site/outdated/doc.ja/address.html +275 -0
  121. data/site/outdated/doc.ja/basics.html +405 -0
  122. data/site/outdated/doc.ja/config.html +49 -0
  123. data/site/outdated/doc.ja/details.html +146 -0
  124. data/site/outdated/doc.ja/index.html +39 -0
  125. data/site/outdated/doc.ja/mail.html +793 -0
  126. data/site/outdated/doc.ja/mailbox.html +265 -0
  127. data/site/outdated/doc.ja/port.html +95 -0
  128. data/site/outdated/doc.ja/tmail.html +58 -0
  129. data/site/outdated/doc.ja/usage.html +202 -0
  130. data/site/outdated/rdd/address.rrd.m +229 -0
  131. data/site/outdated/rdd/basics.rd.m +275 -0
  132. data/site/outdated/rdd/config.rrd.m +26 -0
  133. data/site/outdated/rdd/details.rd.m +117 -0
  134. data/site/outdated/rdd/index.rhtml.m +54 -0
  135. data/site/outdated/rdd/mail.rrd.m +701 -0
  136. data/site/outdated/rdd/mailbox.rrd.m +228 -0
  137. data/site/outdated/rdd/port.rrd.m +69 -0
  138. data/site/outdated/rdd/tmail.rrd.m +33 -0
  139. data/site/outdated/rdd/usage.rd.m +247 -0
  140. data/site/quickstart/index.html +69 -0
  141. data/site/quickstart/quickstart.html +52 -0
  142. data/site/quickstart/usage.html +193 -0
  143. data/site/reference/address.html +247 -0
  144. data/site/reference/config.html +30 -0
  145. data/site/reference/index.html +101 -0
  146. data/site/reference/mail.html +726 -0
  147. data/site/reference/mailbox.html +245 -0
  148. data/site/reference/port.html +75 -0
  149. data/site/reference/tmail.html +35 -0
  150. data/test/extctrl.rb +6 -0
  151. data/test/fixtures/mailbox +414 -0
  152. data/test/fixtures/mailbox_without_any_from_or_sender +10 -0
  153. data/test/fixtures/mailbox_without_from +11 -0
  154. data/test/fixtures/mailbox_without_return_path +12 -0
  155. data/test/fixtures/raw_attack_email_with_zero_length_whitespace +29 -0
  156. data/test/fixtures/raw_base64_decoded_string +0 -0
  157. data/test/fixtures/raw_base64_email +83 -0
  158. data/test/fixtures/raw_base64_encoded_string +1 -0
  159. data/test/fixtures/raw_email +14 -0
  160. data/test/fixtures/raw_email10 +20 -0
  161. data/test/fixtures/raw_email11 +34 -0
  162. data/test/fixtures/raw_email12 +32 -0
  163. data/test/fixtures/raw_email13 +29 -0
  164. data/test/fixtures/raw_email2 +114 -0
  165. data/test/fixtures/raw_email3 +70 -0
  166. data/test/fixtures/raw_email4 +59 -0
  167. data/test/fixtures/raw_email5 +19 -0
  168. data/test/fixtures/raw_email6 +20 -0
  169. data/test/fixtures/raw_email7 +66 -0
  170. data/test/fixtures/raw_email8 +47 -0
  171. data/test/fixtures/raw_email9 +28 -0
  172. data/test/fixtures/raw_email_multiple_from +30 -0
  173. data/test/fixtures/raw_email_quoted_with_0d0a +14 -0
  174. data/test/fixtures/raw_email_reply +32 -0
  175. data/test/fixtures/raw_email_simple +11 -0
  176. data/test/fixtures/raw_email_with_bad_date +48 -0
  177. data/test/fixtures/raw_email_with_illegal_boundary +58 -0
  178. data/test/fixtures/raw_email_with_mimepart_without_content_type +94 -0
  179. data/test/fixtures/raw_email_with_multipart_mixed_quoted_boundary +50 -0
  180. data/test/fixtures/raw_email_with_nested_attachment +100 -0
  181. data/test/fixtures/raw_email_with_partially_quoted_subject +14 -0
  182. data/test/fixtures/raw_email_with_quoted_illegal_boundary +58 -0
  183. data/test/kcode.rb +14 -0
  184. data/test/temp_test_one.rb +46 -0
  185. data/test/test_address.rb +1216 -0
  186. data/test/test_attachments.rb +131 -0
  187. data/test/test_base64.rb +64 -0
  188. data/test/test_encode.rb +139 -0
  189. data/test/test_header.rb +1021 -0
  190. data/test/test_helper.rb +9 -0
  191. data/test/test_mail.rb +756 -0
  192. data/test/test_mbox.rb +184 -0
  193. data/test/test_port.rb +440 -0
  194. data/test/test_quote.rb +107 -0
  195. data/test/test_scanner.rb +209 -0
  196. data/test/test_utils.rb +36 -0
  197. data/work/script/make +26 -0
  198. data/work/script/rdoc +39 -0
  199. data/work/script/setup +1616 -0
  200. data/work/script/test +30 -0
  201. metadata +309 -0
@@ -0,0 +1,155 @@
1
+ =begin rdoc
2
+
3
+ = Quoting methods
4
+
5
+ =end
6
+ module TMail
7
class Mail
  # Returns the subject line with encoded-words decoded and the text
  # converted to +to_charset+.
  def subject(to_charset = 'utf-8')
    Unquoter.unquote_and_convert_to(quoted_subject, to_charset)
  end

  # Returns the body with its Content-Transfer-Encoding undone and the text
  # converted to +to_charset+. Bodies whose transfer encoding is "binary" or
  # unrecognised are returned exactly as stored.
  def unquoted_body(to_charset = 'utf-8')
    source_charset = charset
    encoding = (content_transfer_encoding || "7bit").downcase

    if encoding == "quoted-printable"
      # Many mailers declare us-ascii (or rely on it as the default) while
      # actually sending ISO-8859-1, so us-ascii is treated as iso-8859-1
      # for quoted-printable bodies.
      if !source_charset.blank? && source_charset.downcase == 'us-ascii'
        source_charset = 'iso-8859-1'
      end
      Unquoter.unquote_quoted_printable_and_convert_to(quoted_body, to_charset, source_charset, true)
    elsif encoding == "base64"
      Unquoter.unquote_base64_and_convert_to(quoted_body, to_charset, source_charset)
    elsif encoding == "7bit" || encoding == "8bit"
      Unquoter.convert_to(quoted_body, to_charset, source_charset)
    else
      # "binary" and anything unknown: hand the raw body back.
      quoted_body
    end
  end

  # Returns the readable body text converted to +to_charset+.
  #
  # For multipart mails every part is rendered in order and the results are
  # concatenated: nested multiparts recurse, parts without a content-type
  # header contribute nothing, non-attachment parts contribute their
  # unquoted body, and attachments contribute whatever the block (or the
  # default "Attachment: <name>" presenter) returns for the file name.
  def body(to_charset = 'utf-8', &block)
    presenter = block || Proc.new { |file_name| "Attachment: #{file_name}\n" }

    return unquoted_body(to_charset) unless multipart?

    rendered = parts.collect do |part|
      content_type = part["content-type"]

      next part.body(to_charset, &presenter) if part.multipart?
      next "" if content_type.nil?
      next part.unquoted_body(to_charset) unless attachment?(part)

      presenter.call(content_type["name"] || "(unnamed)")
    end
    rendered.join
  end
end
58
+
59
class Attachment

  include TextUtils

  # Returns the attachment's file name with encoded-words decoded, converted
  # to +to_charset+, and with a trailing line break removed.
  def original_filename(to_charset = 'utf-8')
    decoded = Unquoter.unquote_and_convert_to(quoted_filename, to_charset)
    decoded.chomp
  end
end
67
+
68
# Decodes encoded-words ("=?charset?Q?...?=" / "=?charset?B?...?="),
# quoted-printable and base64 text, and converts the result between
# charsets. All methods are class-level.
class Unquoter
  class << self
    # Decodes every encoded-word in +text+ and converts the pieces to
    # +to_charset+. Returns "" for nil input. The caller's string is never
    # modified (so frozen strings are accepted).
    #
    # +from_charset+ is kept for interface compatibility; as each match is
    # processed it is replaced by the charset named in that encoded-word
    # (nil for plain text that is not followed by an encoded-word).
    #
    # Raises RuntimeError for an encoded-word whose method is neither Q nor B.
    def unquote_and_convert_to(text, to_charset, from_charset = "iso-8859-1", preserve_underscores=false)
      return "" if text.nil?

      # Remove whitespace between adjacent encoded-words, working on a copy
      # instead of mutating the argument.
      joined = text.gsub(/\?=(\s*)=\?/, '?==?')

      joined.gsub(/(.*?)(?:(?:=\?(.*?)\?(.)\?(.*?)\?=)|$)/) do
        before         = $1
        from_charset   = $2
        quoting_method = $3
        encoded        = $4

        before = convert_to(before, to_charset, from_charset) if before.length > 0
        before + case quoting_method
            when "q", "Q" then
              unquote_quoted_printable_and_convert_to(encoded, to_charset, from_charset, preserve_underscores)
            when "b", "B" then
              unquote_base64_and_convert_to(encoded, to_charset, from_charset)
            when nil then
              # will be nil at the end of the string, due to the nature of
              # the regex used.
              ""
            else
              raise "unknown quoting method #{quoting_method.inspect}"
          end
      end
    end

    # Charset conversion with two safety nets around the plain converter:
    # the source charset is detected when none is given, and a conversion
    # that fails with Iconv::InvalidCharacter is retried once as iso-8859-1.
    # If even that fails the text is returned unconverted (never nil).
    def convert_to_with_fallback_on_iso_8859_1(text, to, from)
      # nil text: let the plain converter decide what to return.
      return convert_to_without_fallback_on_iso_8859_1(text, to, from) if text.nil?
      return text if to == 'utf-8' && text.isutf8

      if from.blank? && !text.is_binary_data?
        from = CharDet.detect(text)['encoding']

        # Chardet usually detects iso-8859-2 (aka windows-1250), but the text is
        # iso-8859-1 (aka windows-1252 and Latin1). http://en.wikipedia.org/wiki/ISO/IEC_8859-2
        # This can cause unwanted characters, like ŕ instead of à.
        # (I know, could be a very bad decision...)
        from = 'iso-8859-1' if from =~ /iso-8859-2/i
      end

      begin
        convert_to_without_fallback_on_iso_8859_1(text, to, from)
      rescue Iconv::InvalidCharacter
        # Already tried as latin1: give up and hand the text back as-is.
        return text if from.to_s.downcase == 'iso-8859-1'
        from = 'iso-8859-1'
        retry
      end
    end

    # Decodes quoted-printable +text+ and converts it from +from+ to +to+.
    # Underscores are turned into spaces unless +preserve_underscores+ is set.
    def unquote_quoted_printable_and_convert_to(text, to, from, preserve_underscores=false)
      text = text.gsub(/_/, " ") unless preserve_underscores
      text = text.gsub(/\r\n|\r/, "\n") # normalize newlines
      convert_to(text.unpack("M*").first, to, from)
    end

    # Decodes base64 +text+ and converts it from +from+ to +to+.
    def unquote_base64_and_convert_to(text, to, from)
      convert_to(Base64.decode(text), to, from)
    end

    begin
      require 'iconv'
      # Converts +text+ from charset +from+ to charset +to+ with Iconv.
      # Returns +text+ untouched when either charset is missing and "" for
      # nil text when both are present.
      def convert_to(text, to, from)
        return text unless to && from
        text ? Iconv.iconv(to, from, text).first : ""
      rescue Iconv::IllegalSequence, Iconv::InvalidEncoding, Errno::EINVAL
        # the 'from' parameter specifies a charset other than what the text
        # actually is...not much we can do in this case but just return the
        # unconverted text.
        #
        # Ditto if either parameter represents an unknown charset, like
        # X-UNKNOWN.
        text
      end
    rescue LoadError
      # Not providing quoting support
      def convert_to(text, to, from)
        warn "Action Mailer: iconv not loaded; ignoring conversion from #{from} to #{to} (#{__FILE__}:#{__LINE__})"
        text
      end
    end

    # Route every convert_to call through the fallback-aware wrapper.
    alias_method :convert_to_without_fallback_on_iso_8859_1, :convert_to
    alias_method :convert_to, :convert_to_with_fallback_on_iso_8859_1

  end
end
155
+ end
@@ -0,0 +1,58 @@
1
+ #:stopdoc:
2
+ require 'rbconfig'
3
+
4
# Attempts to require a native extension built for the running
# architecture, i.e. "tmail/<arch>/<fname>".
#
# On mswin platforms a failed load is retried against the pre-built
# binaries under "tmail/mswin32/<fname>". On every other platform the
# original LoadError is re-raised so the caller can fall back to the
# pure-Ruby version.
#
# The architecture comes from rbconfig's CONFIG['arch']. RbConfig is used
# when it exists; the old top-level Config constant is only consulted on
# Rubies that do not define RbConfig.
def require_arch(fname)
  config = defined?(RbConfig) ? RbConfig::CONFIG : Config::CONFIG
  arch = config['arch']
  begin
    require File.join("tmail", arch, fname)
  rescue LoadError => e
    raise e unless arch =~ /mswin/
    # try pre-built Windows binaries
    require File.join("tmail", 'mswin32', fname)
  end
end
23
+
24
+
25
+ # def require_arch(fname)
26
+ # dext = Config::CONFIG['DLEXT']
27
+ # begin
28
+ # if File.extname(fname) == dext
29
+ # path = fname
30
+ # else
31
+ # path = File.join("tmail","#{fname}.#{dext}")
32
+ # end
33
+ # require path
34
+ # rescue LoadError => e
35
+ # begin
36
+ # arch = Config::CONFIG['arch']
37
+ # path = File.join("tmail", arch, "#{fname}.#{dext}")
38
+ # require path
39
+ # rescue LoadError
40
+ # case path
41
+ # when /i686/
42
+ # path.sub!('i686', 'i586')
43
+ # when /i586/
44
+ # path.sub!('i586', 'i486')
45
+ # when /i486/
46
+ # path.sub!('i486', 'i386')
47
+ # else
48
+ # begin
49
+ # require fname + '.rb'
50
+ # rescue LoadError
51
+ # raise e
52
+ # end
53
+ # end
54
+ # retry
55
+ # end
56
+ # end
57
+ # end
58
+ #:startdoc:
@@ -0,0 +1,49 @@
1
+ =begin rdoc
2
+
3
+ = Scanner for TMail
4
+
5
+ =end
6
+ #--
7
+ # Copyright (c) 1998-2003 Minero Aoki <aamine@loveruby.net>
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+ #
28
+ # Note: Originally licensed under LGPL v2+. Using MIT license for Rails
29
+ # with permission of Minero Aoki.
30
+ #++
31
+ #:stopdoc:
32
+ #require 'tmail/require_arch'
33
+ require 'tmail/utils'
34
+ require 'tmail/config'
35
+
36
+ module TMail
37
+ # NOTE: It would be nice if these two libs could both be called "tmailscanner", and
38
+ # the native extension would have precedence. However, RubyGems breaks that because
39
+ # it does not guarantee load_path order.
40
+ begin # bind Scanner to the native scanner if it loads, otherwise to the pure-Ruby one
41
+ raise LoadError, 'Turned off native extentions by user choice' if ENV['NORUBYEXT'] # NORUBYEXT set: skip the C extension
42
+ require('tmail/tmailscanner') # c extension
43
+ Scanner = TMailScanner
44
+ rescue LoadError # extension missing or disabled above
45
+ require 'tmail/scanner_r' # pure-Ruby implementation of TMailScanner
46
+ Scanner = TMailScanner
47
+ end
48
+ end
49
+ #:startdoc:
@@ -0,0 +1,261 @@
1
+ # scanner_r.rb
2
+ #
3
+ #--
4
+ # Copyright (c) 1998-2003 Minero Aoki <aamine@loveruby.net>
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining
7
+ # a copy of this software and associated documentation files (the
8
+ # "Software"), to deal in the Software without restriction, including
9
+ # without limitation the rights to use, copy, modify, merge, publish,
10
+ # distribute, sublicense, and/or sell copies of the Software, and to
11
+ # permit persons to whom the Software is furnished to do so, subject to
12
+ # the following conditions:
13
+ #
14
+ # The above copyright notice and this permission notice shall be
15
+ # included in all copies or substantial portions of the Software.
16
+ #
17
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
+ #
25
+ # Note: Originally licensed under LGPL v2+. Using MIT license for Rails
26
+ # with permission of Minero Aoki.
27
+ #++
28
+ #:stopdoc:
29
+ require 'tmail/config'
30
+
31
+ module TMail
32
+
33
# Pure-Ruby tokenizer for mail header field bodies.
#
# A scanner is built for one header body and yields [symbol, value] pairs
# from #scan: atoms (or MIME tokens), digit runs, quoted strings, domain
# literals and single special characters, followed by the end marker
# [false, '$']. Comments in parentheses are not yielded; their text is
# appended to the +comments+ array given to the constructor.
class TMailScanner

  Version = '1.2.3'.freeze

  # Header types whose words are scanned as MIME tokens rather than atoms.
  MIME_HEADERS = {
    :CTYPE        => true,
    :CENCODING    => true,
    :CDISPOSITION => true
  }.freeze

  alnum      = 'a-zA-Z0-9'
  atomsyms   = %q[ _#!$%&`'*+-{|}~^/=? ].strip
  tokensyms  = %q[ _#!$%&`'*+-{|}~^@. ].strip
  atomchars  = alnum + Regexp.quote(atomsyms)
  tokenchars = alnum + Regexp.quote(tokensyms)
  iso2022str = '\e(?!\(B)..(?:[^\e]+|\e(?!\(B)..)*\e\(B'

  # Single-quoted on purpose: the \xNN sequences must reach the regexp
  # engine as escapes. Double-quoted strings would embed raw high bytes,
  # which a /n regexp refuses to interpolate when the source file is not
  # binary-encoded.
  eucstr  = '(?:[\xa1-\xfe][\xa1-\xfe])+'
  sjisstr = '(?:[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc])+'
  utf8str = '(?:[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf])+'

  quoted_with_iso2022  = /\A(?:[^\\\e"]+|#{iso2022str})+/n
  domlit_with_iso2022  = /\A(?:[^\\\e\]]+|#{iso2022str})+/n
  comment_with_iso2022 = /\A(?:[^\\\e()]+|#{iso2022str})+/n

  quoted_without_iso2022  = /\A[^\\"]+/n
  domlit_without_iso2022  = /\A[^\\\]]+/n
  comment_without_iso2022 = /\A[^\\()]+/n

  # Per-KCODE patterns, in the order:
  # [atom, token, quoted-string body, domain-literal body, comment body].
  PATTERN_TABLE = {
    'EUC' => [
      /\A(?:[#{atomchars}]+|#{iso2022str}|#{eucstr})+/n,
      /\A(?:[#{tokenchars}]+|#{iso2022str}|#{eucstr})+/n,
      quoted_with_iso2022,
      domlit_with_iso2022,
      comment_with_iso2022
    ],
    'SJIS' => [
      /\A(?:[#{atomchars}]+|#{iso2022str}|#{sjisstr})+/n,
      /\A(?:[#{tokenchars}]+|#{iso2022str}|#{sjisstr})+/n,
      quoted_with_iso2022,
      domlit_with_iso2022,
      comment_with_iso2022
    ],
    'UTF8' => [
      /\A(?:[#{atomchars}]+|#{utf8str})+/n,
      /\A(?:[#{tokenchars}]+|#{utf8str})+/n,
      quoted_without_iso2022,
      domlit_without_iso2022,
      comment_without_iso2022
    ],
    'NONE' => [
      /\A[#{atomchars}]+/n,
      /\A[#{tokenchars}]+/n,
      quoted_without_iso2022,
      domlit_without_iso2022,
      comment_without_iso2022
    ]
  }.freeze

  # str::      the header body to tokenize
  # scantype:: header type symbol; :RECEIVED enables the Received keywords
  #            and the MIME_HEADERS types switch words to MIME tokens
  # comments:: array that collects comment text (a new array when nil)
  def initialize( str, scantype, comments )
    init_scanner str
    @comments = comments || []
    @debug    = false

    # fix scanner mode
    @received       = (scantype == :RECEIVED)
    @is_mime_header = MIME_HEADERS[scantype]

    # An unrecognised KCODE falls back to the plain ASCII patterns instead
    # of leaving every pattern nil.
    patterns = PATTERN_TABLE[TMail.KCODE] || PATTERN_TABLE['NONE']
    atom, token, @quoted_re, @domlit_re, @comment_re = patterns
    @word_re = (@is_mime_header ? token : atom)
  end

  attr_accessor :debug

  # Yields each [symbol, value] pair to the block. With #debug set, every
  # pair is also printed together with the number of characters left.
  def scan( &block )
    if @debug
      scan_main do |arr|
        s, v = arr
        printf "%7d %-10s %s\n",
               rest_size(),
               s.respond_to?(:id2name) ? s.id2name : s.inspect,
               v.inspect
        yield arr
      end
    else
      scan_main(&block)
    end
  end

  private

  # Keywords recognised (case-insensitively) while scanning a Received header.
  RECV_TOKEN = {
    'from' => :FROM,
    'by'   => :BY,
    'via'  => :VIA,
    'with' => :WITH,
    'id'   => :ID,
    'for'  => :FOR
  }.freeze

  # Main loop: skips whitespace, classifies the next lexeme, and finally
  # yields the end marker [false, '$'].
  def scan_main
    until eof?
      if skip(/\A[\n\r\t ]+/n)   # LWSP
        break if eof?
      end

      if s = readstr(@word_re)
        if @is_mime_header
          yield [:TOKEN, s]
        elsif /\A\d+\z/ === s
          yield [:DIGIT, s]
        elsif @received
          yield [RECV_TOKEN[s.downcase] || :ATOM, s]
        else
          yield [:ATOM, s]
        end

      elsif skip(/\A"/)
        yield [:QUOTED, scan_quoted_word()]

      elsif skip(/\A\[/)
        yield [:DOMLIT, scan_domain_literal()]

      elsif skip(/\A\(/)
        @comments.push scan_comment()

      else
        c = readchar()
        yield [c, c]
      end
    end

    yield [false, '$']
  end

  # Body of a quoted string; the opening quote is already consumed.
  def scan_quoted_word
    scan_qstr(@quoted_re, /\A"/, 'quoted-word')
  end

  # Domain literal including its brackets; the opening "[" is already consumed.
  def scan_domain_literal
    '[' + scan_qstr(@domlit_re, /\A\]/, 'domain-literal') + ']'
  end

  # Reads up to +terminal+, unescaping backslash pairs. Raises through
  # scan_error! when the input ends before the terminator.
  def scan_qstr( pattern, terminal, type )
    result = ''
    until eof?
      if    s = readstr(pattern) then result << s
      elsif skip(terminal)       then return result
      elsif skip(/\A\\/)         then result << read_escaped_char(type)
      else
        raise "TMail FATAL: not match in #{type}"
      end
    end
    scan_error! "found unterminated #{type}"
  end

  # Reads a (possibly nested) comment; the opening "(" is already consumed.
  # Returns the text without the outermost parentheses.
  def scan_comment
    result = ''
    nest = 1
    content = @comment_re

    until eof?
      if s = readstr(content)
        result << s
      elsif skip(/\A\)/)
        nest -= 1
        return result if nest == 0
        result << ')'
      elsif skip(/\A\(/)
        nest += 1
        result << '('
      elsif skip(/\A\\/)
        result << read_escaped_char('comment')
      else
        raise 'TMail FATAL: not match in comment'
      end
    end
    scan_error! 'found unterminated comment'
  end

  # Character following a backslash. A backslash that is the very last
  # character of the input is reported as an unterminated construct.
  def read_escaped_char( type )
    readchar() or scan_error!("found unterminated #{type}")
  end

  # string scanner

  def init_scanner( str )
    @src = str
  end

  def eof?
    @src.empty?
  end

  def rest_size
    @src.size
  end

  # Consumes and returns the text matched by +re+ at the current position,
  # or returns nil (consuming nothing) when it does not match.
  def readstr( re )
    if m = re.match(@src)
      @src = m.post_match
      m[0]
    else
      nil
    end
  end

  # Consumes one character. The /m flag lets "." match a newline too, so an
  # escaped line break is read instead of yielding nil. Returns nil at eof.
  def readchar
    readstr(/\A./m)
  end

  # Consumes the text matched by +re+; true when something was consumed.
  def skip( re )
    if m = re.match(@src)
      @src = m.post_match
      true
    else
      false
    end
  end

  def scan_error!( msg )
    raise SyntaxError, msg
  end

end
259
+
260
+ end # module TMail
261
+ #:startdoc: