sisimai 4.25.16 → 5.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (178) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +3 -3
  3. data/ANALYTICAL-PRECISION +2 -2
  4. data/Benchmarks.mk +3 -3
  5. data/CONTRIBUTING +1 -1
  6. data/ChangeLog.md +424 -393
  7. data/Developers.mk +5 -6
  8. data/Gemfile +1 -1
  9. data/Makefile +15 -15
  10. data/README-JA.md +323 -149
  11. data/README.md +319 -148
  12. data/Rakefile +9 -3
  13. data/Repository.mk +2 -3
  14. data/lib/sisimai/address.rb +118 -74
  15. data/lib/sisimai/arf.rb +84 -82
  16. data/lib/sisimai/datetime.rb +5 -52
  17. data/lib/sisimai/{data → fact}/json.rb +7 -9
  18. data/lib/sisimai/fact/yaml.rb +31 -0
  19. data/lib/sisimai/fact.rb +468 -0
  20. data/lib/sisimai/lhost/activehunter.rb +12 -14
  21. data/lib/sisimai/lhost/amavis.rb +11 -14
  22. data/lib/sisimai/lhost/amazonses.rb +37 -41
  23. data/lib/sisimai/lhost/amazonworkmail.rb +15 -18
  24. data/lib/sisimai/lhost/aol.rb +12 -14
  25. data/lib/sisimai/lhost/apachejames.rb +19 -21
  26. data/lib/sisimai/lhost/barracuda.rb +10 -12
  27. data/lib/sisimai/lhost/bigfoot.rb +21 -21
  28. data/lib/sisimai/lhost/biglobe.rb +15 -16
  29. data/lib/sisimai/lhost/courier.rb +20 -20
  30. data/lib/sisimai/lhost/domino.rb +23 -19
  31. data/lib/sisimai/lhost/einsundeins.rb +23 -18
  32. data/lib/sisimai/lhost/exchange2003.rb +30 -29
  33. data/lib/sisimai/lhost/exchange2007.rb +70 -58
  34. data/lib/sisimai/lhost/exim.rb +175 -161
  35. data/lib/sisimai/lhost/ezweb.rb +31 -56
  36. data/lib/sisimai/lhost/facebook.rb +21 -33
  37. data/lib/sisimai/lhost/fml.rb +43 -48
  38. data/lib/sisimai/lhost/gmail.rb +29 -29
  39. data/lib/sisimai/lhost/gmx.rb +18 -17
  40. data/lib/sisimai/lhost/googlegroups.rb +9 -10
  41. data/lib/sisimai/lhost/gsuite.rb +21 -27
  42. data/lib/sisimai/lhost/imailserver.rb +25 -39
  43. data/lib/sisimai/lhost/interscanmss.rb +28 -31
  44. data/lib/sisimai/lhost/kddi.rb +22 -28
  45. data/lib/sisimai/lhost/mailfoundry.rb +11 -12
  46. data/lib/sisimai/lhost/mailmarshalsmtp.rb +25 -29
  47. data/lib/sisimai/lhost/mailru.rb +33 -27
  48. data/lib/sisimai/lhost/mcafee.rb +21 -31
  49. data/lib/sisimai/lhost/messagelabs.rb +17 -20
  50. data/lib/sisimai/lhost/messagingserver.rb +40 -37
  51. data/lib/sisimai/lhost/mfilter.rb +15 -16
  52. data/lib/sisimai/lhost/mxlogic.rb +24 -23
  53. data/lib/sisimai/lhost/notes.rb +17 -17
  54. data/lib/sisimai/lhost/office365.rb +63 -27
  55. data/lib/sisimai/lhost/opensmtpd.rb +12 -13
  56. data/lib/sisimai/lhost/outlook.rb +12 -15
  57. data/lib/sisimai/lhost/postfix.rb +179 -129
  58. data/lib/sisimai/lhost/powermta.rb +12 -14
  59. data/lib/sisimai/lhost/qmail.rb +44 -47
  60. data/lib/sisimai/lhost/receivingses.rb +15 -20
  61. data/lib/sisimai/lhost/sendgrid.rb +34 -32
  62. data/lib/sisimai/lhost/sendmail.rb +66 -53
  63. data/lib/sisimai/lhost/surfcontrol.rb +19 -19
  64. data/lib/sisimai/lhost/v5sendmail.rb +45 -39
  65. data/lib/sisimai/lhost/verizon.rb +35 -39
  66. data/lib/sisimai/lhost/x1.rb +18 -17
  67. data/lib/sisimai/lhost/x2.rb +17 -14
  68. data/lib/sisimai/lhost/x3.rb +19 -19
  69. data/lib/sisimai/lhost/x4.rb +72 -57
  70. data/lib/sisimai/lhost/x5.rb +17 -19
  71. data/lib/sisimai/lhost/x6.rb +41 -17
  72. data/lib/sisimai/lhost/yahoo.rb +17 -16
  73. data/lib/sisimai/lhost/yandex.rb +16 -20
  74. data/lib/sisimai/lhost/zoho.rb +16 -15
  75. data/lib/sisimai/lhost.rb +8 -10
  76. data/lib/sisimai/mail/maildir.rb +1 -3
  77. data/lib/sisimai/mail/mbox.rb +3 -4
  78. data/lib/sisimai/mail/memory.rb +0 -1
  79. data/lib/sisimai/mail/stdin.rb +1 -3
  80. data/lib/sisimai/mail.rb +3 -7
  81. data/lib/sisimai/mda.rb +28 -42
  82. data/lib/sisimai/message.rb +435 -326
  83. data/lib/sisimai/order.rb +5 -5
  84. data/lib/sisimai/reason/authfailure.rb +64 -0
  85. data/lib/sisimai/reason/badreputation.rb +53 -0
  86. data/lib/sisimai/reason/blocked.rb +94 -160
  87. data/lib/sisimai/reason/contenterror.rb +8 -9
  88. data/lib/sisimai/reason/delivered.rb +4 -6
  89. data/lib/sisimai/reason/exceedlimit.rb +10 -12
  90. data/lib/sisimai/reason/expired.rb +6 -8
  91. data/lib/sisimai/reason/feedback.rb +2 -3
  92. data/lib/sisimai/reason/filtered.rb +17 -19
  93. data/lib/sisimai/reason/hasmoved.rb +9 -10
  94. data/lib/sisimai/reason/hostunknown.rb +15 -15
  95. data/lib/sisimai/reason/mailboxfull.rb +10 -12
  96. data/lib/sisimai/reason/mailererror.rb +18 -20
  97. data/lib/sisimai/reason/mesgtoobig.rb +9 -11
  98. data/lib/sisimai/reason/networkerror.rb +5 -8
  99. data/lib/sisimai/reason/norelaying.rb +8 -11
  100. data/lib/sisimai/reason/notaccept.rb +13 -14
  101. data/lib/sisimai/reason/notcompliantrfc.rb +43 -0
  102. data/lib/sisimai/reason/onhold.rb +6 -9
  103. data/lib/sisimai/reason/policyviolation.rb +14 -12
  104. data/lib/sisimai/reason/rejected.rb +26 -24
  105. data/lib/sisimai/reason/requireptr.rb +69 -0
  106. data/lib/sisimai/reason/securityerror.rb +33 -36
  107. data/lib/sisimai/reason/spamdetected.rb +114 -147
  108. data/lib/sisimai/reason/speeding.rb +49 -0
  109. data/lib/sisimai/reason/suspend.rb +11 -11
  110. data/lib/sisimai/reason/syntaxerror.rb +11 -10
  111. data/lib/sisimai/reason/systemerror.rb +7 -9
  112. data/lib/sisimai/reason/systemfull.rb +7 -8
  113. data/lib/sisimai/reason/toomanyconn.rb +9 -11
  114. data/lib/sisimai/reason/undefined.rb +2 -3
  115. data/lib/sisimai/reason/userunknown.rb +129 -146
  116. data/lib/sisimai/reason/vacation.rb +3 -4
  117. data/lib/sisimai/reason/virusdetected.rb +10 -11
  118. data/lib/sisimai/reason.rb +59 -64
  119. data/lib/sisimai/rfc1894.rb +55 -28
  120. data/lib/sisimai/rfc2045.rb +373 -0
  121. data/lib/sisimai/rfc3464.rb +250 -308
  122. data/lib/sisimai/rfc3834.rb +42 -45
  123. data/lib/sisimai/rfc5322.rb +75 -100
  124. data/lib/sisimai/rfc5965.rb +31 -0
  125. data/lib/sisimai/rhost/cox.rb +5 -6
  126. data/lib/sisimai/rhost/franceptt.rb +6 -8
  127. data/lib/sisimai/rhost/godaddy.rb +12 -12
  128. data/lib/sisimai/rhost/google.rb +530 -0
  129. data/lib/sisimai/rhost/iua.rb +9 -10
  130. data/lib/sisimai/rhost/kddi.rb +6 -8
  131. data/lib/sisimai/rhost/{exchangeonline.rb → microsoft.rb} +115 -114
  132. data/lib/sisimai/rhost/mimecast.rb +42 -40
  133. data/lib/sisimai/rhost/nttdocomo.rb +12 -12
  134. data/lib/sisimai/rhost/spectrum.rb +10 -12
  135. data/lib/sisimai/rhost/{tencentqq.rb → tencent.rb} +7 -8
  136. data/lib/sisimai/rhost.rb +23 -31
  137. data/lib/sisimai/smtp/command.rb +59 -0
  138. data/lib/sisimai/smtp/error.rb +4 -7
  139. data/lib/sisimai/smtp/reply.rb +161 -74
  140. data/lib/sisimai/smtp/status.rb +507 -393
  141. data/lib/sisimai/smtp/transcript.rb +124 -0
  142. data/lib/sisimai/smtp.rb +0 -1
  143. data/lib/sisimai/string.rb +74 -5
  144. data/lib/sisimai/time.rb +1 -2
  145. data/lib/sisimai/version.rb +1 -1
  146. data/lib/sisimai.rb +46 -31
  147. data/set-of-emails/maildir/bsd/lhost-domino-02.eml +6 -3
  148. data/set-of-emails/maildir/bsd/lhost-googlegroups-15.eml +174 -0
  149. data/set-of-emails/maildir/bsd/lhost-gsuite-15.eml +229 -0
  150. data/set-of-emails/maildir/bsd/lhost-postfix-75.eml +51 -0
  151. data/set-of-emails/maildir/bsd/lhost-postfix-76.eml +101 -0
  152. data/set-of-emails/maildir/bsd/lhost-postfix-77.eml +74 -0
  153. data/set-of-emails/maildir/bsd/lhost-postfix-78.eml +91 -0
  154. data/set-of-emails/maildir/bsd/lhost-receivingses-08.eml +88 -0
  155. data/set-of-emails/maildir/bsd/rfc3464-43.eml +88 -0
  156. data/set-of-emails/maildir/bsd/rhost-google-03.eml +101 -0
  157. data/set-of-emails/maildir/bsd/rhost-google-04.eml +102 -0
  158. data/set-of-emails/maildir/bsd/rhost-google-05.eml +82 -0
  159. data/set-of-emails/maildir/bsd/rhost-google-06.eml +102 -0
  160. data/set-of-emails/maildir/bsd/rhost-google-07.eml +69 -0
  161. data/set-of-emails/maildir/bsd/rhost-google-08.eml +99 -0
  162. data/sisimai-java.gemspec +1 -1
  163. data/sisimai.gemspec +1 -1
  164. metadata +41 -20
  165. data/.rspec +0 -2
  166. data/lib/sisimai/data/yaml.rb +0 -33
  167. data/lib/sisimai/data.rb +0 -411
  168. data/lib/sisimai/mime.rb +0 -456
  169. data/lib/sisimai/rhost/googleapps.rb +0 -261
  170. /data/set-of-emails/maildir/bsd/{rfc3464-41.eml → rfc3834-05.eml} +0 -0
  171. /data/set-of-emails/maildir/bsd/{rhost-googleapps-01.eml → rhost-google-01.eml} +0 -0
  172. /data/set-of-emails/maildir/bsd/{rhost-googleapps-02.eml → rhost-google-02.eml} +0 -0
  173. /data/set-of-emails/maildir/bsd/{rhost-exchangeonline-01.eml → rhost-microsoft-01.eml} +0 -0
  174. /data/set-of-emails/maildir/bsd/{rhost-exchangeonline-02.eml → rhost-microsoft-02.eml} +0 -0
  175. /data/set-of-emails/maildir/bsd/{rhost-exchangeonline-03.eml → rhost-microsoft-03.eml} +0 -0
  176. /data/set-of-emails/maildir/bsd/{rhost-tencentqq-01.eml → rhost-tencent-01.eml} +0 -0
  177. /data/set-of-emails/maildir/bsd/{rhost-tencentqq-02.eml → rhost-tencent-02.eml} +0 -0
  178. /data/set-of-emails/maildir/bsd/{rhost-tencentqq-03.eml → rhost-tencent-03.eml} +0 -0
data/lib/sisimai/mime.rb DELETED
@@ -1,456 +0,0 @@
1
- module Sisimai
2
- # Sisimai::MIME is MIME Utilities for Sisimai.
3
- module MIME
4
- # Imported from p5-Sisimail/lib/Sisimai/MIME.pm
5
- class << self
6
- require 'base64'
7
- require 'sisimai/string'
8
-
9
- ReE = {
10
- :'7bit-encoded' => %r/^content-transfer-encoding:[ ]*7bit/m,
11
- :'quoted-print' => %r/^content-transfer-encoding:[ ]*quoted-printable/m,
12
- :'some-iso2022' => %r/^content-type:[ ]*.+;[ ]*charset=["']?(iso-2022-[-a-z0-9]+?)['"]?\b/m,
13
- :'another-8bit' => %r/^content-type:[ ]*.+;[ ]*charset=["']?(.+?)['"]?\b/m,
14
- :'with-charset' => %r/^content[-]type:[ ]*.+[;][ ]*charset=['"]?(.+?)['"]?\b/,
15
- :'only-charset' => %r/^[\s\t]+charset=['"]?(.+?)['"]?\b/,
16
- :'html-message' => %r|^content-type:[ ]*text/html;|m,
17
- }.freeze
18
- AlsoAppend = %r{\A(?:text/rfc822-headers|message/)}.freeze
19
- ThisFormat = %r/\A(?:Content-Transfer-Encoding:\s*.+\n)?Content-Type:\s*([^ ;\s]+)/.freeze
20
- LeavesOnly = %r{\A(?>
21
- text/(?:plain|html|rfc822-headers)
22
- |message/(?:x?delivery-status|rfc822|partial|feedback-report)
23
- |multipart/(?:report|alternative|mixed|related|partial)
24
- )
25
- }x.freeze
26
-
27
- # Check that the argument is MIME-Encoded string or not
28
- # @param [String] argvs String to be checked
29
- # @return [True,False] false: Not MIME encoded string
30
- # true: MIME encoded string
31
- def is_mimeencoded(argv1)
32
- return nil unless argv1
33
-
34
- text1 = argv1.delete('"')
35
- mime1 = false
36
- piece = []
37
-
38
- if text1.include?(' ')
39
- # Multiple MIME-Encoded strings in a line
40
- piece = text1.split(' ')
41
- else
42
- piece << text1
43
- end
44
-
45
- while e = piece.shift do
46
- # Check all the string in the array
47
- next unless e =~ /[ \t]*=[?][-_0-9A-Za-z]+[?][BbQq][?].+[?]=?[ \t]*/
48
- mime1 = true
49
- end
50
- return mime1
51
- end
52
-
53
- # Decode MIME-Encoded string
54
- # @param [Array] argvs An array including MIME-Encoded text
55
- # @return [String] MIME-Decoded text
56
- def mimedecode(argvs = [])
57
- characterset = nil
58
- encodingname = nil
59
- decodedtext0 = []
60
-
61
- while e = argvs.shift do
62
- # Check and decode each element
63
- e = e.strip.delete('"')
64
-
65
- if self.is_mimeencoded(e)
66
- # MIME Encoded string like "=?utf-8?B?55m954yr44Gr44KD44KT44GT?="
67
- next unless cv = e.match(/\A(.*)=[?]([-_0-9A-Za-z]+)[?]([BbQq])[?](.+)[?]=?(.*)\z/)
68
-
69
- characterset ||= cv[2]
70
- encodingname ||= cv[3]
71
- mimeencoded0 = cv[4]
72
-
73
- decodedtext0 << cv[1]
74
- decodedtext0 << if encodingname.upcase == 'B'
75
- Base64.decode64(mimeencoded0)
76
- else
77
- mimeencoded0.unpack('M').first
78
- end
79
- decodedtext0[-1].gsub!(/\r\n/, '')
80
- decodedtext0 << cv[5]
81
- else
82
- decodedtext0 << if decodedtext0.empty? then e else ' ' << e end
83
- end
84
- end
85
-
86
- return '' if decodedtext0.empty?
87
- decodedtext1 = decodedtext0.join('')
88
-
89
- if characterset && encodingname
90
- # utf8 => UTF-8
91
- characterset = 'UTF-8' if characterset.casecmp('UTF8') == 0
92
-
93
- unless characterset.casecmp('UTF-8') == 0
94
- # Characterset is not UTF-8
95
- begin
96
- decodedtext1.encode!('UTF-8', characterset)
97
- rescue
98
- decodedtext1 = 'FAILED TO CONVERT THE SUBJECT'
99
- end
100
- end
101
- end
102
-
103
- return decodedtext1.force_encoding('UTF-8').scrub('?')
104
- end
105
-
106
- # Decode MIME Quoted-Printable Encoded string
107
- # @param [String] argv1 MIME Encoded text
108
- # @param [Hash] heads Email header
109
- # @return [String] MIME Decoded text
110
- def qprintd(argv1 = nil, heads = {})
111
- return nil unless argv1
112
- return argv1.unpack('M').first.scrub('?') unless heads['content-type']
113
- return argv1.unpack('M').first.scrub('?') if heads['content-type'].empty?
114
-
115
- # Quoted-printable encoded part is the part of the text
116
- boundary00 = Sisimai::MIME.boundary(heads['content-type'], 0)
117
-
118
- # Decoded using unpack('M') entire body string when the boundary string
119
- # or "Content-Transfer-Encoding: quoted-printable" are not included in
120
- # the message body.
121
- return argv1.unpack('M').first.scrub('?') if boundary00.empty?
122
- return argv1.unpack('M').first.scrub('?') unless argv1.downcase =~ ReE[:'quoted-print']
123
-
124
- boundary01 = Sisimai::MIME.boundary(heads['content-type'], 1)
125
- bodystring = ''
126
- notdecoded = ''
127
-
128
- encodename = nil
129
- ctencoding = nil
130
- mimeinside = false
131
- textslices = argv1.split("\n")
132
-
133
- while e = textslices.shift do
134
- # This is a multi-part message in MIME format. Your mail reader does not
135
- # understand MIME message format.
136
- # --=_gy7C4Gpes0RP4V5Bs9cK4o2Us2ZT57b-3OLnRN+4klS8dTmQ
137
- # Content-Type: text/plain; charset=iso-8859-15
138
- # Content-Transfer-Encoding: quoted-printable
139
- if mimeinside
140
- # Quoted-Printable encoded text block
141
- if e == boundary00
142
- # The next boundary string has appeared
143
- # --=_gy7C4Gpes0RP4V5Bs9cK4o2Us2ZT57b-3OLnRN+4klS8dTmQ
144
- hasdecoded = Sisimai::String.to_utf8(notdecoded.unpack('M').first, encodename)
145
- bodystring << hasdecoded << e + "\n"
146
-
147
- notdecoded = ''
148
- mimeinside = false
149
- ctencoding = false
150
- encodename = nil
151
- else
152
- # Inside of Quoted-Printable encoded text
153
- if e.size > 76
154
- # Invalid line exists in "quoted-printable" part
155
- e = [e].pack('M').chomp
156
- else
157
- # A bounce message generated by Office365(Outlook) include lines
158
- # which are not proper as Quoted-Printable:
159
- # - `=` is not encoded
160
- # - Longer than 76 charaters a line
161
- #
162
- # Content-Transfer-Encoding: quoted-printable
163
- # X-Microsoft-Exchange-Diagnostics:
164
- # 1;SLXP216MB0381;27:IdH7U/WHGgJu6J...LiiA8rYgU/E7SQ==
165
- # ...
166
- mustencode = true
167
- while true do
168
- break if e.end_with?(' ', "\t")
169
- break if e.split('').any? { |c| c.ord < 32 || c.ord > 126 }
170
- if e.end_with?('=')
171
- # Padding character of Base64 or not
172
- break if e =~ /[\+\/0-9A-Za-z]{32,}[=]+\z/
173
- else
174
- # Including "=" not as "=3D"
175
- break if e.include?('=') && ! e.upcase.include?('=3D')
176
- end
177
- mustencode = false
178
- break
179
- end
180
- e = [e].pack('M').chomp if mustencode
181
- mustencode = false
182
- end
183
- notdecoded << e + "\n"
184
- end
185
- else
186
- # NOT Quoted-Printable encoded text block
187
- lowercased = e.downcase
188
- if e =~ /\A[-]{2}[^\s]+[^-]\z/
189
- # Start of the boundary block
190
- # --=_gy7C4Gpes0RP4V5Bs9cK4o2Us2ZT57b-3OLnRN+4klS8dTmQ
191
- unless e == boundary00
192
- # New boundary string has appeared
193
- boundary00 = e
194
- boundary01 = e + '--'
195
- end
196
- elsif cv = lowercased.match(ReE[:'with-charset']) || lowercased.match(ReE[:'only-charset'])
197
- # Content-Type: text/plain; charset=ISO-2022-JP
198
- encodename = cv[1]
199
- mimeinside = true if ctencoding
200
-
201
- elsif lowercased =~ ReE[:'quoted-print']
202
- # Content-Transfer-Encoding: quoted-printable
203
- ctencoding = true
204
- mimeinside = true if encodename
205
-
206
- elsif e == boundary01
207
- # The end of boundary block
208
- # --=_gy7C4Gpes0RP4V5Bs9cK4o2Us2ZT57b-3OLnRN+4klS8dTmQ--
209
- mimeinside = false
210
- end
211
-
212
- bodystring << e + "\n"
213
- end
214
- end
215
-
216
- bodystring << notdecoded unless notdecoded.empty?
217
- return bodystring.scrub('?')
218
- end
219
-
220
- # Decode MIME BASE64 Encoded string
221
- # @param [String] argv1 MIME Encoded text
222
- # @return [String] MIME-Decoded text
223
- def base64d(argv1)
224
- return nil unless argv1
225
-
226
- plain = nil
227
- if cv = argv1.match(%r|([+/\=0-9A-Za-z\r\n]+)|) then plain = Base64.decode64(cv[1]) end
228
- return plain.scrub('?')
229
- end
230
-
231
- # Get boundary string
232
- # @param [String] argv1 The value of Content-Type header
233
- # @param [Integer] start -1: boundary string itself
234
- # 0: Start of boundary
235
- # 1: End of boundary
236
- # @return [String] Boundary string
237
- def boundary(argv1 = nil, start = -1)
238
- return nil unless argv1
239
- value = ''
240
-
241
- if cv = argv1.match(/\bboundary=([^ ]+)/i)
242
- # Content-Type: multipart/mixed; boundary=Apple-Mail-5--931376066
243
- # Content-Type: multipart/report; report-type=delivery-status;
244
- # boundary="n6H9lKZh014511.1247824040/mx.example.jp"
245
- value = cv[1]
246
- value = value.split(/\n/, 2).shift if value =~ /\n/
247
- value.delete!(%q|'";\\|)
248
- value = '--' + value if start > -1
249
- value = value + '--' if start > 0
250
- end
251
-
252
- return value
253
- end
254
-
255
- # Breaks up each multipart/* block
256
- # @param [String] argv0 Text block of multipart/*
257
- # @param [String] argv1 MIME type of the outside part
258
- # @return [String] Decoded part as a plain text(text part only)
259
- def breaksup(argv0 = nil, argv1 = '')
260
- return nil unless argv0
261
-
262
- hasflatten = '' # Message body including only text/plain and message/*
263
- mimeformat = '' # MIME type string of this part
264
- alternates = argv1.start_with?('multipart/alternative') ? true : false
265
-
266
- # Get MIME type string from Content-Type: "..." field at the first line
267
- # or the second line of the part.
268
- if cv = argv0.match(ThisFormat) then mimeformat = cv[1].downcase end
269
-
270
- # Sisimai require only MIME types defined in LeavesOnly variable
271
- return '' unless mimeformat =~ LeavesOnly
272
- return '' if alternates && mimeformat == 'text/html'
273
-
274
- (upperchunk, lowerchunk) = argv0.split(/^$/, 2)
275
- upperchunk.tr("\n", ' ').squeeze!(' ')
276
-
277
- # Content-Description: Undelivered Message
278
- # Content-Type: message/rfc822
279
- # <EOM>
280
- lowerchunk ||= ''
281
-
282
- if mimeformat.start_with?('multipart/')
283
- # Content-Type: multipart/*
284
- mpboundary = Regexp.new(Regexp.escape(Sisimai::MIME.boundary(upperchunk, 0)) << "\n")
285
- innerparts = lowerchunk.split(mpboundary)
286
- innerparts.shift if innerparts[0].empty? || innerparts[0] == "\n"
287
-
288
- while e = innerparts.shift do
289
- # Find internal multipart/* blocks and decode
290
- if cv = e.match(ThisFormat)
291
- # Found "Content-Type" field at the first or second line of this
292
- # splitted part
293
- nextformat = cv[1].downcase
294
- next unless nextformat =~ LeavesOnly
295
- next if nextformat == 'text/html'
296
- hasflatten << Sisimai::MIME.breaksup(e, mimeformat)
297
- else
298
- # The content of this part is almost '--': a part of boundary
299
- # string which is used for splitting multipart/* blocks.
300
- hasflatten << "\n"
301
- end
302
- end
303
- else
304
- # Is not "Content-Type: multipart/*"
305
- if cv = upperchunk.match(/Content-Transfer-Encoding: ([^\s;]+)/)
306
- # Content-Transfer-Encoding: quoted-printable|base64|7bit|...
307
- ctencoding = cv[1].downcase
308
- getdecoded = ''
309
-
310
- if ctencoding == 'quoted-printable'
311
- # Content-Transfer-Encoding: quoted-printable
312
- getdecoded = Sisimai::MIME.qprintd(lowerchunk)
313
-
314
- elsif ctencoding == 'base64'
315
- # Content-Transfer-Encoding: base64
316
- getdecoded = Sisimai::MIME.base64d(lowerchunk)
317
-
318
- elsif ctencoding == '7bit'
319
- # Content-Transfer-Encoding: 7bit
320
- if cv = upperchunk.downcase.match(ReE[:'some-iso2022'])
321
- # Content-Type: text/plain; charset=ISO-2022-JP
322
- getdecoded = Sisimai::String.to_utf8(lowerchunk, cv[1])
323
- else
324
- # No "charset" parameter in Content-Type field
325
- getdecoded = lowerchunk
326
- end
327
- else
328
- # Content-Transfer-Encoding: 8bit, binary, and so on
329
- getdecoded = lowerchunk
330
- end
331
- getdecoded.gsub!(/\r\n/, "\n") if getdecoded.include?("\r\n") # Convert CRLF to LF
332
-
333
- if mimeformat =~ AlsoAppend
334
- # Append field when the value of Content-Type: begins with
335
- # message/ or equals text/rfc822-headers.
336
- upperchunk.sub!(/Content-Transfer-Encoding:\s*[^\s]+./, '').strip!
337
- hasflatten << upperchunk
338
-
339
- elsif mimeformat == 'text/html'
340
- # Delete HTML tags inside of text/html part whenever possible
341
- getdecoded.gsub!(/[<][^@ ]+?[>]/, '')
342
- end
343
-
344
- unless getdecoded.empty?
345
- # The string will be encoded to UTF-8 forcely and call String#scrub
346
- # method to avoid the following errors:
347
- # - incompatible character encodings: ASCII-8BIT and UTF-8
348
- # - invalid byte sequence in UTF-8
349
- unless getdecoded.encoding.to_s == 'UTF-8'
350
- if cv = upperchunk.downcase.match(ReE[:'another-8bit'])
351
- # ISO-8859-1, GB2312, and so on
352
- getdecoded = Sisimai::String.to_utf8(getdecoded, cv[1])
353
- end
354
- end
355
- # A part which has no "charset" parameter causes an ArgumentError:
356
- # invalid byte sequence in UTF-8 so String#scrub should be called
357
- hasflatten << getdecoded.scrub!('?') << "\n\n"
358
- end
359
- else
360
- # Content-Type: text/plain OR text/rfc822-headers OR message/*
361
- if mimeformat.start_with?('message/') || mimeformat == 'text/rfc822-headers'
362
- # Append headers of multipart/* when the value of "Content-Type"
363
- # is inlucded in the following MIME types:
364
- # - message/delivery-status
365
- # - message/rfc822
366
- # - text/rfc822-headers
367
- hasflatten << upperchunk
368
- end
369
- lowerchunk.sub!(/^--\z/m, '')
370
- lowerchunk << "\n" unless lowerchunk =~ /\n\z/
371
- hasflatten << lowerchunk
372
- end
373
- end
374
-
375
- return hasflatten
376
- end
377
-
378
- # MIME decode entire message body
379
- # @param [String] argv0 Content-Type header
380
- # @param [String] argv1 Entire message body
381
- # @return [String] Decoded message body
382
- def makeflat(argv0 = nil, argv1 = nil)
383
- return nil unless argv0
384
- return nil unless argv1
385
-
386
- ehboundary = Sisimai::MIME.boundary(argv0, 0)
387
- mimeformat = ''
388
- bodystring = ''
389
-
390
- # Get MIME type string from an email header given as the 1st argument
391
- if cv = argv0.match(%r|\A([0-9a-z]+/[^ ;]+)|) then mimeformat = cv[1] end
392
-
393
- return '' unless mimeformat.include?('multipart/')
394
- return '' if ehboundary.empty?
395
-
396
- # Some bounce messages include lower-cased "content-type:" field such as
397
- # content-type: message/delivery-status
398
- # content-transfer-encoding: quoted-printable
399
- argv1.gsub!(/[Cc]ontent-[Tt]ype:/, 'Content-Type:')
400
- argv1.gsub!(/[Cc]ontent-[Tt]ransfer-[Ee]ncodeing:/, 'Content-Transfer-Encoding:')
401
-
402
- # 1. Some bounce messages include upper-cased "Content-Transfer-Encoding",
403
- # and "Content-Type" value such as
404
- # - Content-Type: multipart/RELATED;
405
- # - Content-Transfer-Encoding: 7BIT
406
- # 2. Unused fields inside of mutipart/* block should be removed
407
- argv1.gsub!(/(Content-[A-Za-z-]+?):[ ]*([^\s]+)/) do "#{$1}: #{$2.downcase}" end
408
- argv1.gsub!(/^Content-(?:Description|Disposition):.+?\n$/, '')
409
-
410
- multiparts = argv1.split(Regexp.new(Regexp.escape(ehboundary) << "\n?"))
411
- multiparts.shift if multiparts[0].empty?
412
- while e = multiparts.shift do
413
- # Find internal multipart blocks and decode
414
- catch :XCCT do
415
- while true
416
- # Remove fields except Content-Type, Content-Transfer-Encoding in
417
- # each part such as the following:
418
- # Date: Thu, 29 Apr 2018 22:22:22 +0900
419
- # MIME-Version: 1.0
420
- # Message-ID: ...
421
- # Content-Transfer-Encoding: quoted-printable
422
- # Content-Type: text/plain; charset=us-ascii
423
- #
424
- # Fields before "Content-Type:" in each part should have been removed
425
- # and "Content-Type:" should be exist at the first line of each part.
426
- # The field works as a delimiter to decode contents of each part.
427
- #
428
- throw :XCCT if e =~ /\AContent-T[ry]/ # The first field is "Content-Type:"
429
- if cv = e.match(/\A(.+?)Content-Type:/m) # Capture lines before "Content-Type:"
430
- throw :XCCT if cv[1] =~ /\n\n/ # There is no field before "Content-Type:"
431
- e.sub!(/\A.+?(Content-T[ry].+)\z/m, '\1') # Remove fields before "Content-Type:"
432
- end
433
- throw :XCCT
434
- end
435
- end
436
-
437
- if e =~ /\A(?:Content-[A-Za-z-]+:.+?\r\n)?Content-Type:[ ]*[^\s]+/
438
- # Content-Type: multipart/*
439
- bodystring << Sisimai::MIME.breaksup(e, mimeformat)
440
- else
441
- # Is not multipart/* block
442
- e.sub!(%r|^Content-Transfer-Encoding:.+?\n|mi, '')
443
- e.sub!(%r|^Content-Type:\s*text/plain.+?\n|mi, '')
444
- bodystring << e
445
- end
446
- end
447
- bodystring.gsub!(%r{^(Content-Type:\s*message/(?:rfc822|delivery-status)).+$}, '\1')
448
- bodystring.gsub!(/^\n{2,}/, "\n")
449
-
450
- return bodystring
451
- end
452
-
453
- end
454
- end
455
- end
456
-