rims-rfc822 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 51ec6ae41467b2b68bd8d1bb23916ad42f27fb6b688f8769f64e61db5e8afd98
4
- data.tar.gz: f48b88893f67da1da2824336bfe274b033131527420de1926bea05faa7036618
3
+ metadata.gz: '0825e7fac6f422946c68301c42abcce200e31d32a96ed83d6b1879a5a2b7c6cf'
4
+ data.tar.gz: '091affd6b8394475774ea160b56eb870f01785f72c1b737ea41fe4a5c6698d82'
5
5
  SHA512:
6
- metadata.gz: 77e6b92ea9e32a7a94972d493103e03face8eaf6872400a324242431b3c8ecd5211bf0bd4c77959c47e634d820bd26089f250cd49a5e54ccfdf496cbbdaf7e08
7
- data.tar.gz: a94818fedb229ad9df8683d027aa9c0cacf57a9b5296ae279794d1132e310b0a93a7158bc2c25f72279117ce29c90090b945a2e79f552fab07b1b3a1467dedb2
6
+ metadata.gz: 210d959c67edec519382dfcb24e7aa1069a93e8da0b001badff3c8e643b2aaf83c5a0f84e54f0354fe1740e86b4a2467f5c5f503420fd6e2b2e89b15c0585dc7
7
+ data.tar.gz: ad09d49a4f5caab505893a90e5c309d3d2878178aa73dcec2784fbbf38ad24ac5028710c3f62ece160bc7ccbbbdbf1969fe70c38ae13d1a322f0a99cad5e1cd3
data/README.md CHANGED
@@ -33,6 +33,15 @@ msg = RIMS::RFC822::Message.new(your_rfc822_text)
33
33
  p msg.header
34
34
  p msg.body
35
35
 
36
+ # header fields
37
+ p msg.header[name]
38
+ p msg.header.fetch_upcase(name)
39
+ p msg.header.field_value_list(name)
40
+ p msg.header.key? name
41
+ msg.header.each do |name, value|
42
+ p [ name, value ]
43
+ end
44
+
36
45
  # source text attributes
37
46
  p msg.raw_source
38
47
  p msg.header.raw_source
@@ -52,7 +61,7 @@ p msg.content_disposition_parameter(name)
52
61
  p msg.content_disposition_parameter_list
53
62
  p msg.content_language
54
63
 
55
- # headear attributes
64
+ # header attributes
56
65
  p msg.date
57
66
  p msg.from
58
67
  p msg.sender
@@ -61,12 +70,23 @@ p msg.to
61
70
  p msg.cc
62
71
  p msg.bcc
63
72
 
64
- # content attributes
73
+ # body structure attributes
65
74
  p msg.text?
66
75
  p msg.multipart?
67
76
  p msg.message?
68
77
  p msg.parts
69
78
  p msg.message
79
+
80
+ # MIME header and body attributes
81
+ p msg.mime_decoded_header(name)
82
+ p msg.mime_decoded_header(name, decode_charset)
83
+ p msg.mime_decoded_header_field_value_list(name)
84
+ p msg.mime_decoded_header_field_value_list(name, decode_charset)
85
+ p msg.mime_decoded_header_text
86
+ p msg.mime_decoded_header_text(decode_charset)
87
+ p msg.mime_charset_body_text
88
+ p msg.mime_charset_body_text(charset)
89
+ p msg.mime_binary_body_string
70
90
  ```
71
91
 
72
92
  Contributing
@@ -1,256 +1,452 @@
1
1
  # -*- coding: utf-8; frozen_string_literal: true -*-
2
2
 
3
+ require 'forwardable'
3
4
  require 'rims/rfc822/version'
4
5
  require 'time'
5
6
 
6
7
  module RIMS
7
8
  module RFC822
8
- def split_message(msg_txt)
9
- header_txt, body_txt = msg_txt.lstrip.split(/\r?\n\r?\n/, 2)
10
- if ($&) then
11
- header_txt << $& if $&
12
- else
13
- body_txt = header_txt
14
- header_txt = nil
15
- end
16
-
17
- [ header_txt.freeze, body_txt.freeze ].freeze
18
- end
19
- module_function :split_message
9
+ module Parse
10
+ def split_message(msg_txt)
11
+ header_txt, body_txt = msg_txt.lstrip.split(/\r?\n\r?\n/, 2)
12
+ if ($&) then
13
+ header_txt << $& if $&
14
+ else
15
+ body_txt = header_txt
16
+ header_txt = nil
17
+ end
20
18
 
21
- def parse_header(header_txt)
22
- field_pair_list = header_txt.scan(%r{
23
- ^
24
- ((?#name) \S+? )
25
- \s* : \s*
26
- (
27
- (?#value)
28
- .*? (?: \n|\z)
29
- (?: ^\s .*? (?: \n|\z) )*
30
- )
31
- }x)
32
-
33
- for name, value in field_pair_list
34
- value.strip!
35
- name.freeze
36
- value.freeze
37
- end
38
-
39
- field_pair_list.freeze
40
- end
41
- module_function :parse_header
19
+ [ header_txt.freeze, body_txt.freeze ].freeze
20
+ end
21
+ module_function :split_message
22
+
23
+ def parse_header(header_txt)
24
+ field_pair_list = header_txt.scan(%r{
25
+ ^
26
+ ((?#name) \S+? )
27
+ \s* : \s*
28
+ (
29
+ (?#value)
30
+ .*? (?: \n|\z)
31
+ (?: ^\s .*? (?: \n|\z) )*
32
+ )
33
+ }x)
34
+
35
+ for name, value in field_pair_list
36
+ value.strip!
37
+ name.freeze
38
+ value.freeze
39
+ end
42
40
 
43
- def unquote_phrase(phrase_txt)
44
- state = :raw
45
- src_txt = phrase_txt.dup
46
- dst_txt = ''.encode(phrase_txt.encoding)
47
-
48
- while (src_txt.sub!(/\A (?: " | \( | \) | \\ | [^"\(\)\\]+ )/x, ''))
49
- match_txt = $&
50
- case (state)
51
- when :raw
52
- case (match_txt)
53
- when '"'
54
- state = :quote
55
- when '('
56
- state = :comment
57
- when "\\"
58
- src_txt.sub!(/\A./, '') and dst_txt << $&
41
+ field_pair_list.freeze
42
+ end
43
+ module_function :parse_header
44
+
45
+ def unquote_phrase(phrase_txt)
46
+ state = :raw
47
+ src_txt = phrase_txt.dup
48
+ dst_txt = ''.encode(phrase_txt.encoding)
49
+
50
+ while (src_txt.sub!(/\A (?: " | \( | \) | \\ | [^"\(\)\\]+ )/x, ''))
51
+ match_txt = $&
52
+ case (state)
53
+ when :raw
54
+ case (match_txt)
55
+ when '"'
56
+ state = :quote
57
+ when '('
58
+ state = :comment
59
+ when "\\"
60
+ src_txt.sub!(/\A./, '') and dst_txt << $&
61
+ else
62
+ dst_txt << match_txt
63
+ end
64
+ when :quote
65
+ case (match_txt)
66
+ when '"'
67
+ state = :raw
68
+ when "\\"
69
+ src_txt.sub!(/\A./, '') && dst_txt << $&
70
+ else
71
+ dst_txt << match_txt
72
+ end
73
+ when :comment
74
+ case (match_txt)
75
+ when ')'
76
+ state = :raw
77
+ when "\\"
78
+ src_txt.sub!(/\A./, '')
79
+ else
80
+ # ignore comment text.
81
+ end
59
82
  else
60
- dst_txt << match_txt
83
+ raise "internal error - unknown state: #{state}"
61
84
  end
62
- when :quote
63
- case (match_txt)
64
- when '"'
65
- state = :raw
66
- when "\\"
67
- src_txt.sub!(/\A./, '') && dst_txt << $&
85
+ end
86
+
87
+ dst_txt.freeze
88
+ end
89
+ module_function :unquote_phrase
90
+
91
+ def parse_parameters(parameters_txt)
92
+ params = {}
93
+ parameters_txt.scan(%r'(?<name>\S+?) \s* = \s* (?: (?<quoted_string>".*?") | (?<token>\S+?) ) \s* (?:;|\Z)'x) do
94
+ name = $~[:name]
95
+ if ($~[:quoted_string]) then
96
+ quoted_value = $~[:quoted_string]
97
+ value = unquote_phrase(quoted_value)
68
98
  else
69
- dst_txt << match_txt
99
+ value = $~[:token]
70
100
  end
71
- when :comment
72
- case (match_txt)
73
- when ')'
74
- state = :raw
75
- when "\\"
76
- src_txt.sub!(/\A./, '')
101
+ params[name.downcase.freeze] = [ name.freeze, value.freeze ].freeze
102
+ end
103
+
104
+ params.freeze
105
+ end
106
+ module_function :parse_parameters
107
+
108
+ def split_parameters(type_params_txt)
109
+ type, params_txt = type_params_txt.split(';', 2)
110
+ if (type) then
111
+ type.strip!
112
+ type.freeze
113
+ if (params_txt) then
114
+ params = parse_parameters(params_txt)
77
115
  else
78
- # ignore comment text.
116
+ params = {}.freeze
79
117
  end
118
+ [ type, params ].freeze
80
119
  else
81
- raise "internal error: unknown state #{state}"
120
+ [ nil, {}.freeze ].freeze
82
121
  end
83
122
  end
123
+ module_function :split_parameters
124
+
125
+ def parse_content_type(type_txt)
126
+ media_type_txt, params = split_parameters(type_txt)
127
+ if (media_type_txt) then
128
+ main_type, sub_type = media_type_txt.split('/', 2)
129
+ if (main_type) then
130
+ main_type.strip!
131
+ main_type.freeze
132
+ if (sub_type) then
133
+ sub_type.strip!
134
+ sub_type.freeze
135
+ if (! main_type.empty? && ! sub_type.empty?) then
136
+ return [ main_type, sub_type, params ].freeze
137
+ end
138
+ end
139
+ end
140
+ end
84
141
 
85
- dst_txt.freeze
86
- end
87
- module_function :unquote_phrase
142
+ [ 'application'.dup.force_encoding(type_txt.encoding).freeze,
143
+ 'octet-stream'.dup.force_encoding(type_txt.encoding).freeze,
144
+ params
145
+ ].freeze
146
+ end
147
+ module_function :parse_content_type
88
148
 
89
- def parse_parameters(parameters_txt)
90
- params = {}
91
- parameters_txt.scan(%r'(?<name>\S+?) \s* = \s* (?: (?<quoted_string>".*?") | (?<token>\S+?) ) \s* (?:;|\Z)'x) do
92
- name = $~[:name]
93
- if ($~[:quoted_string]) then
94
- quoted_value = $~[:quoted_string]
95
- value = unquote_phrase(quoted_value)
96
- else
97
- value = $~[:token]
98
- end
99
- params[name.downcase.freeze] = [ name.freeze, value.freeze ].freeze
149
+ def parse_content_disposition(disposition_txt)
150
+ split_parameters(disposition_txt)
100
151
  end
152
+ module_function :parse_content_disposition
101
153
 
102
- params.freeze
103
- end
104
- module_function :parse_parameters
154
+ def parse_content_language(language_tags_txt)
155
+ tag_list = language_tags_txt.split(',')
156
+ for tag in tag_list
157
+ tag.strip!
158
+ tag.freeze
159
+ end
160
+ tag_list.reject!(&:empty?)
161
+
162
+ tag_list.freeze
163
+ end
164
+ module_function :parse_content_language
165
+
166
+ def parse_multipart_body(boundary, body_txt)
167
+ delim = '--' + boundary
168
+ term = delim + '--'
169
+ body_txt2, _body_epilogue_txt = body_txt.split(term, 2)
170
+ if (body_txt2) then
171
+ _body_preamble_txt, body_parts_txt = body_txt2.split(delim, 2)
172
+ if (body_parts_txt) then
173
+ part_list = body_parts_txt.split(delim, -1)
174
+ for part_txt in part_list
175
+ part_txt.lstrip!
176
+ part_txt.chomp!("\n")
177
+ part_txt.chomp!("\r")
178
+ part_txt.freeze
179
+ end
180
+ return part_list.freeze
181
+ end
182
+ end
105
183
 
106
- def split_parameters(type_params_txt)
107
- type, params_txt = type_params_txt.split(';', 2)
108
- if (type) then
109
- type.strip!
110
- type.freeze
111
- if (params_txt) then
112
- params = parse_parameters(params_txt)
113
- else
114
- params = {}.freeze
184
+ [].freeze
185
+ end
186
+ module_function :parse_multipart_body
187
+
188
+ Address = Struct.new(:display_name, :route, :local_part, :domain)
189
+ class Address
190
+ # compatible with Net::IMAP::Address
191
+ alias name display_name
192
+ alias mailbox local_part
193
+ alias host domain
194
+ end
195
+
196
+ def parse_mail_address_list(address_list_txt)
197
+ addr_list = []
198
+ src_txt = address_list_txt.dup
199
+
200
+ while (true)
201
+ if (src_txt.sub!(%r{
202
+ \A
203
+ \s*
204
+ (?<display_name>\S.*?) \s* : (?<group_list>.*?) ;
205
+ \s*
206
+ ,?
207
+ }x, ''))
208
+ then
209
+ display_name = $~[:display_name]
210
+ group_list = $~[:group_list]
211
+ addr_list << Address.new( nil, nil, unquote_phrase(display_name), nil).freeze
212
+ addr_list.concat(parse_mail_address_list(group_list))
213
+ addr_list << Address.new(nil, nil, nil, nil).freeze
214
+ elsif (src_txt.sub!(%r{
215
+ \A
216
+ \s*
217
+ (?<local_part>[^<>@",\s]+) \s* @ \s* (?<domain>[^<>@",\s]+)
218
+ \s*
219
+ ,?
220
+ }x, ''))
221
+ then
222
+ addr_list << Address.new(nil, nil, $~[:local_part].freeze, $~[:domain].freeze).freeze
223
+ elsif (src_txt.sub!(%r{
224
+ \A
225
+ \s*
226
+ (?<display_name>\S.*?)
227
+ \s*
228
+ <
229
+ \s*
230
+ (?:
231
+ (?<route>@[^<>@",]* (?:, \s* @[^<>@",]*)*)
232
+ \s*
233
+ :
234
+ )?
235
+ \s*
236
+ (?<local_part>[^<>@",\s]+) \s* @ \s* (?<domain>[^<>@",\s]+)
237
+ \s*
238
+ >
239
+ \s*
240
+ ,?
241
+ }x, ''))
242
+ then
243
+ display_name = $~[:display_name]
244
+ route = $~[:route]
245
+ local_part = $~[:local_part]
246
+ domain = $~[:domain]
247
+ addr_list << Address.new(unquote_phrase(display_name), route.freeze, local_part.freeze, domain.freeze).freeze
248
+ else
249
+ break
250
+ end
115
251
  end
116
- [ type, params ].freeze
117
- else
118
- [ nil, {}.freeze ].freeze
252
+
253
+ addr_list.freeze
119
254
  end
255
+ module_function :parse_mail_address_list
120
256
  end
257
+
258
+ # for backward compatibility
259
+ include Parse
260
+ module_function :split_message
261
+ module_function :parse_header
262
+ module_function :unquote_phrase
263
+ module_function :parse_parameters
121
264
  module_function :split_parameters
265
+ module_function :parse_content_type
266
+ module_function :parse_content_disposition
267
+ module_function :parse_content_language
268
+ module_function :parse_multipart_body
269
+ module_function :parse_mail_address_list
122
270
 
123
- def parse_content_type(type_txt)
124
- media_type_txt, params = split_parameters(type_txt)
125
- if (media_type_txt) then
126
- main_type, sub_type = media_type_txt.split('/', 2)
127
- if (main_type) then
128
- main_type.strip!
129
- main_type.freeze
130
- if (sub_type) then
131
- sub_type.strip!
132
- sub_type.freeze
133
- if (! main_type.empty? && ! sub_type.empty?) then
134
- return [ main_type, sub_type, params ].freeze
135
- end
136
- end
271
+ class CharsetAliases
272
+ def initialize
273
+ @alias_table = {}
274
+ end
275
+
276
+ # API methods
277
+
278
+ def [](name)
279
+ @alias_table[name.upcase]
280
+ end
281
+
282
+ def add_alias(name, encoding)
283
+ @alias_table[name.upcase] = encoding
284
+ self
285
+ end
286
+
287
+ def delete_alias(name)
288
+ @alias_table.delete(name.upcase)
289
+ end
290
+
291
+ # minimal methods like `Hash'
292
+
293
+ extend Forwardable
294
+ include Enumerable
295
+
296
+ def_delegators :@alias_table, :empty?, :size, :keys
297
+ alias length size
298
+
299
+ def key?(name)
300
+ @alias_table.key? name.upcase
301
+ end
302
+
303
+ alias has_key? key?
304
+ alias include? key?
305
+ alias member? key?
306
+
307
+ def each_key
308
+ return enum_for(:each_key) unless block_given?
309
+ @alias_table.each_key do |name|
310
+ yield(name)
137
311
  end
312
+ self
138
313
  end
139
314
 
140
- [ 'application'.dup.force_encoding(type_txt.encoding).freeze,
141
- 'octet-stream'.dup.force_encoding(type_txt.encoding).freeze,
142
- params
143
- ].freeze
144
- end
145
- module_function :parse_content_type
315
+ def each_pair
316
+ return enum_for(:each_pair) unless block_given?
317
+ @alias_table.each_pair do |name, encoding|
318
+ yield(name, encoding)
319
+ end
320
+ self
321
+ end
146
322
 
147
- def parse_content_disposition(disposition_txt)
148
- split_parameters(disposition_txt)
323
+ alias each each_pair
149
324
  end
150
- module_function :parse_content_disposition
151
325
 
152
- def parse_content_language(language_tags_txt)
153
- tag_list = language_tags_txt.split(',')
154
- for tag in tag_list
155
- tag.strip!
156
- tag.freeze
326
+ DEFAULT_CHARSET_ALIASES = CharsetAliases.new
327
+ #DEFAULT_CHARSET_ALIASES.add_alias('euc-jp', Encoding::CP51932)
328
+ DEFAULT_CHARSET_ALIASES.add_alias('euc-jp', Encoding::EUCJP_MS)
329
+ #DEFAULT_CHARSET_ALIASES.add_alias('iso-2022-jp', Encoding::CP50220)
330
+ DEFAULT_CHARSET_ALIASES.add_alias('iso-2022-jp', Encoding::CP50221)
331
+ DEFAULT_CHARSET_ALIASES.add_alias('shift_jis', Encoding::WINDOWS_31J)
332
+
333
+ module CharsetText
334
+ def self.find_string_encoding(name)
335
+ begin
336
+ Encoding.find(name)
337
+ rescue ArgumentError
338
+ raise EncodingError.new($!.to_s)
339
+ end
157
340
  end
158
- tag_list.reject!(&:empty?)
159
341
 
160
- tag_list.freeze
161
- end
162
- module_function :parse_content_language
342
+ def get_mime_charset_text(binary_string, charset, transfer_encoding=nil, charset_aliases: DEFAULT_CHARSET_ALIASES)
343
+ case (transfer_encoding&.upcase)
344
+ when 'BASE64'
345
+ text = binary_string.unpack1('m')
346
+ when 'QUOTED-PRINTABLE'
347
+ text = binary_string.unpack1('M')
348
+ else
349
+ text = binary_string.dup
350
+ end
163
351
 
164
- def parse_multipart_body(boundary, body_txt)
165
- delim = '--' + boundary
166
- term = delim + '--'
167
- body_txt2, _body_epilogue_txt = body_txt.split(term, 2)
168
- if (body_txt2) then
169
- _body_preamble_txt, body_parts_txt = body_txt2.split(delim, 2)
170
- if (body_parts_txt) then
171
- part_list = body_parts_txt.split(delim, -1)
172
- for part_txt in part_list
173
- part_txt.lstrip!
174
- part_txt.chomp!("\n")
175
- part_txt.chomp!("\r")
176
- part_txt.freeze
352
+ if (charset) then
353
+ if (charset.is_a? Encoding) then
354
+ enc = charset
355
+ else
356
+ enc = charset_aliases[charset] ||
357
+ CharsetText.find_string_encoding(charset) # raise `EncodingError' when wrong charset due to document
177
358
  end
178
- return part_list.freeze
359
+ text.force_encoding(enc)
360
+ text.valid_encoding? or raise EncodingError, "invalid encoding - #{enc}"
179
361
  end
362
+
363
+ text.freeze
180
364
  end
365
+ module_function :get_mime_charset_text
181
366
 
182
- [].freeze
183
- end
184
- module_function :parse_multipart_body
367
+ ENCODED_WORD_TRANSFER_ENCODING_TABLE = { # :nodoc:
368
+ 'B' => 'BASE64',
369
+ 'Q' => 'QUOTED-PRINTABLE'
370
+ }.freeze
185
371
 
186
- Address = Struct.new(:display_name, :route, :local_part, :domain)
187
- class Address
188
- # compatible for Net::MAP::Address
189
- alias name display_name
190
- alias mailbox local_part
191
- alias host domain
192
- end
372
+ def decode_mime_encoded_words(encoded_string, decode_charset=nil, charset_aliases: DEFAULT_CHARSET_ALIASES, charset_convert_options: {})
373
+ src = encoded_string
374
+ dst = ''.dup
193
375
 
194
- def parse_mail_address_list(address_list_txt)
195
- addr_list = []
196
- src_txt = address_list_txt.dup
197
-
198
- while (true)
199
- if (src_txt.sub!(%r{
200
- \A
201
- \s*
202
- (?<display_name>\S.*?) \s* : (?<group_list>.*?) ;
203
- \s*
204
- ,?
205
- }x, ''))
206
- then
207
- display_name = $~[:display_name]
208
- group_list = $~[:group_list]
209
- addr_list << Address.new( nil, nil, unquote_phrase(display_name), nil).freeze
210
- addr_list.concat(parse_mail_address_list(group_list))
211
- addr_list << Address.new(nil, nil, nil, nil).freeze
212
- elsif (src_txt.sub!(%r{
213
- \A
214
- \s*
215
- (?<local_part>[^<>@",\s]+) \s* @ \s* (?<domain>[^<>@",\s]+)
216
- \s*
217
- ,?
218
- }x, ''))
219
- then
220
- addr_list << Address.new(nil, nil, $~[:local_part].freeze, $~[:domain].freeze).freeze
221
- elsif (src_txt.sub!(%r{
222
- \A
223
- \s*
224
- (?<display_name>\S.*?)
225
- \s*
226
- <
227
- \s*
228
- (?:
229
- (?<route>@[^<>@",]* (?:, \s* @[^<>@",]*)*)
230
- \s*
231
- :
232
- )?
233
- \s*
234
- (?<local_part>[^<>@",\s]+) \s* @ \s* (?<domain>[^<>@",\s]+)
235
- \s*
236
- >
237
- \s*
238
- ,?
239
- }x, ''))
240
- then
241
- display_name = $~[:display_name]
242
- route = $~[:route]
243
- local_part = $~[:local_part]
244
- domain = $~[:domain]
245
- addr_list << Address.new(unquote_phrase(display_name), route.freeze, local_part.freeze, domain.freeze).freeze
376
+ if (decode_charset) then
377
+ if (decode_charset.is_a? Encoding) then
378
+ decode_charset_encoding = decode_charset
379
+ else
380
+ decode_charset_encoding = charset_aliases[decode_charset] ||
381
+ Encoding.find(decode_charset) # raise `ArgumentError' when wrong charset due to library user
382
+ end
383
+ dst.force_encoding(decode_charset_encoding)
246
384
  else
247
- break
385
+ dst.force_encoding(encoded_string.encoding)
248
386
  end
249
- end
250
387
 
251
- addr_list.freeze
388
+ while (src =~ %r{
389
+ =\? [^\s?]+ \? [BQ] \? [^\s?]+ \?=
390
+ (?:
391
+ \s+
392
+ =\? [^\s?]+ \? [BQ] \? [^\s?]+ \?=
393
+ )*
394
+ }ix)
395
+
396
+ src = $'
397
+ foreword = $`
398
+ encoded_word_list = $&.split(/\s+/, -1)
399
+
400
+ unless (foreword.empty?) then
401
+ if (dst.encoding.dummy?) then
402
+ # run the slow `String#encode' only when really needed,
403
+ # on the premise that the text outside of encoded words
404
+ # is ASCII only.
405
+ foreword.encode!(decode_charset_encoding, charset_convert_options)
406
+ end
407
+ dst << foreword
408
+ end
409
+
410
+ for encoded_word in encoded_word_list
411
+ _, charset, encoding, encoded_text, _ = encoded_word.split('?', 5)
412
+ encoding.upcase!
413
+ encoded_text.tr!('_', ' ') if (encoding == 'Q')
414
+ transfer_encoding = ENCODED_WORD_TRANSFER_ENCODING_TABLE[encoding] or raise "internal error - unknown encoding: #{encoding}"
415
+ decoded_text = get_mime_charset_text(encoded_text, charset, transfer_encoding, charset_aliases: charset_aliases)
416
+
417
+ if (decode_charset_encoding) then
418
+ if (decoded_text.encoding != decode_charset_encoding) then
419
+ # `decoded_text' is frozen
420
+ decoded_text = decoded_text.encode(decode_charset_encoding, charset_convert_options)
421
+ end
422
+ elsif (dst.ascii_only?) then
423
+ if (decoded_text.encoding.dummy?) then
424
+ dst.encode!(decoded_text.encoding, charset_convert_options)
425
+ end
426
+ else
427
+ if (decoded_text.encoding != dst.encoding) then
428
+ # `decoded_text' is frozen
429
+ decoded_text = decoded_text.encode(dst.encoding, charset_convert_options)
430
+ end
431
+ end
432
+ dst << decoded_text
433
+ end
434
+ end
435
+
436
+ unless (src.empty?) then
437
+ if (dst.encoding.dummy?) then
438
+ # run the slow `String#encode' only when really needed,
439
+ # on the premise that the text outside of encoded words
440
+ # is ASCII only.
441
+ src = src.encode(dst.encoding, charset_convert_options) # `src' may be frozen
442
+ end
443
+ dst << src
444
+ end
445
+
446
+ dst.freeze
447
+ end
448
+ module_function :decode_mime_encoded_words
252
449
  end
253
- module_function :parse_mail_address_list
254
450
 
255
451
  class Header
256
452
  include Enumerable
@@ -258,26 +454,27 @@ module RIMS
258
454
  def initialize(header_txt)
259
455
  @raw_source = header_txt
260
456
  @field_list = nil
261
- @field_map = nil
457
+ @field_table = nil
262
458
  end
263
459
 
264
460
  attr_reader :raw_source
265
461
 
266
462
  def setup_header
267
- if (@field_list.nil? || @field_map.nil?) then
268
- @field_list = RFC822.parse_header(@raw_source)
269
- @field_map = {}
463
+ if (@field_list.nil? || @field_table.nil?) then
464
+ @field_list = Parse.parse_header(@raw_source)
465
+ @field_table = {}
270
466
  for name, value in @field_list
271
467
  key = name.downcase
272
- @field_map[key] = [] unless (@field_map.key? key)
273
- @field_map[key] << value
468
+ @field_table[key] = [] unless (@field_table.key? key)
469
+ @field_table[key] << value
274
470
  end
275
- @field_map.each_value do |value_list|
471
+ @field_table.each_value do |value_list|
276
472
  value_list.freeze
277
473
  end
278
- @field_map.freeze
279
- self
474
+ @field_table.freeze
280
475
  end
476
+
477
+ nil
281
478
  end
282
479
  private :setup_header
283
480
 
@@ -292,19 +489,24 @@ module RIMS
292
489
 
293
490
  def key?(name)
294
491
  setup_header
295
- @field_map.key? name.downcase
492
+ @field_table.key? name.downcase
296
493
  end
297
494
 
495
+ # aliases like `Hash'
496
+ alias has_key? key?
497
+ alias include? key?
498
+ alias member? key?
499
+
298
500
  def [](name)
299
501
  setup_header
300
- if (value_list = @field_map[name.downcase]) then
502
+ if (value_list = @field_table[name.downcase]) then
301
503
  value_list[0]
302
504
  end
303
505
  end
304
506
 
305
507
  def fetch_upcase(name)
306
508
  setup_header
307
- if (value_list = @field_map[name.downcase]) then
509
+ if (value_list = @field_table[name.downcase]) then
308
510
  if (value = value_list[0]) then
309
511
  value.upcase
310
512
  end
@@ -313,7 +515,7 @@ module RIMS
313
515
 
314
516
  def field_value_list(name)
315
517
  setup_header
316
- @field_map[name.downcase]
518
+ @field_table[name.downcase]
317
519
  end
318
520
  end
319
521
 
@@ -326,8 +528,9 @@ module RIMS
326
528
  end
327
529
 
328
530
  class Message
329
- def initialize(msg_txt)
531
+ def initialize(msg_txt, charset_aliases: DEFAULT_CHARSET_ALIASES)
330
532
  @raw_source = msg_txt.dup.freeze
533
+ @charset_aliases = charset_aliases
331
534
  @header = nil
332
535
  @body = nil
333
536
  @content_type = nil
@@ -342,17 +545,22 @@ module RIMS
342
545
  @to = nil
343
546
  @cc = nil
344
547
  @bcc = nil
548
+ @mime_decoded_header_cache = nil
549
+ @mime_decoded_header_field_value_list_cache = nil
550
+ @mime_decoded_header_text_cache = nil
551
+ @mime_charset_body_text_cache = nil
345
552
  end
346
553
 
347
554
  attr_reader :raw_source
348
555
 
349
556
  def setup_message
350
557
  if (@header.nil? || @body.nil?) then
351
- header_txt, body_txt = RFC822.split_message(@raw_source)
558
+ header_txt, body_txt = Parse.split_message(@raw_source)
352
559
  @header = Header.new(header_txt || '')
353
560
  @body = Body.new(body_txt || '')
354
- self
355
561
  end
562
+
563
+ nil
356
564
  end
357
565
  private :setup_message
358
566
 
@@ -367,10 +575,8 @@ module RIMS
367
575
  end
368
576
 
369
577
  def setup_content_type
370
- if (@content_type.nil?) then
371
- @content_type = RFC822.parse_content_type(header['Content-Type'] || '')
372
- self
373
- end
578
+ @content_type ||= Parse.parse_content_type(header['Content-Type'] || '')
579
+ nil
374
580
  end
375
581
  private :setup_content_type
376
582
 
@@ -430,11 +636,10 @@ module RIMS
430
636
 
431
637
  def setup_content_disposition
432
638
  if (header.key? 'Content-Disposition') then
433
- if (@content_disposition.nil?) then
434
- @content_disposition = RFC822.parse_content_disposition(header['Content-Disposition'])
435
- self
436
- end
639
+ @content_disposition ||= Parse.parse_content_disposition(header['Content-Disposition'])
437
640
  end
641
+
642
+ nil
438
643
  end
439
644
  private :setup_content_type
440
645
 
@@ -468,11 +673,13 @@ module RIMS
468
673
  def setup_content_language
469
674
  if (header.key? 'Content-Language') then
470
675
  if (@content_language.nil?) then
471
- @content_language = header.field_value_list('Content-Language').map{|tags_txt| RFC822.parse_content_language(tags_txt) }.inject(:+)
676
+ @content_language = header.field_value_list('Content-Language').map{|tags_txt| Parse.parse_content_language(tags_txt) }
677
+ @content_language.flatten!
472
678
  @content_language.freeze
473
- self
474
679
  end
475
680
  end
681
+
682
+ nil
476
683
  end
477
684
  private :setup_content_language
478
685
 
@@ -499,7 +706,7 @@ module RIMS
499
706
  if (multipart?) then
500
707
  if (@parts.nil?) then
501
708
  if (boundary = self.boundary) then
502
- part_list = RFC822.parse_multipart_body(boundary, body.raw_source)
709
+ part_list = Parse.parse_multipart_body(boundary, body.raw_source)
503
710
  @parts = part_list.map{|msg_txt| Message.new(msg_txt) }
504
711
  else
505
712
  @parts = []
@@ -517,11 +724,7 @@ module RIMS
517
724
 
518
725
  def message
519
726
  if (message?) then
520
- if (@message.nil?) then
521
- @message = Message.new(body.raw_source)
522
- end
523
-
524
- @message
727
+ @message ||= Message.new(body.raw_source)
525
728
  end
526
729
  end
527
730
 
@@ -545,7 +748,8 @@ module RIMS
545
748
  ivar_name = '@' + field_name.downcase.gsub('-', '_')
546
749
  addr_list = instance_variable_get(ivar_name)
547
750
  if (addr_list.nil?) then
548
- addr_list = header.field_value_list(field_name).map{|addr_list_txt| RFC822.parse_mail_address_list(addr_list_txt) }.inject(:+)
751
+ addr_list = header.field_value_list(field_name).map{|addr_list_txt| Parse.parse_mail_address_list(addr_list_txt) }
752
+ addr_list.flatten!
549
753
  addr_list.freeze
550
754
  instance_variable_set(ivar_name, addr_list)
551
755
  end
@@ -578,6 +782,74 @@ module RIMS
578
782
  def bcc
579
783
  mail_address_header_field('bcc')
580
784
  end
785
+
786
+ def make_charset_key(charset)
787
+ if (charset.is_a? Encoding) then
788
+ charset
789
+ else
790
+ charset.downcase.freeze
791
+ end
792
+ end
793
+ private :make_charset_key
794
+
795
+ def mime_decoded_header(name, decode_charset=nil, charset_convert_options: {})
796
+ cache_key = [
797
+ name.downcase.freeze,
798
+ (decode_charset) ? make_charset_key(decode_charset) : :default
799
+ ].freeze
800
+ @mime_decoded_header_cache ||= {}
801
+ @mime_decoded_header_cache[cache_key] ||= CharsetText.decode_mime_encoded_words(header[name],
802
+ decode_charset,
803
+ charset_aliases: @charset_aliases,
804
+ charset_convert_options: charset_convert_options)
805
+ end
806
+
807
+ def mime_decoded_header_field_value_list(name, decode_charset=nil, charset_convert_options: {})
808
+ cache_key = [
809
+ name.downcase.freeze,
810
+ (decode_charset) ? make_charset_key(decode_charset) : :default
811
+ ].freeze
812
+ @mime_decoded_header_field_value_list_cache ||= {}
813
+ @mime_decoded_header_field_value_list_cache[cache_key] ||= header.field_value_list(name).map{|field_value|
814
+ CharsetText.decode_mime_encoded_words(field_value,
815
+ decode_charset,
816
+ charset_aliases: @charset_aliases,
817
+ charset_convert_options: charset_convert_options)
818
+ }.freeze
819
+ end
820
+
821
+ def mime_decoded_header_text(decode_charset=nil, charset_convert_options: {})
822
+ cache_key = (decode_charset) ? make_charset_key(decode_charset) : :default
823
+ @mime_decoded_header_text_cache ||= {}
824
+ @mime_decoded_header_text_cache[cache_key] ||= CharsetText.decode_mime_encoded_words(header.raw_source,
825
+ decode_charset,
826
+ charset_aliases: @charset_aliases,
827
+ charset_convert_options: charset_convert_options)
828
+ end
829
+
830
+ def mime_charset_body_text(charset=nil)
831
+ @mime_charset_body_text_cache ||= {}
832
+ unless (charset) then
833
+ unless (@mime_charset_body_text_cache.key? :default) then
834
+ charset = (text?) ? self.charset : Encoding::ASCII_8BIT
835
+ @mime_charset_body_text_cache[:default] = CharsetText.get_mime_charset_text(body.raw_source,
836
+ charset,
837
+ header['Content-Transfer-Encoding'],
838
+ charset_aliases: @charset_aliases)
839
+ end
840
+ @mime_charset_body_text_cache[:default]
841
+ else
842
+ cache_key = make_charset_key(charset)
843
+ @mime_charset_body_text_cache[cache_key] ||= CharsetText.get_mime_charset_text(body.raw_source,
844
+ charset,
845
+ header['Content-Transfer-Encoding'],
846
+ charset_aliases: @charset_aliases)
847
+ end
848
+ end
849
+
850
+ def mime_binary_body_string
851
+ mime_charset_body_text(Encoding::ASCII_8BIT)
852
+ end
581
853
  end
582
854
  end
583
855
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module RIMS
4
4
  module RFC822
5
- VERSION = '0.2.0'
5
+ VERSION = '0.2.1'
6
6
  end
7
7
  end
8
8
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rims-rfc822
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - TOKI Yoshinori
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-06-20 00:00:00.000000000 Z
11
+ date: 2019-07-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler