rims-rfc822 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 51ec6ae41467b2b68bd8d1bb23916ad42f27fb6b688f8769f64e61db5e8afd98
4
- data.tar.gz: f48b88893f67da1da2824336bfe274b033131527420de1926bea05faa7036618
3
+ metadata.gz: '0825e7fac6f422946c68301c42abcce200e31d32a96ed83d6b1879a5a2b7c6cf'
4
+ data.tar.gz: '091affd6b8394475774ea160b56eb870f01785f72c1b737ea41fe4a5c6698d82'
5
5
  SHA512:
6
- metadata.gz: 77e6b92ea9e32a7a94972d493103e03face8eaf6872400a324242431b3c8ecd5211bf0bd4c77959c47e634d820bd26089f250cd49a5e54ccfdf496cbbdaf7e08
7
- data.tar.gz: a94818fedb229ad9df8683d027aa9c0cacf57a9b5296ae279794d1132e310b0a93a7158bc2c25f72279117ce29c90090b945a2e79f552fab07b1b3a1467dedb2
6
+ metadata.gz: 210d959c67edec519382dfcb24e7aa1069a93e8da0b001badff3c8e643b2aaf83c5a0f84e54f0354fe1740e86b4a2467f5c5f503420fd6e2b2e89b15c0585dc7
7
+ data.tar.gz: ad09d49a4f5caab505893a90e5c309d3d2878178aa73dcec2784fbbf38ad24ac5028710c3f62ece160bc7ccbbbdbf1969fe70c38ae13d1a322f0a99cad5e1cd3
data/README.md CHANGED
@@ -33,6 +33,15 @@ msg = RIMS::RFC822::Message.new(your_rfc822_text)
33
33
  p msg.header
34
34
  p msg.body
35
35
 
36
+ # header fields
37
+ p msg.header[name]
38
+ p msg.header.fetch_upcase(name)
39
+ p msg.header.field_value_list(name)
40
+ p msg.header.key? name
41
+ msg.header.each do |name, value|
42
+ p [ name, value ]
43
+ end
44
+
36
45
  # source text attributes
37
46
  p msg.raw_source
38
47
  p msg.header.raw_source
@@ -52,7 +61,7 @@ p msg.content_disposition_parameter(name)
52
61
  p msg.content_disposition_parameter_list
53
62
  p msg.content_language
54
63
 
55
- # headear attributes
64
+ # header attributes
56
65
  p msg.date
57
66
  p msg.from
58
67
  p msg.sender
@@ -61,12 +70,23 @@ p msg.to
61
70
  p msg.cc
62
71
  p msg.bcc
63
72
 
64
- # content attributes
73
+ # body structure attributes
65
74
  p msg.text?
66
75
  p msg.multipart?
67
76
  p msg.message?
68
77
  p msg.parts
69
78
  p msg.message
79
+
80
+ # MIME header and body attributes
81
+ p msg.mime_decoded_header(name)
82
+ p msg.mime_decoded_header(name, decode_charset)
83
+ p msg.mime_decoded_header_field_value_list(name)
84
+ p msg.mime_decoded_header_field_value_list(name, decode_charset)
85
+ p msg.mime_decoded_header_text
86
+ p msg.mime_decoded_header_text(decode_charset)
87
+ p msg.mime_charset_body_text
88
+ p msg.mime_charset_body_text(charset)
89
+ p msg.mime_binary_body_string
70
90
  ```
71
91
 
72
92
  Contributing
@@ -1,256 +1,452 @@
1
1
  # -*- coding: utf-8; frozen_string_literal: true -*-
2
2
 
3
+ require 'forwardable'
3
4
  require 'rims/rfc822/version'
4
5
  require 'time'
5
6
 
6
7
  module RIMS
7
8
  module RFC822
8
- def split_message(msg_txt)
9
- header_txt, body_txt = msg_txt.lstrip.split(/\r?\n\r?\n/, 2)
10
- if ($&) then
11
- header_txt << $& if $&
12
- else
13
- body_txt = header_txt
14
- header_txt = nil
15
- end
16
-
17
- [ header_txt.freeze, body_txt.freeze ].freeze
18
- end
19
- module_function :split_message
9
+ module Parse
10
+ def split_message(msg_txt)
11
+ header_txt, body_txt = msg_txt.lstrip.split(/\r?\n\r?\n/, 2)
12
+ if ($&) then
13
+ header_txt << $& if $&
14
+ else
15
+ body_txt = header_txt
16
+ header_txt = nil
17
+ end
20
18
 
21
- def parse_header(header_txt)
22
- field_pair_list = header_txt.scan(%r{
23
- ^
24
- ((?#name) \S+? )
25
- \s* : \s*
26
- (
27
- (?#value)
28
- .*? (?: \n|\z)
29
- (?: ^\s .*? (?: \n|\z) )*
30
- )
31
- }x)
32
-
33
- for name, value in field_pair_list
34
- value.strip!
35
- name.freeze
36
- value.freeze
37
- end
38
-
39
- field_pair_list.freeze
40
- end
41
- module_function :parse_header
19
+ [ header_txt.freeze, body_txt.freeze ].freeze
20
+ end
21
+ module_function :split_message
22
+
23
+ def parse_header(header_txt)
24
+ field_pair_list = header_txt.scan(%r{
25
+ ^
26
+ ((?#name) \S+? )
27
+ \s* : \s*
28
+ (
29
+ (?#value)
30
+ .*? (?: \n|\z)
31
+ (?: ^\s .*? (?: \n|\z) )*
32
+ )
33
+ }x)
34
+
35
+ for name, value in field_pair_list
36
+ value.strip!
37
+ name.freeze
38
+ value.freeze
39
+ end
42
40
 
43
- def unquote_phrase(phrase_txt)
44
- state = :raw
45
- src_txt = phrase_txt.dup
46
- dst_txt = ''.encode(phrase_txt.encoding)
47
-
48
- while (src_txt.sub!(/\A (?: " | \( | \) | \\ | [^"\(\)\\]+ )/x, ''))
49
- match_txt = $&
50
- case (state)
51
- when :raw
52
- case (match_txt)
53
- when '"'
54
- state = :quote
55
- when '('
56
- state = :comment
57
- when "\\"
58
- src_txt.sub!(/\A./, '') and dst_txt << $&
41
+ field_pair_list.freeze
42
+ end
43
+ module_function :parse_header
44
+
45
+ def unquote_phrase(phrase_txt)
46
+ state = :raw
47
+ src_txt = phrase_txt.dup
48
+ dst_txt = ''.encode(phrase_txt.encoding)
49
+
50
+ while (src_txt.sub!(/\A (?: " | \( | \) | \\ | [^"\(\)\\]+ )/x, ''))
51
+ match_txt = $&
52
+ case (state)
53
+ when :raw
54
+ case (match_txt)
55
+ when '"'
56
+ state = :quote
57
+ when '('
58
+ state = :comment
59
+ when "\\"
60
+ src_txt.sub!(/\A./, '') and dst_txt << $&
61
+ else
62
+ dst_txt << match_txt
63
+ end
64
+ when :quote
65
+ case (match_txt)
66
+ when '"'
67
+ state = :raw
68
+ when "\\"
69
+ src_txt.sub!(/\A./, '') && dst_txt << $&
70
+ else
71
+ dst_txt << match_txt
72
+ end
73
+ when :comment
74
+ case (match_txt)
75
+ when ')'
76
+ state = :raw
77
+ when "\\"
78
+ src_txt.sub!(/\A./, '')
79
+ else
80
+ # ignore comment text.
81
+ end
59
82
  else
60
- dst_txt << match_txt
83
+ raise "internal error - unknown state: #{state}"
61
84
  end
62
- when :quote
63
- case (match_txt)
64
- when '"'
65
- state = :raw
66
- when "\\"
67
- src_txt.sub!(/\A./, '') && dst_txt << $&
85
+ end
86
+
87
+ dst_txt.freeze
88
+ end
89
+ module_function :unquote_phrase
90
+
91
+ def parse_parameters(parameters_txt)
92
+ params = {}
93
+ parameters_txt.scan(%r'(?<name>\S+?) \s* = \s* (?: (?<quoted_string>".*?") | (?<token>\S+?) ) \s* (?:;|\Z)'x) do
94
+ name = $~[:name]
95
+ if ($~[:quoted_string]) then
96
+ quoted_value = $~[:quoted_string]
97
+ value = unquote_phrase(quoted_value)
68
98
  else
69
- dst_txt << match_txt
99
+ value = $~[:token]
70
100
  end
71
- when :comment
72
- case (match_txt)
73
- when ')'
74
- state = :raw
75
- when "\\"
76
- src_txt.sub!(/\A./, '')
101
+ params[name.downcase.freeze] = [ name.freeze, value.freeze ].freeze
102
+ end
103
+
104
+ params.freeze
105
+ end
106
+ module_function :parse_parameters
107
+
108
+ def split_parameters(type_params_txt)
109
+ type, params_txt = type_params_txt.split(';', 2)
110
+ if (type) then
111
+ type.strip!
112
+ type.freeze
113
+ if (params_txt) then
114
+ params = parse_parameters(params_txt)
77
115
  else
78
- # ignore comment text.
116
+ params = {}.freeze
79
117
  end
118
+ [ type, params ].freeze
80
119
  else
81
- raise "internal error: unknown state #{state}"
120
+ [ nil, {}.freeze ].freeze
82
121
  end
83
122
  end
123
+ module_function :split_parameters
124
+
125
+ def parse_content_type(type_txt)
126
+ media_type_txt, params = split_parameters(type_txt)
127
+ if (media_type_txt) then
128
+ main_type, sub_type = media_type_txt.split('/', 2)
129
+ if (main_type) then
130
+ main_type.strip!
131
+ main_type.freeze
132
+ if (sub_type) then
133
+ sub_type.strip!
134
+ sub_type.freeze
135
+ if (! main_type.empty? && ! sub_type.empty?) then
136
+ return [ main_type, sub_type, params ].freeze
137
+ end
138
+ end
139
+ end
140
+ end
84
141
 
85
- dst_txt.freeze
86
- end
87
- module_function :unquote_phrase
142
+ [ 'application'.dup.force_encoding(type_txt.encoding).freeze,
143
+ 'octet-stream'.dup.force_encoding(type_txt.encoding).freeze,
144
+ params
145
+ ].freeze
146
+ end
147
+ module_function :parse_content_type
88
148
 
89
- def parse_parameters(parameters_txt)
90
- params = {}
91
- parameters_txt.scan(%r'(?<name>\S+?) \s* = \s* (?: (?<quoted_string>".*?") | (?<token>\S+?) ) \s* (?:;|\Z)'x) do
92
- name = $~[:name]
93
- if ($~[:quoted_string]) then
94
- quoted_value = $~[:quoted_string]
95
- value = unquote_phrase(quoted_value)
96
- else
97
- value = $~[:token]
98
- end
99
- params[name.downcase.freeze] = [ name.freeze, value.freeze ].freeze
149
+ def parse_content_disposition(disposition_txt)
150
+ split_parameters(disposition_txt)
100
151
  end
152
+ module_function :parse_content_disposition
101
153
 
102
- params.freeze
103
- end
104
- module_function :parse_parameters
154
+ def parse_content_language(language_tags_txt)
155
+ tag_list = language_tags_txt.split(',')
156
+ for tag in tag_list
157
+ tag.strip!
158
+ tag.freeze
159
+ end
160
+ tag_list.reject!(&:empty?)
161
+
162
+ tag_list.freeze
163
+ end
164
+ module_function :parse_content_language
165
+
166
+ def parse_multipart_body(boundary, body_txt)
167
+ delim = '--' + boundary
168
+ term = delim + '--'
169
+ body_txt2, _body_epilogue_txt = body_txt.split(term, 2)
170
+ if (body_txt2) then
171
+ _body_preamble_txt, body_parts_txt = body_txt2.split(delim, 2)
172
+ if (body_parts_txt) then
173
+ part_list = body_parts_txt.split(delim, -1)
174
+ for part_txt in part_list
175
+ part_txt.lstrip!
176
+ part_txt.chomp!("\n")
177
+ part_txt.chomp!("\r")
178
+ part_txt.freeze
179
+ end
180
+ return part_list.freeze
181
+ end
182
+ end
105
183
 
106
- def split_parameters(type_params_txt)
107
- type, params_txt = type_params_txt.split(';', 2)
108
- if (type) then
109
- type.strip!
110
- type.freeze
111
- if (params_txt) then
112
- params = parse_parameters(params_txt)
113
- else
114
- params = {}.freeze
184
+ [].freeze
185
+ end
186
+ module_function :parse_multipart_body
187
+
188
+ Address = Struct.new(:display_name, :route, :local_part, :domain)
189
+ class Address
190
+ # compatible for Net::MAP::Address
191
+ alias name display_name
192
+ alias mailbox local_part
193
+ alias host domain
194
+ end
195
+
196
+ def parse_mail_address_list(address_list_txt)
197
+ addr_list = []
198
+ src_txt = address_list_txt.dup
199
+
200
+ while (true)
201
+ if (src_txt.sub!(%r{
202
+ \A
203
+ \s*
204
+ (?<display_name>\S.*?) \s* : (?<group_list>.*?) ;
205
+ \s*
206
+ ,?
207
+ }x, ''))
208
+ then
209
+ display_name = $~[:display_name]
210
+ group_list = $~[:group_list]
211
+ addr_list << Address.new( nil, nil, unquote_phrase(display_name), nil).freeze
212
+ addr_list.concat(parse_mail_address_list(group_list))
213
+ addr_list << Address.new(nil, nil, nil, nil).freeze
214
+ elsif (src_txt.sub!(%r{
215
+ \A
216
+ \s*
217
+ (?<local_part>[^<>@",\s]+) \s* @ \s* (?<domain>[^<>@",\s]+)
218
+ \s*
219
+ ,?
220
+ }x, ''))
221
+ then
222
+ addr_list << Address.new(nil, nil, $~[:local_part].freeze, $~[:domain].freeze).freeze
223
+ elsif (src_txt.sub!(%r{
224
+ \A
225
+ \s*
226
+ (?<display_name>\S.*?)
227
+ \s*
228
+ <
229
+ \s*
230
+ (?:
231
+ (?<route>@[^<>@",]* (?:, \s* @[^<>@",]*)*)
232
+ \s*
233
+ :
234
+ )?
235
+ \s*
236
+ (?<local_part>[^<>@",\s]+) \s* @ \s* (?<domain>[^<>@",\s]+)
237
+ \s*
238
+ >
239
+ \s*
240
+ ,?
241
+ }x, ''))
242
+ then
243
+ display_name = $~[:display_name]
244
+ route = $~[:route]
245
+ local_part = $~[:local_part]
246
+ domain = $~[:domain]
247
+ addr_list << Address.new(unquote_phrase(display_name), route.freeze, local_part.freeze, domain.freeze).freeze
248
+ else
249
+ break
250
+ end
115
251
  end
116
- [ type, params ].freeze
117
- else
118
- [ nil, {}.freeze ].freeze
252
+
253
+ addr_list.freeze
119
254
  end
255
+ module_function :parse_mail_address_list
120
256
  end
257
+
258
+ # for backward compatibility
259
+ include Parse
260
+ module_function :split_message
261
+ module_function :parse_header
262
+ module_function :unquote_phrase
263
+ module_function :parse_parameters
121
264
  module_function :split_parameters
265
+ module_function :parse_content_type
266
+ module_function :parse_content_disposition
267
+ module_function :parse_content_language
268
+ module_function :parse_multipart_body
269
+ module_function :parse_mail_address_list
122
270
 
123
- def parse_content_type(type_txt)
124
- media_type_txt, params = split_parameters(type_txt)
125
- if (media_type_txt) then
126
- main_type, sub_type = media_type_txt.split('/', 2)
127
- if (main_type) then
128
- main_type.strip!
129
- main_type.freeze
130
- if (sub_type) then
131
- sub_type.strip!
132
- sub_type.freeze
133
- if (! main_type.empty? && ! sub_type.empty?) then
134
- return [ main_type, sub_type, params ].freeze
135
- end
136
- end
271
+ class CharsetAliases
272
+ def initialize
273
+ @alias_table = {}
274
+ end
275
+
276
+ # API methods
277
+
278
+ def [](name)
279
+ @alias_table[name.upcase]
280
+ end
281
+
282
+ def add_alias(name, encoding)
283
+ @alias_table[name.upcase] = encoding
284
+ self
285
+ end
286
+
287
+ def delete_alias(name)
288
+ @alias_table.delete(name.upcase)
289
+ end
290
+
291
+ # minimal methods like `Hash'
292
+
293
+ extend Forwardable
294
+ include Enumerable
295
+
296
+ def_delegators :@alias_table, :empty?, :size, :keys
297
+ alias length size
298
+
299
+ def key?(name)
300
+ @alias_table.key? name.upcase
301
+ end
302
+
303
+ alias has_key? key?
304
+ alias include? key?
305
+ alias member? key?
306
+
307
+ def each_key
308
+ return enum_for(:each_key) unless block_given?
309
+ @alias_table.each_key do |name|
310
+ yield(name)
137
311
  end
312
+ self
138
313
  end
139
314
 
140
- [ 'application'.dup.force_encoding(type_txt.encoding).freeze,
141
- 'octet-stream'.dup.force_encoding(type_txt.encoding).freeze,
142
- params
143
- ].freeze
144
- end
145
- module_function :parse_content_type
315
+ def each_pair
316
+ return enum_for(:each_pair) unless block_given?
317
+ @alias_table.each_pair do |name, encoding|
318
+ yield(name, encoding)
319
+ end
320
+ self
321
+ end
146
322
 
147
- def parse_content_disposition(disposition_txt)
148
- split_parameters(disposition_txt)
323
+ alias each each_pair
149
324
  end
150
- module_function :parse_content_disposition
151
325
 
152
- def parse_content_language(language_tags_txt)
153
- tag_list = language_tags_txt.split(',')
154
- for tag in tag_list
155
- tag.strip!
156
- tag.freeze
326
+ DEFAULT_CHARSET_ALIASES = CharsetAliases.new
327
+ #DEFAULT_CHARSET_ALIASES.add_alias('euc-jp', Encoding::CP51932)
328
+ DEFAULT_CHARSET_ALIASES.add_alias('euc-jp', Encoding::EUCJP_MS)
329
+ #DEFAULT_CHARSET_ALIASES.add_alias('iso-2022-jp', Encoding::CP50220)
330
+ DEFAULT_CHARSET_ALIASES.add_alias('iso-2022-jp', Encoding::CP50221)
331
+ DEFAULT_CHARSET_ALIASES.add_alias('shift_jis', Encoding::WINDOWS_31J)
332
+
333
+ module CharsetText
334
+ def self.find_string_encoding(name)
335
+ begin
336
+ Encoding.find(name)
337
+ rescue ArgumentError
338
+ raise EncodingError.new($!.to_s)
339
+ end
157
340
  end
158
- tag_list.reject!(&:empty?)
159
341
 
160
- tag_list.freeze
161
- end
162
- module_function :parse_content_language
342
+ def get_mime_charset_text(binary_string, charset, transfer_encoding=nil, charset_aliases: DEFAULT_CHARSET_ALIASES)
343
+ case (transfer_encoding&.upcase)
344
+ when 'BASE64'
345
+ text = binary_string.unpack1('m')
346
+ when 'QUOTED-PRINTABLE'
347
+ text = binary_string.unpack1('M')
348
+ else
349
+ text = binary_string.dup
350
+ end
163
351
 
164
- def parse_multipart_body(boundary, body_txt)
165
- delim = '--' + boundary
166
- term = delim + '--'
167
- body_txt2, _body_epilogue_txt = body_txt.split(term, 2)
168
- if (body_txt2) then
169
- _body_preamble_txt, body_parts_txt = body_txt2.split(delim, 2)
170
- if (body_parts_txt) then
171
- part_list = body_parts_txt.split(delim, -1)
172
- for part_txt in part_list
173
- part_txt.lstrip!
174
- part_txt.chomp!("\n")
175
- part_txt.chomp!("\r")
176
- part_txt.freeze
352
+ if (charset) then
353
+ if (charset.is_a? Encoding) then
354
+ enc = charset
355
+ else
356
+ enc = charset_aliases[charset] ||
357
+ CharsetText.find_string_encoding(charset) # raise `EncodingError' when wrong charset due to document
177
358
  end
178
- return part_list.freeze
359
+ text.force_encoding(enc)
360
+ text.valid_encoding? or raise EncodingError, "invalid encoding - #{enc}"
179
361
  end
362
+
363
+ text.freeze
180
364
  end
365
+ module_function :get_mime_charset_text
181
366
 
182
- [].freeze
183
- end
184
- module_function :parse_multipart_body
367
+ ENCODED_WORD_TRANSFER_ENCODING_TABLE = { # :nodoc:
368
+ 'B' => 'BASE64',
369
+ 'Q' => 'QUOTED-PRINTABLE'
370
+ }.freeze
185
371
 
186
- Address = Struct.new(:display_name, :route, :local_part, :domain)
187
- class Address
188
- # compatible for Net::MAP::Address
189
- alias name display_name
190
- alias mailbox local_part
191
- alias host domain
192
- end
372
+ def decode_mime_encoded_words(encoded_string, decode_charset=nil, charset_aliases: DEFAULT_CHARSET_ALIASES, charset_convert_options: {})
373
+ src = encoded_string
374
+ dst = ''.dup
193
375
 
194
- def parse_mail_address_list(address_list_txt)
195
- addr_list = []
196
- src_txt = address_list_txt.dup
197
-
198
- while (true)
199
- if (src_txt.sub!(%r{
200
- \A
201
- \s*
202
- (?<display_name>\S.*?) \s* : (?<group_list>.*?) ;
203
- \s*
204
- ,?
205
- }x, ''))
206
- then
207
- display_name = $~[:display_name]
208
- group_list = $~[:group_list]
209
- addr_list << Address.new( nil, nil, unquote_phrase(display_name), nil).freeze
210
- addr_list.concat(parse_mail_address_list(group_list))
211
- addr_list << Address.new(nil, nil, nil, nil).freeze
212
- elsif (src_txt.sub!(%r{
213
- \A
214
- \s*
215
- (?<local_part>[^<>@",\s]+) \s* @ \s* (?<domain>[^<>@",\s]+)
216
- \s*
217
- ,?
218
- }x, ''))
219
- then
220
- addr_list << Address.new(nil, nil, $~[:local_part].freeze, $~[:domain].freeze).freeze
221
- elsif (src_txt.sub!(%r{
222
- \A
223
- \s*
224
- (?<display_name>\S.*?)
225
- \s*
226
- <
227
- \s*
228
- (?:
229
- (?<route>@[^<>@",]* (?:, \s* @[^<>@",]*)*)
230
- \s*
231
- :
232
- )?
233
- \s*
234
- (?<local_part>[^<>@",\s]+) \s* @ \s* (?<domain>[^<>@",\s]+)
235
- \s*
236
- >
237
- \s*
238
- ,?
239
- }x, ''))
240
- then
241
- display_name = $~[:display_name]
242
- route = $~[:route]
243
- local_part = $~[:local_part]
244
- domain = $~[:domain]
245
- addr_list << Address.new(unquote_phrase(display_name), route.freeze, local_part.freeze, domain.freeze).freeze
376
+ if (decode_charset) then
377
+ if (decode_charset.is_a? Encoding) then
378
+ decode_charset_encoding = decode_charset
379
+ else
380
+ decode_charset_encoding = charset_aliases[decode_charset] ||
381
+ Encoding.find(decode_charset) # raise `ArgumentError' when wrong charset due to library user
382
+ end
383
+ dst.force_encoding(decode_charset_encoding)
246
384
  else
247
- break
385
+ dst.force_encoding(encoded_string.encoding)
248
386
  end
249
- end
250
387
 
251
- addr_list.freeze
388
+ while (src =~ %r{
389
+ =\? [^\s?]+ \? [BQ] \? [^\s?]+ \?=
390
+ (?:
391
+ \s+
392
+ =\? [^\s?]+ \? [BQ] \? [^\s?]+ \?=
393
+ )*
394
+ }ix)
395
+
396
+ src = $'
397
+ foreword = $`
398
+ encoded_word_list = $&.split(/\s+/, -1)
399
+
400
+ unless (foreword.empty?) then
401
+ if (dst.encoding.dummy?) then
402
+ # run the slow `String#encode' only when really needed
403
+ # because of a premise that the strings other than
404
+ # encoded words are ASCII only.
405
+ foreword.encode!(decode_charset_encoding, charset_convert_options)
406
+ end
407
+ dst << foreword
408
+ end
409
+
410
+ for encoded_word in encoded_word_list
411
+ _, charset, encoding, encoded_text, _ = encoded_word.split('?', 5)
412
+ encoding.upcase!
413
+ encoded_text.tr!('_', ' ') if (encoding == 'Q')
414
+ transfer_encoding = ENCODED_WORD_TRANSFER_ENCODING_TABLE[encoding] or raise "internal error - unknown encoding: #{encoding}"
415
+ decoded_text = get_mime_charset_text(encoded_text, charset, transfer_encoding, charset_aliases: charset_aliases)
416
+
417
+ if (decode_charset_encoding) then
418
+ if (decoded_text.encoding != decode_charset_encoding) then
419
+ # `decoded_text' is frozen
420
+ decoded_text = decoded_text.encode(decode_charset_encoding, charset_convert_options)
421
+ end
422
+ elsif (dst.ascii_only?) then
423
+ if (decoded_text.encoding.dummy?) then
424
+ dst.encode!(decoded_text.encoding, charset_convert_options)
425
+ end
426
+ else
427
+ if (decoded_text.encoding != dst.encoding) then
428
+ # `decoded_text' is frozen
429
+ decoded_text = decoded_text.encode(dst.encoding, charset_convert_options)
430
+ end
431
+ end
432
+ dst << decoded_text
433
+ end
434
+ end
435
+
436
+ unless (src.empty?) then
437
+ if (dst.encoding.dummy?) then
438
+ # run the slow `String#encode' only when really needed
439
+ # because of a premise that the strings other than encoded
440
+ # words are ASCII only.
441
+ src = src.encode(dst.encoding, charset_convert_options) # `src' may be frozen
442
+ end
443
+ dst << src
444
+ end
445
+
446
+ dst.freeze
447
+ end
448
+ module_function :decode_mime_encoded_words
252
449
  end
253
- module_function :parse_mail_address_list
254
450
 
255
451
  class Header
256
452
  include Enumerable
@@ -258,26 +454,27 @@ module RIMS
258
454
  def initialize(header_txt)
259
455
  @raw_source = header_txt
260
456
  @field_list = nil
261
- @field_map = nil
457
+ @field_table = nil
262
458
  end
263
459
 
264
460
  attr_reader :raw_source
265
461
 
266
462
  def setup_header
267
- if (@field_list.nil? || @field_map.nil?) then
268
- @field_list = RFC822.parse_header(@raw_source)
269
- @field_map = {}
463
+ if (@field_list.nil? || @field_table.nil?) then
464
+ @field_list = Parse.parse_header(@raw_source)
465
+ @field_table = {}
270
466
  for name, value in @field_list
271
467
  key = name.downcase
272
- @field_map[key] = [] unless (@field_map.key? key)
273
- @field_map[key] << value
468
+ @field_table[key] = [] unless (@field_table.key? key)
469
+ @field_table[key] << value
274
470
  end
275
- @field_map.each_value do |value_list|
471
+ @field_table.each_value do |value_list|
276
472
  value_list.freeze
277
473
  end
278
- @field_map.freeze
279
- self
474
+ @field_table.freeze
280
475
  end
476
+
477
+ nil
281
478
  end
282
479
  private :setup_header
283
480
 
@@ -292,19 +489,24 @@ module RIMS
292
489
 
293
490
  def key?(name)
294
491
  setup_header
295
- @field_map.key? name.downcase
492
+ @field_table.key? name.downcase
296
493
  end
297
494
 
495
+ # aliases like `Hash'
496
+ alias has_key? key?
497
+ alias include? key?
498
+ alias member? key?
499
+
298
500
  def [](name)
299
501
  setup_header
300
- if (value_list = @field_map[name.downcase]) then
502
+ if (value_list = @field_table[name.downcase]) then
301
503
  value_list[0]
302
504
  end
303
505
  end
304
506
 
305
507
  def fetch_upcase(name)
306
508
  setup_header
307
- if (value_list = @field_map[name.downcase]) then
509
+ if (value_list = @field_table[name.downcase]) then
308
510
  if (value = value_list[0]) then
309
511
  value.upcase
310
512
  end
@@ -313,7 +515,7 @@ module RIMS
313
515
 
314
516
  def field_value_list(name)
315
517
  setup_header
316
- @field_map[name.downcase]
518
+ @field_table[name.downcase]
317
519
  end
318
520
  end
319
521
 
@@ -326,8 +528,9 @@ module RIMS
326
528
  end
327
529
 
328
530
  class Message
329
- def initialize(msg_txt)
531
+ def initialize(msg_txt, charset_aliases: DEFAULT_CHARSET_ALIASES)
330
532
  @raw_source = msg_txt.dup.freeze
533
+ @charset_aliases = charset_aliases
331
534
  @header = nil
332
535
  @body = nil
333
536
  @content_type = nil
@@ -342,17 +545,22 @@ module RIMS
342
545
  @to = nil
343
546
  @cc = nil
344
547
  @bcc = nil
548
+ @mime_decoded_header_cache = nil
549
+ @mime_decoded_header_field_value_list_cache = nil
550
+ @mime_decoded_header_text_cache = nil
551
+ @mime_charset_body_text_cache = nil
345
552
  end
346
553
 
347
554
  attr_reader :raw_source
348
555
 
349
556
  def setup_message
350
557
  if (@header.nil? || @body.nil?) then
351
- header_txt, body_txt = RFC822.split_message(@raw_source)
558
+ header_txt, body_txt = Parse.split_message(@raw_source)
352
559
  @header = Header.new(header_txt || '')
353
560
  @body = Body.new(body_txt || '')
354
- self
355
561
  end
562
+
563
+ nil
356
564
  end
357
565
  private :setup_message
358
566
 
@@ -367,10 +575,8 @@ module RIMS
367
575
  end
368
576
 
369
577
  def setup_content_type
370
- if (@content_type.nil?) then
371
- @content_type = RFC822.parse_content_type(header['Content-Type'] || '')
372
- self
373
- end
578
+ @content_type ||= Parse.parse_content_type(header['Content-Type'] || '')
579
+ nil
374
580
  end
375
581
  private :setup_content_type
376
582
 
@@ -430,11 +636,10 @@ module RIMS
430
636
 
431
637
  def setup_content_disposition
432
638
  if (header.key? 'Content-Disposition') then
433
- if (@content_disposition.nil?) then
434
- @content_disposition = RFC822.parse_content_disposition(header['Content-Disposition'])
435
- self
436
- end
639
+ @content_disposition ||= Parse.parse_content_disposition(header['Content-Disposition'])
437
640
  end
641
+
642
+ nil
438
643
  end
439
644
  private :setup_content_type
440
645
 
@@ -468,11 +673,13 @@ module RIMS
468
673
  def setup_content_language
469
674
  if (header.key? 'Content-Language') then
470
675
  if (@content_language.nil?) then
471
- @content_language = header.field_value_list('Content-Language').map{|tags_txt| RFC822.parse_content_language(tags_txt) }.inject(:+)
676
+ @content_language = header.field_value_list('Content-Language').map{|tags_txt| Parse.parse_content_language(tags_txt) }
677
+ @content_language.flatten!
472
678
  @content_language.freeze
473
- self
474
679
  end
475
680
  end
681
+
682
+ nil
476
683
  end
477
684
  private :setup_content_language
478
685
 
@@ -499,7 +706,7 @@ module RIMS
499
706
  if (multipart?) then
500
707
  if (@parts.nil?) then
501
708
  if (boundary = self.boundary) then
502
- part_list = RFC822.parse_multipart_body(boundary, body.raw_source)
709
+ part_list = Parse.parse_multipart_body(boundary, body.raw_source)
503
710
  @parts = part_list.map{|msg_txt| Message.new(msg_txt) }
504
711
  else
505
712
  @parts = []
@@ -517,11 +724,7 @@ module RIMS
517
724
 
518
725
  def message
519
726
  if (message?) then
520
- if (@message.nil?) then
521
- @message = Message.new(body.raw_source)
522
- end
523
-
524
- @message
727
+ @message ||= Message.new(body.raw_source)
525
728
  end
526
729
  end
527
730
 
@@ -545,7 +748,8 @@ module RIMS
545
748
  ivar_name = '@' + field_name.downcase.gsub('-', '_')
546
749
  addr_list = instance_variable_get(ivar_name)
547
750
  if (addr_list.nil?) then
548
- addr_list = header.field_value_list(field_name).map{|addr_list_txt| RFC822.parse_mail_address_list(addr_list_txt) }.inject(:+)
751
+ addr_list = header.field_value_list(field_name).map{|addr_list_txt| Parse.parse_mail_address_list(addr_list_txt) }
752
+ addr_list.flatten!
549
753
  addr_list.freeze
550
754
  instance_variable_set(ivar_name, addr_list)
551
755
  end
@@ -578,6 +782,74 @@ module RIMS
578
782
  def bcc
579
783
  mail_address_header_field('bcc')
580
784
  end
785
+
786
+ def make_charset_key(charset)
787
+ if (charset.is_a? Encoding) then
788
+ charset
789
+ else
790
+ charset.downcase.freeze
791
+ end
792
+ end
793
+ private :make_charset_key
794
+
795
+ def mime_decoded_header(name, decode_charset=nil, charset_convert_options: {})
796
+ cache_key = [
797
+ name.downcase.freeze,
798
+ (decode_charset) ? make_charset_key(decode_charset) : :default
799
+ ].freeze
800
+ @mime_decoded_header_cache ||= {}
801
+ @mime_decoded_header_cache[cache_key] ||= CharsetText.decode_mime_encoded_words(header[name],
802
+ decode_charset,
803
+ charset_aliases: @charset_aliases,
804
+ charset_convert_options: charset_convert_options)
805
+ end
806
+
807
+ def mime_decoded_header_field_value_list(name, decode_charset=nil, charset_convert_options: {})
808
+ cache_key = [
809
+ name.downcase.freeze,
810
+ (decode_charset) ? make_charset_key(decode_charset) : :default
811
+ ].freeze
812
+ @mime_decoded_header_field_value_list_cache ||= {}
813
+ @mime_decoded_header_field_value_list_cache[cache_key] ||= header.field_value_list(name).map{|field_value|
814
+ CharsetText.decode_mime_encoded_words(field_value,
815
+ decode_charset,
816
+ charset_aliases: @charset_aliases,
817
+ charset_convert_options: charset_convert_options)
818
+ }.freeze
819
+ end
820
+
821
+ def mime_decoded_header_text(decode_charset=nil, charset_convert_options: {})
822
+ cache_key = (decode_charset) ? make_charset_key(decode_charset) : :default
823
+ @mime_decoded_header_text_cache ||= {}
824
+ @mime_decoded_header_text_cache[cache_key] ||= CharsetText.decode_mime_encoded_words(header.raw_source,
825
+ decode_charset,
826
+ charset_aliases: @charset_aliases,
827
+ charset_convert_options: charset_convert_options)
828
+ end
829
+
830
+ def mime_charset_body_text(charset=nil)
831
+ @mime_charset_body_text_cache ||= {}
832
+ unless (charset) then
833
+ unless (@mime_charset_body_text_cache.key? :default) then
834
+ charset = (text?) ? self.charset : Encoding::ASCII_8BIT
835
+ @mime_charset_body_text_cache[:default] = CharsetText.get_mime_charset_text(body.raw_source,
836
+ charset,
837
+ header['Content-Transfer-Encoding'],
838
+ charset_aliases: @charset_aliases)
839
+ end
840
+ @mime_charset_body_text_cache[:default]
841
+ else
842
+ cache_key = make_charset_key(charset)
843
+ @mime_charset_body_text_cache[cache_key] ||= CharsetText.get_mime_charset_text(body.raw_source,
844
+ charset,
845
+ header['Content-Transfer-Encoding'],
846
+ charset_aliases: @charset_aliases)
847
+ end
848
+ end
849
+
850
+ def mime_binary_body_string
851
+ mime_charset_body_text(Encoding::ASCII_8BIT)
852
+ end
581
853
  end
582
854
  end
583
855
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module RIMS
4
4
  module RFC822
5
- VERSION = '0.2.0'
5
+ VERSION = '0.2.1'
6
6
  end
7
7
  end
8
8
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rims-rfc822
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - TOKI Yoshinori
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-06-20 00:00:00.000000000 Z
11
+ date: 2019-07-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler