mailparser 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: acab11e0f5b50260e17f32e1fea879d8505fff45
4
- data.tar.gz: f0ce38c8bb40d11a3332515b4aabb7d835cb692f
5
- !binary "U0hBNTEy":
6
- metadata.gz: 4edf18f3211be9fb2d2114db16265fd728a4f5a69eb1d96ab9a7fc430e0cdcc740f1d3b00ae04290a6203b611cfcd0b22531ac8c5962d7a513798bbdda57c53d
7
- data.tar.gz: ee05e15d52db8517094e5c9189787c7e3f557a5c17c9d2dd8a8d3868111e671e086121296b1d13493e9ee6d43fc0ab6576fcd35bd3dda4b39dbfb91451904a95
2
+ SHA1:
3
+ metadata.gz: acf77838874db7d6c17b1e85420b5027eaea07c5
4
+ data.tar.gz: b67ae4ea3374d66de3cb11f40a5fe25074e682d6
5
+ SHA512:
6
+ metadata.gz: 790f81bfd597d4b209efb81a29ffa6356484ab5b8570140b16dac2b0e33982269df2a0fa11f16362c06e198c759eef8b087d17a3a50e2967ef4daea6710d9a97
7
+ data.tar.gz: 2f2502e6975c84472da8675c4c42d65056e1ec4f3845a2ebe8a5477a3162f4ecaa0f43c37b729da9c189ca5dd12609704bac7c2dba002d095cabe2266ca931ff
@@ -81,6 +81,8 @@ module MailParser
81
81
  r = @raw.chomp.gsub(/\r?\n/, '').gsub(/\t/, ' ')
82
82
  if @opt[:decode_mime_header] then
83
83
  @parsed = RFC2047.decode(r, @opt)
84
+ elsif @opt[:output_charset]
85
+ @parsed = @opt[:charset_converter].call(@opt[:output_charset], @opt[:output_charset], r)
84
86
  else
85
87
  @parsed = r
86
88
  end
@@ -67,7 +67,8 @@ module MailParser
67
67
  rescue
68
68
  t = Time.now
69
69
  end
70
- return RFC2822::DateTime.new(t.year, t.month, t.day, t.hour, t.min, t.sec, t.zone)
70
+ year = t.year > 9999 ? 9999 : t.year
71
+ return RFC2822::DateTime.new(year, t.month, t.day, t.hour, t.min, t.sec, t.zone)
71
72
  end
72
73
 
73
74
  # parse From, To,Cc field
@@ -91,7 +92,7 @@ module MailParser
91
92
  # @param [Hash] opt options
92
93
  # @return [MailParser::RFC2822::MsgId]
93
94
  def parse_msg_id(str, opt={})
94
- msg_id_list(str)[0]
95
+ msg_id_list(str, opt)[0]
95
96
  end
96
97
 
97
98
  # parse In-Reply-To, References field
@@ -99,7 +100,7 @@ module MailParser
99
100
  # @param [Hash] opt options
100
101
  # @return [MailParser::RFC2822::MsgIdList]
101
102
  def parse_msg_id_list(str, opt={})
102
- msg_id_list(str)
103
+ msg_id_list(str, opt)
103
104
  end
104
105
 
105
106
  # parse Keywords field
@@ -112,7 +113,7 @@ module MailParser
112
113
  if opt[:decode_mime_header] then
113
114
  s.map!{|i| RFC2047.decode(i, opt)}
114
115
  end
115
- s
116
+ s.map{|_| _conv(_, opt)}
116
117
  end
117
118
 
118
119
  # parse Return-Path field
@@ -145,6 +146,9 @@ module MailParser
145
146
  i += 1
146
147
  end
147
148
  end
149
+ name_val.keys.each do |k|
150
+ name_val[k] = _conv(name_val[k], opt)
151
+ end
148
152
  RFC2822::Received.new(name_val, date)
149
153
  end
150
154
 
@@ -158,13 +162,13 @@ module MailParser
158
162
  params = {}
159
163
  token.each do |param|
160
164
  pn, pv = param.join.split(/=/, 2)
161
- params[pn.to_s] = pv.to_s.gsub(/\A"|"\z/,"")
165
+ params[pn.to_s] = _conv(pv.to_s.gsub(/\A"|"\z/,""), opt)
162
166
  end
163
167
  type = "text" if type.nil? or type.empty?
164
168
  if subtype.nil? or subtype.empty?
165
169
  subtype = type == "text" ? "plain" : ""
166
170
  end
167
- RFC2045::ContentType.new(type, subtype, params)
171
+ RFC2045::ContentType.new(_conv(type, opt), _conv(subtype, opt), params)
168
172
  end
169
173
 
170
174
  # parse Content-Transfer-Encoding field
@@ -172,7 +176,7 @@ module MailParser
172
176
  # @param [Hash] opt options
173
177
  # @return [MailParser::RFC2045::ContentTransferEncoding]
174
178
  def parse_content_transfer_encoding(str, opt={})
175
- RFC2045::ContentTransferEncoding.new(Tokenizer.token(str).first.to_s)
179
+ RFC2045::ContentTransferEncoding.new(_conv(Tokenizer.token(str).first.to_s, opt))
176
180
  end
177
181
 
178
182
  # parse Mime-Version field
@@ -180,7 +184,7 @@ module MailParser
180
184
  # @param [Hash] opt options
181
185
  # @return [String]
182
186
  def parse_mime_version(str, opt={})
183
- Tokenizer.token(str).join
187
+ _conv(Tokenizer.token(str).join, opt)
184
188
  end
185
189
 
186
190
  # parse Content-Disposition field
@@ -193,9 +197,9 @@ module MailParser
193
197
  params = {}
194
198
  token.each do |param|
195
199
  pn, pv = param.join.split(/=/, 2)
196
- params[pn.to_s] = pv.to_s.gsub(/\A"|"\z/,"")
200
+ params[pn.to_s] = _conv(pv.to_s.gsub(/\A"|"\z/,""), opt)
197
201
  end
198
- RFC2183::ContentDisposition.new(type, params)
202
+ RFC2183::ContentDisposition.new(_conv(type, opt), params)
199
203
  end
200
204
 
201
205
  # split arry by delim
@@ -229,11 +233,11 @@ module MailParser
229
233
  if opt[:decode_mime_header] then
230
234
  display_name = RFC2047.decode(display_name, opt)
231
235
  end
232
- mailaddr = m[a1+1..a2-1].join
236
+ mailaddr = _conv(m[a1+1..a2-1].join, opt)
233
237
  local_part, domain = mailaddr.split(/@/, 2)
234
- ret << RFC2822::Mailbox.new(RFC2822::AddrSpec.new(local_part, domain), display_name)
238
+ ret << RFC2822::Mailbox.new(RFC2822::AddrSpec.new(local_part, domain), _conv(display_name, opt))
235
239
  else
236
- local_part, domain = m.join.split(/@/, 2)
240
+ local_part, domain = _conv(m.join, opt).split(/@/, 2)
237
241
  ret << RFC2822::Mailbox.new(RFC2822::AddrSpec.new(local_part, domain))
238
242
  end
239
243
  end
@@ -243,7 +247,7 @@ module MailParser
243
247
  # parse MsgId type field
244
248
  # @param [String] str
245
249
  # @return [Array<MailParser::RFC2822::MsgId>]
246
- def msg_id_list(str)
250
+ def msg_id_list(str, opt={})
247
251
  ret = []
248
252
  flag = false
249
253
  msgid = nil
@@ -257,14 +261,14 @@ module MailParser
257
261
  when ">"
258
262
  if flag
259
263
  flag = false
260
- ret << RFC2822::MsgId.new(msgid)
264
+ ret << RFC2822::MsgId.new(_conv(msgid, opt))
261
265
  end
262
266
  else
263
267
  msgid << m if flag
264
268
  end
265
269
  end
266
270
  if ret.empty?
267
- ret = str.split.map{|s| RFC2822::MsgId.new(s)}
271
+ ret = str.split.map{|s| RFC2822::MsgId.new(_conv(s, opt))}
268
272
  end
269
273
  return ret
270
274
  end
@@ -291,9 +295,9 @@ module MailParser
291
295
  @ss.pos = pos
292
296
  token << s
293
297
  end
294
- elsif s = @ss.scan(/\"(\s*(\\[#{TEXT_RE}]|[#{QTEXT_RE}]))*\s*\"/o) ||
295
- @ss.scan(/\[(\s*(\\[#{TEXT_RE}]|[#{DTEXT_RE}]))*\s*\]/o) ||
296
- @ss.scan(/[#{ATEXT_RE}]+/o)
298
+ elsif s = @ss.scan(/\"(\s*(\\[#{TEXT_RE}]|[#{QTEXT_RE}\x80-\xff]))*\s*\"/o) ||
299
+ @ss.scan(/\[(\s*(\\[#{TEXT_RE}]|[#{DTEXT_RE}\x80-\xff]))*\s*\]/o) ||
300
+ @ss.scan(/[#{ATEXT_RE}\x80-\xff]+/o)
297
301
  token << s
298
302
  else
299
303
  token << @ss.scan(/./)
@@ -337,5 +341,11 @@ module MailParser
337
341
  end
338
342
  end
339
343
 
344
+ def _conv(str, opt)
345
+ cv = opt[:charset_converter]
346
+ cs = opt[:output_charset]
347
+ cv && cs ? cv.call(cs, cs, str) : str
348
+ end
349
+
340
350
  end
341
351
  end
@@ -31,6 +31,23 @@ class TC_Loose < Test::Unit::TestCase
31
31
  end
32
32
  end
33
33
 
34
+ def test_parse_date_y10000()
35
+ tzbak = ENV["TZ"]
36
+ begin
37
+ ENV["TZ"] = "GMT"
38
+ d = parse_date("Wed, 10 Jan 10000 12:53:55 +0900")
39
+ assert_equal(9999, d.year)
40
+ assert_equal(1, d.month)
41
+ assert_equal(10, d.day)
42
+ assert_equal(3, d.hour)
43
+ assert_equal(53, d.min)
44
+ assert_equal(55, d.sec)
45
+ assert_equal("+0000", d.zone)
46
+ ensure
47
+ ENV["TZ"] = tzbak
48
+ end
49
+ end
50
+
34
51
  def test_parse_phrase_list()
35
52
  p = parse_phrase_list("abc def, ghi jkl")
36
53
  assert_equal(2, p.size)
@@ -52,6 +69,15 @@ class TC_Loose < Test::Unit::TestCase
52
69
  assert_equal("GHI JKL", p[1])
53
70
  end
54
71
 
72
+ if String.method_defined? :force_encoding
73
+ def test_parse_phrase_list_output_charset_with_raw_utf8
74
+ p = parse_phrase_list("あいう, えお", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
75
+ assert_equal(2, p.size)
76
+ assert_equal("あいう".force_encoding("utf-8"), p[0])
77
+ assert_equal("えお".force_encoding("utf-8"), p[1])
78
+ end
79
+ end
80
+
55
81
  def test_parse_received()
56
82
  tzbak = ENV["TZ"]
57
83
  begin
@@ -150,6 +176,28 @@ class TC_Loose < Test::Unit::TestCase
150
176
  end
151
177
  end
152
178
 
179
+ if String.method_defined? :force_encoding
180
+ def test_parse_received_output_charset_with_raw_utf8
181
+ tzbak = ENV["TZ"]
182
+ begin
183
+ ENV["TZ"] = "GMT"
184
+ r = parse_received("from ほげ by ふが for ぴよ; Wed, 10 Jan 2007 12:09:55 +0900", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
185
+ assert_equal(2007, r.date_time.year)
186
+ assert_equal(1, r.date_time.month)
187
+ assert_equal(10, r.date_time.day)
188
+ assert_equal(3, r.date_time.hour)
189
+ assert_equal(9, r.date_time.min)
190
+ assert_equal(55, r.date_time.sec)
191
+ assert_equal("+0000", r.date_time.zone)
192
+ assert_equal("ほげ".force_encoding("utf-8"), r.name_val["from"])
193
+ assert_equal("ふが".force_encoding("utf-8"), r.name_val["by"])
194
+ assert_equal("ぴよ".force_encoding("utf-8"), r.name_val["for"])
195
+ ensure
196
+ ENV["TZ"] = tzbak
197
+ end
198
+ end
199
+ end
200
+
153
201
  def test_parse_content_type()
154
202
  ct = parse_content_type("text/plain; charset=iso-2022-jp")
155
203
  assert_equal("text", ct.type)
@@ -190,6 +238,15 @@ class TC_Loose < Test::Unit::TestCase
190
238
  assert_equal("", ct.subtype)
191
239
  end
192
240
 
241
+ if String.method_defined? :force_encoding
242
+ def test_parse_content_type_output_charset_with_raw_utf8
243
+ ct = parse_content_type("text/plain; name=ほげ", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
244
+ assert_equal("text", ct.type)
245
+ assert_equal("plain", ct.subtype)
246
+ assert_equal({"name"=>"ほげ".force_encoding("utf-8")}, ct.params)
247
+ end
248
+ end
249
+
193
250
  def test_parse_content_transfer_encoding
194
251
  cte = parse_content_transfer_encoding("7BIT")
195
252
  assert_equal "7bit", cte.mechanism
@@ -200,6 +257,13 @@ class TC_Loose < Test::Unit::TestCase
200
257
  assert_equal "", cte.mechanism
201
258
  end
202
259
 
260
+ if String.method_defined? :force_encoding
261
+ def test_parse_content_transfer_encoding_output_charset_with_raw_utf8
262
+ cte = parse_content_transfer_encoding("あいう", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
263
+ assert_equal "あいう".force_encoding("utf-8"), cte.mechanism
264
+ end
265
+ end
266
+
203
267
  def test_parse_mime_version
204
268
  assert_equal "1.0", parse_mime_version("1.0")
205
269
  assert_equal "1.0", parse_mime_version("1 . 0")
@@ -210,6 +274,12 @@ class TC_Loose < Test::Unit::TestCase
210
274
  assert_equal "", parse_mime_version("")
211
275
  end
212
276
 
277
+ if String.method_defined? :force_encoding
278
+ def test_parse_mime_version_output_charset_with_raw_utf8
279
+ assert_equal "ほげ".force_encoding("utf-8"), parse_mime_version("ほげ", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
280
+ end
281
+ end
282
+
213
283
  def test_parse_content_disposition()
214
284
  c = parse_content_disposition("attachment; filename=hoge.txt")
215
285
  assert_equal("attachment", c.type)
@@ -227,6 +297,14 @@ class TC_Loose < Test::Unit::TestCase
227
297
  assert_equal "", c.type
228
298
  end
229
299
 
300
+ if String.method_defined? :force_encoding
301
+ def test_parse_content_disposition_output_charset_with_raw_utf8
302
+ c = parse_content_disposition("attachment; filename=ほげ", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
303
+ assert_equal("attachment", c.type)
304
+ assert_equal({"filename"=>"ほげ".force_encoding("utf-8")}, c.params)
305
+ end
306
+ end
307
+
230
308
  def test_parse_other_header
231
309
  s = parse("subject", "=?euc-jp?q?=A4=A2=A4=A4?=")
232
310
  assert_equal "=?euc-jp?q?=A4=A2=A4=A4?=", s
@@ -282,8 +360,21 @@ class TC_Loose < Test::Unit::TestCase
282
360
  ml = mailbox_list("hoge =?us-ascii?q?hoge?= <hoge.hoge@example.com>", {:decode_mime_header=>true, :output_charset=>"us-ascii", :charset_converter=>proc{|_,_,s| s.upcase}})
283
361
  assert_equal(1, ml.size)
284
362
  assert_equal("HOGE HOGE", ml[0].phrase)
285
- assert_equal("hoge.hoge", ml[0].addr_spec.local_part)
286
- assert_equal("example.com", ml[0].addr_spec.domain)
363
+ assert_equal("HOGE.HOGE", ml[0].addr_spec.local_part)
364
+ assert_equal("EXAMPLE.COM", ml[0].addr_spec.domain)
365
+ end
366
+
367
+ if String.method_defined? :force_encoding
368
+ def test_mailbox_list_output_charset_with_raw_utf8
369
+ ml = mailbox_list("ほげ <ほげ@ぴよ>, ふが@ぴよ", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
370
+ assert_equal(2, ml.size)
371
+ assert_equal("ほげ".force_encoding("utf-8"), ml[0].phrase)
372
+ assert_equal("ほげ".force_encoding("utf-8"), ml[0].addr_spec.local_part)
373
+ assert_equal("ぴよ".force_encoding("utf-8"), ml[0].addr_spec.domain)
374
+ assert_equal("", ml[1].phrase)
375
+ assert_equal("ふが".force_encoding("utf-8"), ml[1].addr_spec.local_part)
376
+ assert_equal("ぴよ".force_encoding("utf-8"), ml[1].addr_spec.domain)
377
+ end
287
378
  end
288
379
 
289
380
  def test_msg_id_list_old_in_reply_to()
@@ -317,6 +408,13 @@ class TC_Loose < Test::Unit::TestCase
317
408
  assert_equal m, []
318
409
  end
319
410
 
411
+ if String.method_defined? :force_encoding
412
+ def test_msg_id_output_charset_with_raw_utf8
413
+ m = msg_id_list "<ほげ>", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8"
414
+ assert_equal "ほげ".force_encoding("utf-8"), m[0].msg_id
415
+ end
416
+ end
417
+
320
418
  end
321
419
 
322
420
  class TC_Loose_Tokenizer < Test::Unit::TestCase
@@ -344,6 +344,15 @@ EOS
344
344
  assert_equal("abcdefg", m.subject)
345
345
  end
346
346
 
347
+ def test_subject_raw_utf8_with_output_charset
348
+ msg = StringIO.new(<<EOS)
349
+ Subject: あいうえお
350
+
351
+ EOS
352
+ m = MailParser::Message.new(msg, :output_charset=>"utf-8")
353
+ assert_equal("あいうえお", m.subject)
354
+ end
355
+
347
356
  def test_content_type()
348
357
  msg = StringIO.new(<<EOS)
349
358
  Content-Type: text/plain; charset=us-ascii
metadata CHANGED
@@ -1,27 +1,27 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mailparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.5.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - TOMITA Masahiro
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-01-23 00:00:00.000000000 Z
11
+ date: 2013-03-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mmapscanner
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - '>='
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - '>='
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  description: MailParser is a parser for mail message
@@ -64,17 +64,17 @@ require_paths:
64
64
  - lib
65
65
  required_ruby_version: !ruby/object:Gem::Requirement
66
66
  requirements:
67
- - - ">="
67
+ - - '>='
68
68
  - !ruby/object:Gem::Version
69
69
  version: '0'
70
70
  required_rubygems_version: !ruby/object:Gem::Requirement
71
71
  requirements:
72
- - - ">="
72
+ - - '>='
73
73
  - !ruby/object:Gem::Version
74
74
  version: '0'
75
75
  requirements: []
76
76
  rubyforge_project:
77
- rubygems_version: 2.0.0.preview3.1
77
+ rubygems_version: 2.0.0
78
78
  signing_key:
79
79
  specification_version: 4
80
80
  summary: Mail Parser