mailparser 0.5.3 → 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: acab11e0f5b50260e17f32e1fea879d8505fff45
4
- data.tar.gz: f0ce38c8bb40d11a3332515b4aabb7d835cb692f
5
- !binary "U0hBNTEy":
6
- metadata.gz: 4edf18f3211be9fb2d2114db16265fd728a4f5a69eb1d96ab9a7fc430e0cdcc740f1d3b00ae04290a6203b611cfcd0b22531ac8c5962d7a513798bbdda57c53d
7
- data.tar.gz: ee05e15d52db8517094e5c9189787c7e3f557a5c17c9d2dd8a8d3868111e671e086121296b1d13493e9ee6d43fc0ab6576fcd35bd3dda4b39dbfb91451904a95
2
+ SHA1:
3
+ metadata.gz: acf77838874db7d6c17b1e85420b5027eaea07c5
4
+ data.tar.gz: b67ae4ea3374d66de3cb11f40a5fe25074e682d6
5
+ SHA512:
6
+ metadata.gz: 790f81bfd597d4b209efb81a29ffa6356484ab5b8570140b16dac2b0e33982269df2a0fa11f16362c06e198c759eef8b087d17a3a50e2967ef4daea6710d9a97
7
+ data.tar.gz: 2f2502e6975c84472da8675c4c42d65056e1ec4f3845a2ebe8a5477a3162f4ecaa0f43c37b729da9c189ca5dd12609704bac7c2dba002d095cabe2266ca931ff
@@ -81,6 +81,8 @@ module MailParser
81
81
  r = @raw.chomp.gsub(/\r?\n/, '').gsub(/\t/, ' ')
82
82
  if @opt[:decode_mime_header] then
83
83
  @parsed = RFC2047.decode(r, @opt)
84
+ elsif @opt[:output_charset]
85
+ @parsed = @opt[:charset_converter].call(@opt[:output_charset], @opt[:output_charset], r)
84
86
  else
85
87
  @parsed = r
86
88
  end
@@ -67,7 +67,8 @@ module MailParser
67
67
  rescue
68
68
  t = Time.now
69
69
  end
70
- return RFC2822::DateTime.new(t.year, t.month, t.day, t.hour, t.min, t.sec, t.zone)
70
+ year = t.year > 9999 ? 9999 : t.year
71
+ return RFC2822::DateTime.new(year, t.month, t.day, t.hour, t.min, t.sec, t.zone)
71
72
  end
72
73
 
73
74
  # parse From, To, Cc field
@@ -91,7 +92,7 @@ module MailParser
91
92
  # @param [Hash] opt options
92
93
  # @return [MailParser::RFC2822::MsgId]
93
94
  def parse_msg_id(str, opt={})
94
- msg_id_list(str)[0]
95
+ msg_id_list(str, opt)[0]
95
96
  end
96
97
 
97
98
  # parse In-Reply-To, References field
@@ -99,7 +100,7 @@ module MailParser
99
100
  # @param [Hash] opt options
100
101
  # @return [MailParser::RFC2822::MsgIdList]
101
102
  def parse_msg_id_list(str, opt={})
102
- msg_id_list(str)
103
+ msg_id_list(str, opt)
103
104
  end
104
105
 
105
106
  # parse Keywords field
@@ -112,7 +113,7 @@ module MailParser
112
113
  if opt[:decode_mime_header] then
113
114
  s.map!{|i| RFC2047.decode(i, opt)}
114
115
  end
115
- s
116
+ s.map{|_| _conv(_, opt)}
116
117
  end
117
118
 
118
119
  # parse Return-Path field
@@ -145,6 +146,9 @@ module MailParser
145
146
  i += 1
146
147
  end
147
148
  end
149
+ name_val.keys.each do |k|
150
+ name_val[k] = _conv(name_val[k], opt)
151
+ end
148
152
  RFC2822::Received.new(name_val, date)
149
153
  end
150
154
 
@@ -158,13 +162,13 @@ module MailParser
158
162
  params = {}
159
163
  token.each do |param|
160
164
  pn, pv = param.join.split(/=/, 2)
161
- params[pn.to_s] = pv.to_s.gsub(/\A"|"\z/,"")
165
+ params[pn.to_s] = _conv(pv.to_s.gsub(/\A"|"\z/,""), opt)
162
166
  end
163
167
  type = "text" if type.nil? or type.empty?
164
168
  if subtype.nil? or subtype.empty?
165
169
  subtype = type == "text" ? "plain" : ""
166
170
  end
167
- RFC2045::ContentType.new(type, subtype, params)
171
+ RFC2045::ContentType.new(_conv(type, opt), _conv(subtype, opt), params)
168
172
  end
169
173
 
170
174
  # parse Content-Transfer-Encoding field
@@ -172,7 +176,7 @@ module MailParser
172
176
  # @param [Hash] opt options
173
177
  # @return [MailParser::RFC2045::ContentTransferEncoding]
174
178
  def parse_content_transfer_encoding(str, opt={})
175
- RFC2045::ContentTransferEncoding.new(Tokenizer.token(str).first.to_s)
179
+ RFC2045::ContentTransferEncoding.new(_conv(Tokenizer.token(str).first.to_s, opt))
176
180
  end
177
181
 
178
182
  # parse Mime-Version field
@@ -180,7 +184,7 @@ module MailParser
180
184
  # @param [Hash] opt options
181
185
  # @return [String]
182
186
  def parse_mime_version(str, opt={})
183
- Tokenizer.token(str).join
187
+ _conv(Tokenizer.token(str).join, opt)
184
188
  end
185
189
 
186
190
  # parse Content-Disposition field
@@ -193,9 +197,9 @@ module MailParser
193
197
  params = {}
194
198
  token.each do |param|
195
199
  pn, pv = param.join.split(/=/, 2)
196
- params[pn.to_s] = pv.to_s.gsub(/\A"|"\z/,"")
200
+ params[pn.to_s] = _conv(pv.to_s.gsub(/\A"|"\z/,""), opt)
197
201
  end
198
- RFC2183::ContentDisposition.new(type, params)
202
+ RFC2183::ContentDisposition.new(_conv(type, opt), params)
199
203
  end
200
204
 
201
205
  # split array by delim
@@ -229,11 +233,11 @@ module MailParser
229
233
  if opt[:decode_mime_header] then
230
234
  display_name = RFC2047.decode(display_name, opt)
231
235
  end
232
- mailaddr = m[a1+1..a2-1].join
236
+ mailaddr = _conv(m[a1+1..a2-1].join, opt)
233
237
  local_part, domain = mailaddr.split(/@/, 2)
234
- ret << RFC2822::Mailbox.new(RFC2822::AddrSpec.new(local_part, domain), display_name)
238
+ ret << RFC2822::Mailbox.new(RFC2822::AddrSpec.new(local_part, domain), _conv(display_name, opt))
235
239
  else
236
- local_part, domain = m.join.split(/@/, 2)
240
+ local_part, domain = _conv(m.join, opt).split(/@/, 2)
237
241
  ret << RFC2822::Mailbox.new(RFC2822::AddrSpec.new(local_part, domain))
238
242
  end
239
243
  end
@@ -243,7 +247,7 @@ module MailParser
243
247
  # parse MsgId type field
244
248
  # @param [String] str
245
249
  # @return [Array<MailParser::RFC2822::MsgId>]
246
- def msg_id_list(str)
250
+ def msg_id_list(str, opt={})
247
251
  ret = []
248
252
  flag = false
249
253
  msgid = nil
@@ -257,14 +261,14 @@ module MailParser
257
261
  when ">"
258
262
  if flag
259
263
  flag = false
260
- ret << RFC2822::MsgId.new(msgid)
264
+ ret << RFC2822::MsgId.new(_conv(msgid, opt))
261
265
  end
262
266
  else
263
267
  msgid << m if flag
264
268
  end
265
269
  end
266
270
  if ret.empty?
267
- ret = str.split.map{|s| RFC2822::MsgId.new(s)}
271
+ ret = str.split.map{|s| RFC2822::MsgId.new(_conv(s, opt))}
268
272
  end
269
273
  return ret
270
274
  end
@@ -291,9 +295,9 @@ module MailParser
291
295
  @ss.pos = pos
292
296
  token << s
293
297
  end
294
- elsif s = @ss.scan(/\"(\s*(\\[#{TEXT_RE}]|[#{QTEXT_RE}]))*\s*\"/o) ||
295
- @ss.scan(/\[(\s*(\\[#{TEXT_RE}]|[#{DTEXT_RE}]))*\s*\]/o) ||
296
- @ss.scan(/[#{ATEXT_RE}]+/o)
298
+ elsif s = @ss.scan(/\"(\s*(\\[#{TEXT_RE}]|[#{QTEXT_RE}\x80-\xff]))*\s*\"/o) ||
299
+ @ss.scan(/\[(\s*(\\[#{TEXT_RE}]|[#{DTEXT_RE}\x80-\xff]))*\s*\]/o) ||
300
+ @ss.scan(/[#{ATEXT_RE}\x80-\xff]+/o)
297
301
  token << s
298
302
  else
299
303
  token << @ss.scan(/./)
@@ -337,5 +341,11 @@ module MailParser
337
341
  end
338
342
  end
339
343
 
344
+ def _conv(str, opt)
345
+ cv = opt[:charset_converter]
346
+ cs = opt[:output_charset]
347
+ cv && cs ? cv.call(cs, cs, str) : str
348
+ end
349
+
340
350
  end
341
351
  end
@@ -31,6 +31,23 @@ class TC_Loose < Test::Unit::TestCase
31
31
  end
32
32
  end
33
33
 
34
+ def test_parse_date_y10000()
35
+ tzbak = ENV["TZ"]
36
+ begin
37
+ ENV["TZ"] = "GMT"
38
+ d = parse_date("Wed, 10 Jan 10000 12:53:55 +0900")
39
+ assert_equal(9999, d.year)
40
+ assert_equal(1, d.month)
41
+ assert_equal(10, d.day)
42
+ assert_equal(3, d.hour)
43
+ assert_equal(53, d.min)
44
+ assert_equal(55, d.sec)
45
+ assert_equal("+0000", d.zone)
46
+ ensure
47
+ ENV["TZ"] = tzbak
48
+ end
49
+ end
50
+
34
51
  def test_parse_phrase_list()
35
52
  p = parse_phrase_list("abc def, ghi jkl")
36
53
  assert_equal(2, p.size)
@@ -52,6 +69,15 @@ class TC_Loose < Test::Unit::TestCase
52
69
  assert_equal("GHI JKL", p[1])
53
70
  end
54
71
 
72
+ if String.method_defined? :force_encoding
73
+ def test_parse_phrase_list_output_charset_with_raw_utf8
74
+ p = parse_phrase_list("あいう, えお", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
75
+ assert_equal(2, p.size)
76
+ assert_equal("あいう".force_encoding("utf-8"), p[0])
77
+ assert_equal("えお".force_encoding("utf-8"), p[1])
78
+ end
79
+ end
80
+
55
81
  def test_parse_received()
56
82
  tzbak = ENV["TZ"]
57
83
  begin
@@ -150,6 +176,28 @@ class TC_Loose < Test::Unit::TestCase
150
176
  end
151
177
  end
152
178
 
179
+ if String.method_defined? :force_encoding
180
+ def test_parse_received_output_charset_with_raw_utf8
181
+ tzbak = ENV["TZ"]
182
+ begin
183
+ ENV["TZ"] = "GMT"
184
+ r = parse_received("from ほげ by ふが for ぴよ; Wed, 10 Jan 2007 12:09:55 +0900", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
185
+ assert_equal(2007, r.date_time.year)
186
+ assert_equal(1, r.date_time.month)
187
+ assert_equal(10, r.date_time.day)
188
+ assert_equal(3, r.date_time.hour)
189
+ assert_equal(9, r.date_time.min)
190
+ assert_equal(55, r.date_time.sec)
191
+ assert_equal("+0000", r.date_time.zone)
192
+ assert_equal("ほげ".force_encoding("utf-8"), r.name_val["from"])
193
+ assert_equal("ふが".force_encoding("utf-8"), r.name_val["by"])
194
+ assert_equal("ぴよ".force_encoding("utf-8"), r.name_val["for"])
195
+ ensure
196
+ ENV["TZ"] = tzbak
197
+ end
198
+ end
199
+ end
200
+
153
201
  def test_parse_content_type()
154
202
  ct = parse_content_type("text/plain; charset=iso-2022-jp")
155
203
  assert_equal("text", ct.type)
@@ -190,6 +238,15 @@ class TC_Loose < Test::Unit::TestCase
190
238
  assert_equal("", ct.subtype)
191
239
  end
192
240
 
241
+ if String.method_defined? :force_encoding
242
+ def test_parse_content_type_output_charset_with_raw_utf8
243
+ ct = parse_content_type("text/plain; name=ほげ", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
244
+ assert_equal("text", ct.type)
245
+ assert_equal("plain", ct.subtype)
246
+ assert_equal({"name"=>"ほげ".force_encoding("utf-8")}, ct.params)
247
+ end
248
+ end
249
+
193
250
  def test_parse_content_transfer_encoding
194
251
  cte = parse_content_transfer_encoding("7BIT")
195
252
  assert_equal "7bit", cte.mechanism
@@ -200,6 +257,13 @@ class TC_Loose < Test::Unit::TestCase
200
257
  assert_equal "", cte.mechanism
201
258
  end
202
259
 
260
+ if String.method_defined? :force_encoding
261
+ def test_parse_content_transfer_encoding_output_charset_with_raw_utf8
262
+ cte = parse_content_transfer_encoding("あいう", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
263
+ assert_equal "あいう".force_encoding("utf-8"), cte.mechanism
264
+ end
265
+ end
266
+
203
267
  def test_parse_mime_version
204
268
  assert_equal "1.0", parse_mime_version("1.0")
205
269
  assert_equal "1.0", parse_mime_version("1 . 0")
@@ -210,6 +274,12 @@ class TC_Loose < Test::Unit::TestCase
210
274
  assert_equal "", parse_mime_version("")
211
275
  end
212
276
 
277
+ if String.method_defined? :force_encoding
278
+ def test_parse_mime_version_output_charset_with_raw_utf8
279
+ assert_equal "ほげ".force_encoding("utf-8"), parse_mime_version("ほげ", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
280
+ end
281
+ end
282
+
213
283
  def test_parse_content_disposition()
214
284
  c = parse_content_disposition("attachment; filename=hoge.txt")
215
285
  assert_equal("attachment", c.type)
@@ -227,6 +297,14 @@ class TC_Loose < Test::Unit::TestCase
227
297
  assert_equal "", c.type
228
298
  end
229
299
 
300
+ if String.method_defined? :force_encoding
301
+ def test_parse_content_disposition_output_charset_with_raw_utf8
302
+ c = parse_content_disposition("attachment; filename=ほげ", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
303
+ assert_equal("attachment", c.type)
304
+ assert_equal({"filename"=>"ほげ".force_encoding("utf-8")}, c.params)
305
+ end
306
+ end
307
+
230
308
  def test_parse_other_header
231
309
  s = parse("subject", "=?euc-jp?q?=A4=A2=A4=A4?=")
232
310
  assert_equal "=?euc-jp?q?=A4=A2=A4=A4?=", s
@@ -282,8 +360,21 @@ class TC_Loose < Test::Unit::TestCase
282
360
  ml = mailbox_list("hoge =?us-ascii?q?hoge?= <hoge.hoge@example.com>", {:decode_mime_header=>true, :output_charset=>"us-ascii", :charset_converter=>proc{|_,_,s| s.upcase}})
283
361
  assert_equal(1, ml.size)
284
362
  assert_equal("HOGE HOGE", ml[0].phrase)
285
- assert_equal("hoge.hoge", ml[0].addr_spec.local_part)
286
- assert_equal("example.com", ml[0].addr_spec.domain)
363
+ assert_equal("HOGE.HOGE", ml[0].addr_spec.local_part)
364
+ assert_equal("EXAMPLE.COM", ml[0].addr_spec.domain)
365
+ end
366
+
367
+ if String.method_defined? :force_encoding
368
+ def test_mailbox_list_output_charset_with_raw_utf8
369
+ ml = mailbox_list("ほげ <ほげ@ぴよ>, ふが@ぴよ", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
370
+ assert_equal(2, ml.size)
371
+ assert_equal("ほげ".force_encoding("utf-8"), ml[0].phrase)
372
+ assert_equal("ほげ".force_encoding("utf-8"), ml[0].addr_spec.local_part)
373
+ assert_equal("ぴよ".force_encoding("utf-8"), ml[0].addr_spec.domain)
374
+ assert_equal("", ml[1].phrase)
375
+ assert_equal("ふが".force_encoding("utf-8"), ml[1].addr_spec.local_part)
376
+ assert_equal("ぴよ".force_encoding("utf-8"), ml[1].addr_spec.domain)
377
+ end
287
378
  end
288
379
 
289
380
  def test_msg_id_list_old_in_reply_to()
@@ -317,6 +408,13 @@ class TC_Loose < Test::Unit::TestCase
317
408
  assert_equal m, []
318
409
  end
319
410
 
411
+ if String.method_defined? :force_encoding
412
+ def test_msg_id_output_charset_with_raw_utf8
413
+ m = msg_id_list "<ほげ>", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8"
414
+ assert_equal "ほげ".force_encoding("utf-8"), m[0].msg_id
415
+ end
416
+ end
417
+
320
418
  end
321
419
 
322
420
  class TC_Loose_Tokenizer < Test::Unit::TestCase
@@ -344,6 +344,15 @@ EOS
344
344
  assert_equal("abcdefg", m.subject)
345
345
  end
346
346
 
347
+ def test_subject_raw_utf8_with_output_charset
348
+ msg = StringIO.new(<<EOS)
349
+ Subject: あいうえお
350
+
351
+ EOS
352
+ m = MailParser::Message.new(msg, :output_charset=>"utf-8")
353
+ assert_equal("あいうえお", m.subject)
354
+ end
355
+
347
356
  def test_content_type()
348
357
  msg = StringIO.new(<<EOS)
349
358
  Content-Type: text/plain; charset=us-ascii
metadata CHANGED
@@ -1,27 +1,27 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mailparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.5.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - TOMITA Masahiro
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-01-23 00:00:00.000000000 Z
11
+ date: 2013-03-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mmapscanner
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - '>='
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - '>='
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  description: MailParser is a parser for mail message
@@ -64,17 +64,17 @@ require_paths:
64
64
  - lib
65
65
  required_ruby_version: !ruby/object:Gem::Requirement
66
66
  requirements:
67
- - - ">="
67
+ - - '>='
68
68
  - !ruby/object:Gem::Version
69
69
  version: '0'
70
70
  required_rubygems_version: !ruby/object:Gem::Requirement
71
71
  requirements:
72
- - - ">="
72
+ - - '>='
73
73
  - !ruby/object:Gem::Version
74
74
  version: '0'
75
75
  requirements: []
76
76
  rubyforge_project:
77
- rubygems_version: 2.0.0.preview3.1
77
+ rubygems_version: 2.0.0
78
78
  signing_key:
79
79
  specification_version: 4
80
80
  summary: Mail Parser