mailparser 0.5.0.beta2 → 0.5.0.beta3

Sign up to get free protection for your applications and to get access to all the features.
@@ -21,9 +21,10 @@ module MailParser::RFC2047
21
21
  end
22
22
  charset_converter ||= MailParser::ConvCharset.method(:conv_charset)
23
23
  last_charset = nil
24
- ret = ""
25
- ret.force_encoding(charset) if String.method_defined? :force_encoding and charset
26
- split_decode(str) do |s, cs, raw|
24
+ words = []
25
+ mime_word = false
26
+ str.split(/(\s+)/).each do |s|
27
+ s, cs, raw = decode_word(s)
27
28
  if charset
28
29
  begin
29
30
  s = charset_converter.call(cs || charset, charset, s)
@@ -32,32 +33,37 @@ module MailParser::RFC2047
32
33
  cs = nil
33
34
  end
34
35
  end
35
- ret << " " if last_charset.nil? or cs.nil?
36
- ret << s
37
- last_charset = cs
36
+ if cs
37
+ words.pop if mime_word and words.last =~ /\A\s*\z/
38
+ mime_word = true
39
+ elsif s !~ /\A\s*\z/
40
+ mime_word = false
41
+ end
42
+ words.push s
43
+ end
44
+ begin
45
+ ret = words.join
46
+ rescue
47
+ ret = words.map{|s| s.force_encoding('binary')}.join
38
48
  end
39
- return ret.strip
49
+ ret
40
50
  end
41
51
 
42
- def split_decode(str)
43
- while str =~ /\=\?([^\(\)\<\>\@\,\;\:\"\/\[\]\?\.\=]+)\?([QB])\?([^\? ]+)\?\=/i do
44
- raw = $&
45
- pre, charset, encoding, enc_text, after = $`, $1.downcase, $2.downcase, $3, $'
46
- s = pre.strip
47
- yield s, nil, raw unless s.empty?
48
- s = encoding == "q" ? q_decode(enc_text) : b_decode(enc_text)
52
+ def decode_word(str)
53
+ charset = nil
54
+ if str =~ /\=\?([^\(\)\<\>\@\,\;\:\"\/\[\]\?\.\=]+)\?([QB])\?([^\? ]+)\?\=/i
55
+ charset, encoding, enc_text = $1.downcase, $2.downcase, $3
56
+ raw = str
57
+ str = encoding == "q" ? q_decode(enc_text) : b_decode(enc_text)
49
58
  if String.method_defined? :force_encoding
50
59
  begin
51
- s.force_encoding(charset)
60
+ str.force_encoding(charset)
52
61
  rescue
53
- s.force_encoding('ascii-8bit')
62
+ str.force_encoding('ascii-8bit')
54
63
  end
55
64
  end
56
- yield s, charset, raw
57
- str = after
58
65
  end
59
- s = str.strip
60
- yield s, nil, raw unless s.empty?
66
+ [str, charset, raw]
61
67
  end
62
68
 
63
69
  def q_decode(str)
data/lib/mailparser.rb CHANGED
@@ -78,7 +78,7 @@ module MailParser
78
78
  @parsed = Loose.parse(@name, @raw, @opt)
79
79
  end
80
80
  else
81
- r = @raw.chomp.gsub(/\s+/, " ")
81
+ r = @raw.chomp.gsub(/\r?\n/, '').gsub(/\t/, ' ')
82
82
  if @opt[:decode_mime_header] then
83
83
  @parsed = RFC2047.decode(r, @opt)
84
84
  else
@@ -257,13 +257,21 @@ EOS
257
257
  def test_subject_multi_line()
258
258
  msg = StringIO.new(<<EOS)
259
259
  Subject: This is a pen.
260
- Is this a pen?
260
+ Is this a pen?
261
261
 
262
262
  EOS
263
263
  m = MailParser::Message.new(msg)
264
264
  assert_equal("This is a pen. Is this a pen?", m.subject)
265
265
  end
266
266
 
267
+ def test_subject_keep_multi_space
268
+ msg = StringIO.new(<<EOS)
269
+ Subject: This is a pen.
270
+ EOS
271
+ m = MailParser::Message.new(msg)
272
+ assert_equal("This is a pen. ", m.subject)
273
+ end
274
+
267
275
  def test_subject_multi_header()
268
276
  msg = StringIO.new(<<EOS)
269
277
  Subject: This is a pen.
@@ -292,6 +300,22 @@ EOS
292
300
  assert_equal("This is a pen.", m.subject)
293
301
  end
294
302
 
303
+ def test_subject_mime_decode_keep_multi_space
304
+ msg = StringIO.new(<<EOS)
305
+ Subject: This is a pen.
306
+ EOS
307
+ m = MailParser::Message.new(msg, :decode_mime_header=>true)
308
+ assert_equal("This is a pen. ", m.subject)
309
+ end
310
+
311
+ def test_subject_mime_decode_keep_multi_space2
312
+ msg = StringIO.new(<<EOS)
313
+ Subject: abcdefg =?us-ascii?q?hoge?= =?us-ascii?q?fuga?=
314
+ EOS
315
+ m = MailParser::Message.new(msg, :decode_mime_header=>true)
316
+ assert_equal("abcdefg hogefuga ", m.subject)
317
+ end
318
+
295
319
  def test_subject_mime_decode_charset()
296
320
  msg = StringIO.new(<<EOS)
297
321
  Subject: =?iso-2022-jp?b?GyRCJCIkJCQmJCgkKhsoQg==?=
data/test/test_rfc2047.rb CHANGED
@@ -38,20 +38,18 @@ class TC_RFC2047 < Test::Unit::TestCase
38
38
  assert_equal("とみた", MailParser::RFC2047.b_decode("44Go 44 G/4 4Gf"))
39
39
  end
40
40
 
41
- def test_split_decode_q_ascii()
42
- MailParser::RFC2047.split_decode("=?us-ascii?q?hoge?=") do |s, cs, raw |
43
- assert_equal 'hoge', s
44
- assert_equal 'us-ascii', cs
45
- assert_equal '=?us-ascii?q?hoge?=', raw
46
- end
41
+ def test_decode_word()
42
+ s, cs, raw = MailParser::RFC2047.decode_word("=?us-ascii?q?hoge?=")
43
+ assert_equal 'hoge', s
44
+ assert_equal 'us-ascii', cs
45
+ assert_equal '=?us-ascii?q?hoge?=', raw
47
46
  end
48
47
 
49
- def test_split_decode_q_ascii_upcase()
50
- MailParser::RFC2047.split_decode("=?US-ASCII?Q?hoge?=") do |s, cs, raw|
51
- assert_equal 'hoge', s
52
- assert_equal 'us-ascii', cs
53
- assert_equal '=?US-ASCII?Q?hoge?=', raw
54
- end
48
+ def test_decode_word_upcase()
49
+ s, cs, raw = MailParser::RFC2047.decode_word("=?US-ASCII?Q?hoge?=")
50
+ assert_equal 'hoge', s
51
+ assert_equal 'us-ascii', cs
52
+ assert_equal '=?US-ASCII?Q?hoge?=', raw
55
53
  end
56
54
 
57
55
  def test_decode_q_ascii()
@@ -121,6 +119,8 @@ class TC_RFC2047 < Test::Unit::TestCase
121
119
  s = MailParser::RFC2047.decode("=?sjis?b?h0A=?=", "UTF-8")
122
120
  if String.method_defined? :force_encoding
123
121
  assert_equal("\xe2\x91\xa0".force_encoding('utf-8'), s)
122
+ else
123
+ assert_equal("\xe2\x91\xa0", s)
124
124
  end
125
125
  end
126
126
 
@@ -128,9 +128,16 @@ class TC_RFC2047 < Test::Unit::TestCase
128
128
  s = MailParser::RFC2047.decode("=?iso-2022-jp?b?GyRCLSEbKEI=?=", "UTF-8")
129
129
  if String.method_defined? :force_encoding
130
130
  assert_equal("\xe2\x91\xa0".force_encoding('utf-8'), s)
131
+ else
132
+ assert_equal("\xe2\x91\xa0", s)
131
133
  end
132
134
  end
133
135
 
136
+ def test_decode_different_charset
137
+ s = MailParser::RFC2047.decode("=?iso-2022-jp?b?GyRCJCIbKEI=?= =?utf-8?b?44GE?=")
138
+ assert_equal("\e$B$\"\e(B\xE3\x81\x84", s)
139
+ end
140
+
134
141
  def test_decode_charset_converter()
135
142
  proc = Proc.new{|f,t,s| s.gsub(/o/, "X")}
136
143
  s = MailParser::RFC2047.decode("=?us-ascii?q?hoge?=", :output_charset=>"utf-8", :charset_converter=>proc)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mailparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0.beta2
4
+ version: 0.5.0.beta3
5
5
  prerelease: 6
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-09-18 00:00:00.000000000 Z
12
+ date: 2012-09-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mmapscanner