mailparser 0.5.0.beta2 → 0.5.0.beta3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,9 +21,10 @@ module MailParser::RFC2047
21
21
  end
22
22
  charset_converter ||= MailParser::ConvCharset.method(:conv_charset)
23
23
  last_charset = nil
24
- ret = ""
25
- ret.force_encoding(charset) if String.method_defined? :force_encoding and charset
26
- split_decode(str) do |s, cs, raw|
24
+ words = []
25
+ mime_word = false
26
+ str.split(/(\s+)/).each do |s|
27
+ s, cs, raw = decode_word(s)
27
28
  if charset
28
29
  begin
29
30
  s = charset_converter.call(cs || charset, charset, s)
@@ -32,32 +33,37 @@ module MailParser::RFC2047
32
33
  cs = nil
33
34
  end
34
35
  end
35
- ret << " " if last_charset.nil? or cs.nil?
36
- ret << s
37
- last_charset = cs
36
+ if cs
37
+ words.pop if mime_word and words.last =~ /\A\s*\z/
38
+ mime_word = true
39
+ elsif s !~ /\A\s*\z/
40
+ mime_word = false
41
+ end
42
+ words.push s
43
+ end
44
+ begin
45
+ ret = words.join
46
+ rescue
47
+ ret = words.map{|s| s.force_encoding('binary')}.join
38
48
  end
39
- return ret.strip
49
+ ret
40
50
  end
41
51
 
42
- def split_decode(str)
43
- while str =~ /\=\?([^\(\)\<\>\@\,\;\:\"\/\[\]\?\.\=]+)\?([QB])\?([^\? ]+)\?\=/i do
44
- raw = $&
45
- pre, charset, encoding, enc_text, after = $`, $1.downcase, $2.downcase, $3, $'
46
- s = pre.strip
47
- yield s, nil, raw unless s.empty?
48
- s = encoding == "q" ? q_decode(enc_text) : b_decode(enc_text)
52
+ def decode_word(str)
53
+ charset = nil
54
+ if str =~ /\=\?([^\(\)\<\>\@\,\;\:\"\/\[\]\?\.\=]+)\?([QB])\?([^\? ]+)\?\=/i
55
+ charset, encoding, enc_text = $1.downcase, $2.downcase, $3
56
+ raw = str
57
+ str = encoding == "q" ? q_decode(enc_text) : b_decode(enc_text)
49
58
  if String.method_defined? :force_encoding
50
59
  begin
51
- s.force_encoding(charset)
60
+ str.force_encoding(charset)
52
61
  rescue
53
- s.force_encoding('ascii-8bit')
62
+ str.force_encoding('ascii-8bit')
54
63
  end
55
64
  end
56
- yield s, charset, raw
57
- str = after
58
65
  end
59
- s = str.strip
60
- yield s, nil, raw unless s.empty?
66
+ [str, charset, raw]
61
67
  end
62
68
 
63
69
  def q_decode(str)
data/lib/mailparser.rb CHANGED
@@ -78,7 +78,7 @@ module MailParser
78
78
  @parsed = Loose.parse(@name, @raw, @opt)
79
79
  end
80
80
  else
81
- r = @raw.chomp.gsub(/\s+/, " ")
81
+ r = @raw.chomp.gsub(/\r?\n/, '').gsub(/\t/, ' ')
82
82
  if @opt[:decode_mime_header] then
83
83
  @parsed = RFC2047.decode(r, @opt)
84
84
  else
@@ -257,13 +257,21 @@ EOS
257
257
  def test_subject_multi_line()
258
258
  msg = StringIO.new(<<EOS)
259
259
  Subject: This is a pen.
260
- Is this a pen?
260
+ Is this a pen?
261
261
 
262
262
  EOS
263
263
  m = MailParser::Message.new(msg)
264
264
  assert_equal("This is a pen. Is this a pen?", m.subject)
265
265
  end
266
266
 
267
+ def test_subject_keep_multi_space
268
+ msg = StringIO.new(<<EOS)
269
+ Subject: This is a pen.
270
+ EOS
271
+ m = MailParser::Message.new(msg)
272
+ assert_equal("This is a pen. ", m.subject)
273
+ end
274
+
267
275
  def test_subject_multi_header()
268
276
  msg = StringIO.new(<<EOS)
269
277
  Subject: This is a pen.
@@ -292,6 +300,22 @@ EOS
292
300
  assert_equal("This is a pen.", m.subject)
293
301
  end
294
302
 
303
+ def test_subject_mime_decode_keep_multi_space
304
+ msg = StringIO.new(<<EOS)
305
+ Subject: This is a pen.
306
+ EOS
307
+ m = MailParser::Message.new(msg, :decode_mime_header=>true)
308
+ assert_equal("This is a pen. ", m.subject)
309
+ end
310
+
311
+ def test_subject_mime_decode_keep_multi_space2
312
+ msg = StringIO.new(<<EOS)
313
+ Subject: abcdefg =?us-ascii?q?hoge?= =?us-ascii?q?fuga?=
314
+ EOS
315
+ m = MailParser::Message.new(msg, :decode_mime_header=>true)
316
+ assert_equal("abcdefg hogefuga ", m.subject)
317
+ end
318
+
295
319
  def test_subject_mime_decode_charset()
296
320
  msg = StringIO.new(<<EOS)
297
321
  Subject: =?iso-2022-jp?b?GyRCJCIkJCQmJCgkKhsoQg==?=
data/test/test_rfc2047.rb CHANGED
@@ -38,20 +38,18 @@ class TC_RFC2047 < Test::Unit::TestCase
38
38
  assert_equal("とみた", MailParser::RFC2047.b_decode("44Go 44 G/4 4Gf"))
39
39
  end
40
40
 
41
- def test_split_decode_q_ascii()
42
- MailParser::RFC2047.split_decode("=?us-ascii?q?hoge?=") do |s, cs, raw |
43
- assert_equal 'hoge', s
44
- assert_equal 'us-ascii', cs
45
- assert_equal '=?us-ascii?q?hoge?=', raw
46
- end
41
+ def test_decode_word()
42
+ s, cs, raw = MailParser::RFC2047.decode_word("=?us-ascii?q?hoge?=")
43
+ assert_equal 'hoge', s
44
+ assert_equal 'us-ascii', cs
45
+ assert_equal '=?us-ascii?q?hoge?=', raw
47
46
  end
48
47
 
49
- def test_split_decode_q_ascii_upcase()
50
- MailParser::RFC2047.split_decode("=?US-ASCII?Q?hoge?=") do |s, cs, raw|
51
- assert_equal 'hoge', s
52
- assert_equal 'us-ascii', cs
53
- assert_equal '=?US-ASCII?Q?hoge?=', raw
54
- end
48
+ def test_decode_word_upcase()
49
+ s, cs, raw = MailParser::RFC2047.decode_word("=?US-ASCII?Q?hoge?=")
50
+ assert_equal 'hoge', s
51
+ assert_equal 'us-ascii', cs
52
+ assert_equal '=?US-ASCII?Q?hoge?=', raw
55
53
  end
56
54
 
57
55
  def test_decode_q_ascii()
@@ -121,6 +119,8 @@ class TC_RFC2047 < Test::Unit::TestCase
121
119
  s = MailParser::RFC2047.decode("=?sjis?b?h0A=?=", "UTF-8")
122
120
  if String.method_defined? :force_encoding
123
121
  assert_equal("\xe2\x91\xa0".force_encoding('utf-8'), s)
122
+ else
123
+ assert_equal("\xe2\x91\xa0", s)
124
124
  end
125
125
  end
126
126
 
@@ -128,9 +128,16 @@ class TC_RFC2047 < Test::Unit::TestCase
128
128
  s = MailParser::RFC2047.decode("=?iso-2022-jp?b?GyRCLSEbKEI=?=", "UTF-8")
129
129
  if String.method_defined? :force_encoding
130
130
  assert_equal("\xe2\x91\xa0".force_encoding('utf-8'), s)
131
+ else
132
+ assert_equal("\xe2\x91\xa0", s)
131
133
  end
132
134
  end
133
135
 
136
+ def test_decode_different_charset
137
+ s = MailParser::RFC2047.decode("=?iso-2022-jp?b?GyRCJCIbKEI=?= =?utf-8?b?44GE?=")
138
+ assert_equal("\e$B$\"\e(B\xE3\x81\x84", s)
139
+ end
140
+
134
141
  def test_decode_charset_converter()
135
142
  proc = Proc.new{|f,t,s| s.gsub(/o/, "X")}
136
143
  s = MailParser::RFC2047.decode("=?us-ascii?q?hoge?=", :output_charset=>"utf-8", :charset_converter=>proc)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mailparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0.beta2
4
+ version: 0.5.0.beta3
5
5
  prerelease: 6
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-09-18 00:00:00.000000000 Z
12
+ date: 2012-09-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mmapscanner