mail-sanitizer 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 658fd411823dcad053eede5f1c678558c216e60a3719c6e027051594734031f4
4
- data.tar.gz: ed9a14a70b8d9453f5c0ba23959d0c85926d369f1daf67e0db8025074bcfd164
3
+ metadata.gz: 5db6a4447245ea29e1e3926f92f7d8f6b0a12859077c5ce3a5a1f835a60de426
4
+ data.tar.gz: 7c63c81f47f55c08c369261ae5ca23e09d073b8e5b22d7b0f72187b782f8d76a
5
5
  SHA512:
6
- metadata.gz: acfa6e0931da027d2c016735f650f809f1e90c89596c41cc03be9c1052d2b58818482e75450b4cd2f81a84453e6e0fd629bd33ef49576853aede72b699ff5f51
7
- data.tar.gz: 6254de9db9f514f511fa9b2df7947a819409faea88a1ac2228a308cb8407479f4d5c4d438604a24e2554c7058b15abf2d75c7db6f84898c822a45e432d758f3a
6
+ metadata.gz: 46b1d1bf9e1095c29229b17d43394ed60f1087004e6892b0a35828fb517de2ca4e4374afec404cbadcc993df73d511604e304b1504196da163588576434b45bc
7
+ data.tar.gz: 3ce2b4ab7c2714fe91468eb1d5b6f121318e801e6017a19e922eadd2b4e5455e709211372c369903b7d3ee3ec89627ce3b825f01929ddcdcf4fcc7b6db9a2e71
@@ -2,7 +2,7 @@ module Mail
2
2
  module Sanitizer
3
3
  class Constant
4
4
  QUOT_SYMBOL_PATTERN = /^>/
5
- QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage-|forwardedby/
5
+ QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage-|forwardedby|beginforwardedmessage/
6
6
  QUOT_DATETIME_PATTERN = /^(on|at)/
7
7
  QUOT_KEYWORD_SET = [
8
8
  ['from:', 'sent:', 'to:', 'subject:'],
@@ -7,6 +7,7 @@ module Mail
7
7
 
8
8
  def initialize(body)
9
9
  @src = body.dup
10
+ @src = @src.encode('utf-8', invalid: :replace, undef: :replace) unless @src.nil?
10
11
  @sanitized_body = nil
11
12
  @quot = nil
12
13
  @sign = nil
@@ -48,14 +49,18 @@ module Mail
48
49
  numrow = lines.size
49
50
 
50
51
  line_types = Array.new(numrow, :normal)
51
- sign = false
52
+ sign_index = nil
52
53
  look_sign_symbol = false
53
54
  lines.each_with_index.reverse_each do |line, i|
54
55
  if line =~ Mail::Sanitizer::Constant::QUOT_SYMBOL_PATTERN
55
56
  line_types[i] = :quot
56
57
  elsif line =~ Mail::Sanitizer::Constant::SIGN_PATTERN
57
- line_types[i] = :sign
58
- sign = !sign
58
+ if sign_index.nil?
59
+ sign_index = i
60
+ else
61
+ line_types[i, sign_index - i + 1] = Array.new(sign_index - i + 1, :sign)
62
+ sign_index = nil
63
+ end
59
64
  elsif line =~ Mail::Sanitizer::Constant::SIGN_SYMBOL_PATTERN
60
65
  next if look_sign_symbol
61
66
  if line_types[i..-1].include?(:quot)
@@ -67,8 +72,6 @@ module Mail
67
72
  look_sign_symbol = true
68
73
  elsif line =~ /^[[:space:]]*$/
69
74
  line_types[i] = nil
70
- elsif sign
71
- line_types[i] = :sign
72
75
  end
73
76
  end
74
77
 
@@ -93,10 +96,9 @@ module Mail
93
96
  break if keywords.values.all?
94
97
  end
95
98
 
96
- downcased_line = Mail::Sanitizer::String.downcase(line)
97
- if keywords.values.all? || downcased_line =~ Mail::Sanitizer::Constant::QUOT_PATTERN ||
98
- (downcased_line =~ Mail::Sanitizer::Constant::QUOT_DATETIME_PATTERN && Mail::Sanitizer::String.include_datetime?(line)) ||
99
- (Mail::Sanitizer::String.include_datetime?(line) && Mail::Sanitizer::String.include_email_address?(line))
99
+ if keywords.values.all? ||
100
+ Mail::Sanitizer::String.quot_pattern?(line) ||
101
+ Mail::Sanitizer::String.include_datetime_and_email_address?(line)
100
102
  line_types[i, numrow - i] = Array.new(numrow - i, :quot) unless line_types[i] == :quot
101
103
  break
102
104
  end
@@ -1,7 +1,7 @@
1
1
  module Mail
2
2
  module Sanitizer
3
3
  class String
4
- ADDRESS_REGEXP = /\A([^@\s]+)@((?:[-a-z0-9]+\.)+[a-z]{2,})\z/i
4
+ ADDRESS_REGEXP = /([a-zA-Z0-9_!#$%&`'"*+\-{|}~^\/=?\.]+@[a-zA-Z0-9][a-zA-Z0-9\.\-]+)/
5
5
  SP = "[[:space:]]"
6
6
  DIGIT = "[0-90-9]"
7
7
  YEAR = "(#{DIGIT}{4})#{SP}*年"
@@ -17,13 +17,24 @@ module Mail
17
17
 
18
18
  class << self
19
19
  def split_line(str)
20
- str.split(/[\r\n]/)
20
+ str.split(/\r\n|\r|\n/)
21
21
  end
22
22
 
23
23
  def downcase(str)
24
24
  str.downcase.gsub(/[[:space:]]/, '')
25
25
  end
26
26
 
27
+ def quot_pattern?(str)
28
+ s = downcase(str)
29
+ (s =~ Mail::Sanitizer::Constant::QUOT_PATTERN) ||
30
+ (s =~ Mail::Sanitizer::Constant::QUOT_DATETIME_PATTERN && Mail::Sanitizer::String.include_datetime?(str))
31
+ end
32
+
33
+ def include_datetime_and_email_address?(str)
34
+ s = Mail::Sanitizer::String.remove_email_address(str)
35
+ Mail::Sanitizer::String.include_email_address?(str) && Mail::Sanitizer::String.include_datetime?(s)
36
+ end
37
+
27
38
  def include_datetime?(str)
28
39
  str = replace_jp_datetime(str)
29
40
  DateTime.parse(str)
@@ -33,13 +44,11 @@ module Mail
33
44
  end
34
45
 
35
46
  def include_email_address?(str)
36
- texts = str.split(/#{SP}/)
37
- texts.each do |text|
38
- text.strip!
39
- text.gsub!(/^\(|^<|[^[:alpha:]]*$/, '')
40
- return true if ADDRESS_REGEXP === text
41
- end
42
- return false
47
+ ADDRESS_REGEXP.match?(str)
48
+ end
49
+
50
+ def remove_email_address(str)
51
+ str.gsub(ADDRESS_REGEXP, '')
43
52
  end
44
53
 
45
54
  def replace_jp_datetime(str)
@@ -1,5 +1,5 @@
1
1
  module Mail
2
2
  module Sanitizer
3
- VERSION = "0.3.0"
3
+ VERSION = "0.4.0"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mail-sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - MasatoMiyoshi
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-04-05 00:00:00.000000000 Z
11
+ date: 2022-02-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -108,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
108
108
  - !ruby/object:Gem::Version
109
109
  version: '0'
110
110
  requirements: []
111
- rubygems_version: 3.0.3
111
+ rubygems_version: 3.3.3
112
112
  signing_key:
113
113
  specification_version: 4
114
114
  summary: A simple sanitizer for mail bodies