mail-sanitizer 0.3.0 → 0.4.0

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 658fd411823dcad053eede5f1c678558c216e60a3719c6e027051594734031f4
4
- data.tar.gz: ed9a14a70b8d9453f5c0ba23959d0c85926d369f1daf67e0db8025074bcfd164
3
+ metadata.gz: 5db6a4447245ea29e1e3926f92f7d8f6b0a12859077c5ce3a5a1f835a60de426
4
+ data.tar.gz: 7c63c81f47f55c08c369261ae5ca23e09d073b8e5b22d7b0f72187b782f8d76a
5
5
  SHA512:
6
- metadata.gz: acfa6e0931da027d2c016735f650f809f1e90c89596c41cc03be9c1052d2b58818482e75450b4cd2f81a84453e6e0fd629bd33ef49576853aede72b699ff5f51
7
- data.tar.gz: 6254de9db9f514f511fa9b2df7947a819409faea88a1ac2228a308cb8407479f4d5c4d438604a24e2554c7058b15abf2d75c7db6f84898c822a45e432d758f3a
6
+ metadata.gz: 46b1d1bf9e1095c29229b17d43394ed60f1087004e6892b0a35828fb517de2ca4e4374afec404cbadcc993df73d511604e304b1504196da163588576434b45bc
7
+ data.tar.gz: 3ce2b4ab7c2714fe91468eb1d5b6f121318e801e6017a19e922eadd2b4e5455e709211372c369903b7d3ee3ec89627ce3b825f01929ddcdcf4fcc7b6db9a2e71
@@ -2,7 +2,7 @@ module Mail
2
2
  module Sanitizer
3
3
  class Constant
4
4
  QUOT_SYMBOL_PATTERN = /^>/
5
- QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage-|forwardedby/
5
+ QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage-|forwardedby|beginforwardedmessage/
6
6
  QUOT_DATETIME_PATTERN = /^(on|at)/
7
7
  QUOT_KEYWORD_SET = [
8
8
  ['from:', 'sent:', 'to:', 'subject:'],
@@ -7,6 +7,7 @@ module Mail
7
7
 
8
8
  def initialize(body)
9
9
  @src = body.dup
10
+ @src = @src.encode('utf-8', invalid: :replace, undef: :replace) unless @src.nil?
10
11
  @sanitized_body = nil
11
12
  @quot = nil
12
13
  @sign = nil
@@ -48,14 +49,18 @@ module Mail
48
49
  numrow = lines.size
49
50
 
50
51
  line_types = Array.new(numrow, :normal)
51
- sign = false
52
+ sign_index = nil
52
53
  look_sign_symbol = false
53
54
  lines.each_with_index.reverse_each do |line, i|
54
55
  if line =~ Mail::Sanitizer::Constant::QUOT_SYMBOL_PATTERN
55
56
  line_types[i] = :quot
56
57
  elsif line =~ Mail::Sanitizer::Constant::SIGN_PATTERN
57
- line_types[i] = :sign
58
- sign = !sign
58
+ if sign_index.nil?
59
+ sign_index = i
60
+ else
61
+ line_types[i, sign_index - i + 1] = Array.new(sign_index - i + 1, :sign)
62
+ sign_index = nil
63
+ end
59
64
  elsif line =~ Mail::Sanitizer::Constant::SIGN_SYMBOL_PATTERN
60
65
  next if look_sign_symbol
61
66
  if line_types[i..-1].include?(:quot)
@@ -67,8 +72,6 @@ module Mail
67
72
  look_sign_symbol = true
68
73
  elsif line =~ /^[[:space:]]*$/
69
74
  line_types[i] = nil
70
- elsif sign
71
- line_types[i] = :sign
72
75
  end
73
76
  end
74
77
 
@@ -93,10 +96,9 @@ module Mail
93
96
  break if keywords.values.all?
94
97
  end
95
98
 
96
- downcased_line = Mail::Sanitizer::String.downcase(line)
97
- if keywords.values.all? || downcased_line =~ Mail::Sanitizer::Constant::QUOT_PATTERN ||
98
- (downcased_line =~ Mail::Sanitizer::Constant::QUOT_DATETIME_PATTERN && Mail::Sanitizer::String.include_datetime?(line)) ||
99
- (Mail::Sanitizer::String.include_datetime?(line) && Mail::Sanitizer::String.include_email_address?(line))
99
+ if keywords.values.all? ||
100
+ Mail::Sanitizer::String.quot_pattern?(line) ||
101
+ Mail::Sanitizer::String.include_datetime_and_email_address?(line)
100
102
  line_types[i, numrow - i] = Array.new(numrow - i, :quot) unless line_types[i] == :quot
101
103
  break
102
104
  end
@@ -1,7 +1,7 @@
1
1
  module Mail
2
2
  module Sanitizer
3
3
  class String
4
- ADDRESS_REGEXP = /\A([^@\s]+)@((?:[-a-z0-9]+\.)+[a-z]{2,})\z/i
4
+ ADDRESS_REGEXP = /([a-zA-Z0-9_!#$%&`'"*+\-{|}~^\/=?\.]+@[a-zA-Z0-9][a-zA-Z0-9\.\-]+)/
5
5
  SP = "[[:space:]]"
6
6
  DIGIT = "[0-90-9]"
7
7
  YEAR = "(#{DIGIT}{4})#{SP}*年"
@@ -17,13 +17,24 @@ module Mail
17
17
 
18
18
  class << self
19
19
  def split_line(str)
20
- str.split(/[\r\n]/)
20
+ str.split(/\r\n|\r|\n/)
21
21
  end
22
22
 
23
23
  def downcase(str)
24
24
  str.downcase.gsub(/[[:space:]]/, '')
25
25
  end
26
26
 
27
+ def quot_pattern?(str)
28
+ s = downcase(str)
29
+ (s =~ Mail::Sanitizer::Constant::QUOT_PATTERN) ||
30
+ (s =~ Mail::Sanitizer::Constant::QUOT_DATETIME_PATTERN && Mail::Sanitizer::String.include_datetime?(str))
31
+ end
32
+
33
+ def include_datetime_and_email_address?(str)
34
+ s = Mail::Sanitizer::String.remove_email_address(str)
35
+ Mail::Sanitizer::String.include_email_address?(str) && Mail::Sanitizer::String.include_datetime?(s)
36
+ end
37
+
27
38
  def include_datetime?(str)
28
39
  str = replace_jp_datetime(str)
29
40
  DateTime.parse(str)
@@ -33,13 +44,11 @@ module Mail
33
44
  end
34
45
 
35
46
  def include_email_address?(str)
36
- texts = str.split(/#{SP}/)
37
- texts.each do |text|
38
- text.strip!
39
- text.gsub!(/^\(|^<|[^[:alpha:]]*$/, '')
40
- return true if ADDRESS_REGEXP === text
41
- end
42
- return false
47
+ ADDRESS_REGEXP.match?(str)
48
+ end
49
+
50
+ def remove_email_address(str)
51
+ str.gsub(ADDRESS_REGEXP, '')
43
52
  end
44
53
 
45
54
  def replace_jp_datetime(str)
@@ -1,5 +1,5 @@
1
1
  module Mail
2
2
  module Sanitizer
3
- VERSION = "0.3.0"
3
+ VERSION = "0.4.0"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mail-sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - MasatoMiyoshi
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-04-05 00:00:00.000000000 Z
11
+ date: 2022-02-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -108,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
108
108
  - !ruby/object:Gem::Version
109
109
  version: '0'
110
110
  requirements: []
111
- rubygems_version: 3.0.3
111
+ rubygems_version: 3.3.3
112
112
  signing_key:
113
113
  specification_version: 4
114
114
  summary: A simple sanitizer for mail bodies