mail-sanitizer 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/mail/sanitizer/constant.rb +1 -1
- data/lib/mail/sanitizer/sanitizer.rb +11 -9
- data/lib/mail/sanitizer/string.rb +18 -9
- data/lib/mail/sanitizer/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5db6a4447245ea29e1e3926f92f7d8f6b0a12859077c5ce3a5a1f835a60de426
|
4
|
+
data.tar.gz: 7c63c81f47f55c08c369261ae5ca23e09d073b8e5b22d7b0f72187b782f8d76a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 46b1d1bf9e1095c29229b17d43394ed60f1087004e6892b0a35828fb517de2ca4e4374afec404cbadcc993df73d511604e304b1504196da163588576434b45bc
|
7
|
+
data.tar.gz: 3ce2b4ab7c2714fe91468eb1d5b6f121318e801e6017a19e922eadd2b4e5455e709211372c369903b7d3ee3ec89627ce3b825f01929ddcdcf4fcc7b6db9a2e71
|
data/lib/mail/sanitizer/constant.rb
CHANGED
@@ -2,7 +2,7 @@ module Mail
|
|
2
2
|
module Sanitizer
|
3
3
|
class Constant
|
4
4
|
QUOT_SYMBOL_PATTERN = /^>/
|
5
|
-
QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage-|forwardedby/
|
5
|
+
QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage-|forwardedby|beginforwardedmessage/
|
6
6
|
QUOT_DATETIME_PATTERN = /^(on|at)/
|
7
7
|
QUOT_KEYWORD_SET = [
|
8
8
|
['from:', 'sent:', 'to:', 'subject:'],
|
data/lib/mail/sanitizer/sanitizer.rb
CHANGED
@@ -7,6 +7,7 @@ module Mail
|
|
7
7
|
|
8
8
|
def initialize(body)
|
9
9
|
@src = body.dup
|
10
|
+
@src = @src.encode('utf-8', invalid: :replace, undef: :replace) unless @src.nil?
|
10
11
|
@sanitized_body = nil
|
11
12
|
@quot = nil
|
12
13
|
@sign = nil
|
@@ -48,14 +49,18 @@ module Mail
|
|
48
49
|
numrow = lines.size
|
49
50
|
|
50
51
|
line_types = Array.new(numrow, :normal)
|
51
|
-
|
52
|
+
sign_index = nil
|
52
53
|
look_sign_symbol = false
|
53
54
|
lines.each_with_index.reverse_each do |line, i|
|
54
55
|
if line =~ Mail::Sanitizer::Constant::QUOT_SYMBOL_PATTERN
|
55
56
|
line_types[i] = :quot
|
56
57
|
elsif line =~ Mail::Sanitizer::Constant::SIGN_PATTERN
|
57
|
-
|
58
|
-
|
58
|
+
if sign_index.nil?
|
59
|
+
sign_index = i
|
60
|
+
else
|
61
|
+
line_types[i, sign_index - i + 1] = Array.new(sign_index - i + 1, :sign)
|
62
|
+
sign_index = nil
|
63
|
+
end
|
59
64
|
elsif line =~ Mail::Sanitizer::Constant::SIGN_SYMBOL_PATTERN
|
60
65
|
next if look_sign_symbol
|
61
66
|
if line_types[i..-1].include?(:quot)
|
@@ -67,8 +72,6 @@ module Mail
|
|
67
72
|
look_sign_symbol = true
|
68
73
|
elsif line =~ /^[[:space:]]*$/
|
69
74
|
line_types[i] = nil
|
70
|
-
elsif sign
|
71
|
-
line_types[i] = :sign
|
72
75
|
end
|
73
76
|
end
|
74
77
|
|
@@ -93,10 +96,9 @@ module Mail
|
|
93
96
|
break if keywords.values.all?
|
94
97
|
end
|
95
98
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
(Mail::Sanitizer::String.include_datetime?(line) && Mail::Sanitizer::String.include_email_address?(line))
|
99
|
+
if keywords.values.all? ||
|
100
|
+
Mail::Sanitizer::String.quot_pattern?(line) ||
|
101
|
+
Mail::Sanitizer::String.include_datetime_and_email_address?(line)
|
100
102
|
line_types[i, numrow - i] = Array.new(numrow - i, :quot) unless line_types[i] == :quot
|
101
103
|
break
|
102
104
|
end
|
data/lib/mail/sanitizer/string.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Mail
|
2
2
|
module Sanitizer
|
3
3
|
class String
|
4
|
-
ADDRESS_REGEXP =
|
4
|
+
ADDRESS_REGEXP = /([a-zA-Z0-9_!#$%&`'"*+\-{|}~^\/=?\.]+@[a-zA-Z0-9][a-zA-Z0-9\.\-]+)/
|
5
5
|
SP = "[[:space:]]"
|
6
6
|
DIGIT = "[0-9０-９]"
|
7
7
|
YEAR = "(#{DIGIT}{4})#{SP}*年"
|
@@ -17,13 +17,24 @@ module Mail
|
|
17
17
|
|
18
18
|
class << self
|
19
19
|
def split_line(str)
|
20
|
-
str.split(
|
20
|
+
str.split(/\r\n|\r|\n/)
|
21
21
|
end
|
22
22
|
|
23
23
|
def downcase(str)
|
24
24
|
str.downcase.gsub(/[[:space:]]/, '')
|
25
25
|
end
|
26
26
|
|
27
|
+
def quot_pattern?(str)
|
28
|
+
s = downcase(str)
|
29
|
+
(s =~ Mail::Sanitizer::Constant::QUOT_PATTERN) ||
|
30
|
+
(s =~ Mail::Sanitizer::Constant::QUOT_DATETIME_PATTERN && Mail::Sanitizer::String.include_datetime?(str))
|
31
|
+
end
|
32
|
+
|
33
|
+
def include_datetime_and_email_address?(str)
|
34
|
+
s = Mail::Sanitizer::String.remove_email_address(str)
|
35
|
+
Mail::Sanitizer::String.include_email_address?(str) && Mail::Sanitizer::String.include_datetime?(s)
|
36
|
+
end
|
37
|
+
|
27
38
|
def include_datetime?(str)
|
28
39
|
str = replace_jp_datetime(str)
|
29
40
|
DateTime.parse(str)
|
@@ -33,13 +44,11 @@ module Mail
|
|
33
44
|
end
|
34
45
|
|
35
46
|
def include_email_address?(str)
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
end
|
42
|
-
return false
|
47
|
+
ADDRESS_REGEXP.match?(str)
|
48
|
+
end
|
49
|
+
|
50
|
+
def remove_email_address(str)
|
51
|
+
str.gsub(ADDRESS_REGEXP, '')
|
43
52
|
end
|
44
53
|
|
45
54
|
def replace_jp_datetime(str)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mail-sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- MasatoMiyoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-02-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -108,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
108
108
|
- !ruby/object:Gem::Version
|
109
109
|
version: '0'
|
110
110
|
requirements: []
|
111
|
-
rubygems_version: 3.
|
111
|
+
rubygems_version: 3.3.3
|
112
112
|
signing_key:
|
113
113
|
specification_version: 4
|
114
114
|
summary: A simple sanitizer for mail bodies
|