mail-sanitizer 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/mail/sanitizer/constant.rb +1 -1
- data/lib/mail/sanitizer/sanitizer.rb +7 -20
- data/lib/mail/sanitizer/string.rb +54 -0
- data/lib/mail/sanitizer/version.rb +1 -1
- data/lib/mail/sanitizer.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 658fd411823dcad053eede5f1c678558c216e60a3719c6e027051594734031f4
|
4
|
+
data.tar.gz: ed9a14a70b8d9453f5c0ba23959d0c85926d369f1daf67e0db8025074bcfd164
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: acfa6e0931da027d2c016735f650f809f1e90c89596c41cc03be9c1052d2b58818482e75450b4cd2f81a84453e6e0fd629bd33ef49576853aede72b699ff5f51
|
7
|
+
data.tar.gz: 6254de9db9f514f511fa9b2df7947a819409faea88a1ac2228a308cb8407479f4d5c4d438604a24e2554c7058b15abf2d75c7db6f84898c822a45e432d758f3a
|
@@ -2,7 +2,7 @@ module Mail
|
|
2
2
|
module Sanitizer
|
3
3
|
class Constant
|
4
4
|
QUOT_SYMBOL_PATTERN = /^>/
|
5
|
-
QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage-/
|
5
|
+
QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage-|forwardedby/
|
6
6
|
QUOT_DATETIME_PATTERN = /^(on|at)/
|
7
7
|
QUOT_KEYWORD_SET = [
|
8
8
|
['from:', 'sent:', 'to:', 'subject:'],
|
@@ -22,7 +22,7 @@ module Mail
|
|
22
22
|
def delete_quot_sign(str)
|
23
23
|
return nil, nil, nil if str.nil?
|
24
24
|
|
25
|
-
lines = split_line(str.strip)
|
25
|
+
lines = Mail::Sanitizer::String.split_line(str.strip)
|
26
26
|
quot_lines = []
|
27
27
|
sign_lines = []
|
28
28
|
|
@@ -43,23 +43,8 @@ module Mail
|
|
43
43
|
return lines.join("\n").strip, quot_lines.join("\n").strip, sign_lines.join("\n").strip
|
44
44
|
end
|
45
45
|
|
46
|
-
def split_line(str)
|
47
|
-
str.split(/[\r\n]/)
|
48
|
-
end
|
49
|
-
|
50
|
-
def downcase(str)
|
51
|
-
str.downcase.gsub(/[[:space:]]/, '')
|
52
|
-
end
|
53
|
-
|
54
|
-
def include_datetime?(str)
|
55
|
-
DateTime.parse(str)
|
56
|
-
true
|
57
|
-
rescue
|
58
|
-
false
|
59
|
-
end
|
60
|
-
|
61
46
|
def split_block(str)
|
62
|
-
lines = split_line(str)
|
47
|
+
lines = Mail::Sanitizer::String.split_line(str)
|
63
48
|
numrow = lines.size
|
64
49
|
|
65
50
|
line_types = Array.new(numrow, :normal)
|
@@ -95,7 +80,7 @@ module Mail
|
|
95
80
|
set.each do |key|
|
96
81
|
keywords[key] = false
|
97
82
|
while (i + sidx) < numrow do
|
98
|
-
downcased_line = downcase(lines[i + sidx])
|
83
|
+
downcased_line = Mail::Sanitizer::String.downcase(lines[i + sidx])
|
99
84
|
unless downcased_line.empty?
|
100
85
|
keywords[key] = true if downcased_line =~ /^#{key}/
|
101
86
|
sidx += 1
|
@@ -108,8 +93,10 @@ module Mail
|
|
108
93
|
break if keywords.values.all?
|
109
94
|
end
|
110
95
|
|
111
|
-
|
112
|
-
|
96
|
+
downcased_line = Mail::Sanitizer::String.downcase(line)
|
97
|
+
if keywords.values.all? || downcased_line =~ Mail::Sanitizer::Constant::QUOT_PATTERN ||
|
98
|
+
(downcased_line =~ Mail::Sanitizer::Constant::QUOT_DATETIME_PATTERN && Mail::Sanitizer::String.include_datetime?(line)) ||
|
99
|
+
(Mail::Sanitizer::String.include_datetime?(line) && Mail::Sanitizer::String.include_email_address?(line))
|
113
100
|
line_types[i, numrow - i] = Array.new(numrow - i, :quot) unless line_types[i] == :quot
|
114
101
|
break
|
115
102
|
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Mail
|
2
|
+
module Sanitizer
|
3
|
+
class String
|
4
|
+
ADDRESS_REGEXP = /\A([^@\s]+)@((?:[-a-z0-9]+\.)+[a-z]{2,})\z/i
|
5
|
+
SP = "[[:space:]]"
|
6
|
+
DIGIT = "[0-90-9]"
|
7
|
+
YEAR = "(#{DIGIT}{4})#{SP}*年"
|
8
|
+
MONTH = "(#{DIGIT}{1,2})#{SP}*月"
|
9
|
+
DAY = "(#{DIGIT}{1,2})#{SP}*日"
|
10
|
+
HOUR = "(#{DIGIT}+)#{SP}*時"
|
11
|
+
MIN = "(#{DIGIT}+)#{SP}*分"
|
12
|
+
|
13
|
+
PATTERNS = [
|
14
|
+
[/#{YEAR}#{MONTH}#{DAY}/, '\1/\2/\3'],
|
15
|
+
[/#{HOUR}#{MIN}/, '\1:\2']
|
16
|
+
]
|
17
|
+
|
18
|
+
class << self
|
19
|
+
def split_line(str)
|
20
|
+
str.split(/[\r\n]/)
|
21
|
+
end
|
22
|
+
|
23
|
+
def downcase(str)
|
24
|
+
str.downcase.gsub(/[[:space:]]/, '')
|
25
|
+
end
|
26
|
+
|
27
|
+
def include_datetime?(str)
|
28
|
+
str = replace_jp_datetime(str)
|
29
|
+
DateTime.parse(str)
|
30
|
+
true
|
31
|
+
rescue
|
32
|
+
false
|
33
|
+
end
|
34
|
+
|
35
|
+
def include_email_address?(str)
|
36
|
+
texts = str.split(/#{SP}/)
|
37
|
+
texts.each do |text|
|
38
|
+
text.strip!
|
39
|
+
text.gsub!(/^\(|^<|[^[:alpha:]]*$/, '')
|
40
|
+
return true if ADDRESS_REGEXP === text
|
41
|
+
end
|
42
|
+
return false
|
43
|
+
end
|
44
|
+
|
45
|
+
def replace_jp_datetime(str)
|
46
|
+
return nil if str.nil? || str == ''
|
47
|
+
fstr = str.dup
|
48
|
+
PATTERNS.each { |pattern| fstr.gsub!(pattern[0], pattern[1]) }
|
49
|
+
fstr
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
data/lib/mail/sanitizer.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mail-sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- MasatoMiyoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-04-
|
11
|
+
date: 2019-04-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -86,6 +86,7 @@ files:
|
|
86
86
|
- lib/mail/sanitizer.rb
|
87
87
|
- lib/mail/sanitizer/constant.rb
|
88
88
|
- lib/mail/sanitizer/sanitizer.rb
|
89
|
+
- lib/mail/sanitizer/string.rb
|
89
90
|
- lib/mail/sanitizer/version.rb
|
90
91
|
- mail-sanitizer.gemspec
|
91
92
|
homepage: https://github.com/MasatoMiyoshi/mail-sanitizer
|