mail-sanitizer 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/mail/sanitizer/constant.rb +1 -1
- data/lib/mail/sanitizer/sanitizer.rb +7 -20
- data/lib/mail/sanitizer/string.rb +54 -0
- data/lib/mail/sanitizer/version.rb +1 -1
- data/lib/mail/sanitizer.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 658fd411823dcad053eede5f1c678558c216e60a3719c6e027051594734031f4
|
4
|
+
data.tar.gz: ed9a14a70b8d9453f5c0ba23959d0c85926d369f1daf67e0db8025074bcfd164
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: acfa6e0931da027d2c016735f650f809f1e90c89596c41cc03be9c1052d2b58818482e75450b4cd2f81a84453e6e0fd629bd33ef49576853aede72b699ff5f51
|
7
|
+
data.tar.gz: 6254de9db9f514f511fa9b2df7947a819409faea88a1ac2228a308cb8407479f4d5c4d438604a24e2554c7058b15abf2d75c7db6f84898c822a45e432d758f3a
|
@@ -2,7 +2,7 @@ module Mail
|
|
2
2
|
module Sanitizer
|
3
3
|
class Constant
|
4
4
|
QUOT_SYMBOL_PATTERN = /^>/
|
5
|
-
QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage
|
5
|
+
QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage-|forwardedby/
|
6
6
|
QUOT_DATETIME_PATTERN = /^(on|at)/
|
7
7
|
QUOT_KEYWORD_SET = [
|
8
8
|
['from:', 'sent:', 'to:', 'subject:'],
|
@@ -22,7 +22,7 @@ module Mail
|
|
22
22
|
def delete_quot_sign(str)
|
23
23
|
return nil, nil, nil if str.nil?
|
24
24
|
|
25
|
-
lines = split_line(str.strip)
|
25
|
+
lines = Mail::Sanitizer::String.split_line(str.strip)
|
26
26
|
quot_lines = []
|
27
27
|
sign_lines = []
|
28
28
|
|
@@ -43,23 +43,8 @@ module Mail
|
|
43
43
|
return lines.join("\n").strip, quot_lines.join("\n").strip, sign_lines.join("\n").strip
|
44
44
|
end
|
45
45
|
|
46
|
-
def split_line(str)
|
47
|
-
str.split(/[\r\n]/)
|
48
|
-
end
|
49
|
-
|
50
|
-
def downcase(str)
|
51
|
-
str.downcase.gsub(/[[:space:]]/, '')
|
52
|
-
end
|
53
|
-
|
54
|
-
def include_datetime?(str)
|
55
|
-
DateTime.parse(str)
|
56
|
-
true
|
57
|
-
rescue
|
58
|
-
false
|
59
|
-
end
|
60
|
-
|
61
46
|
def split_block(str)
|
62
|
-
lines = split_line(str)
|
47
|
+
lines = Mail::Sanitizer::String.split_line(str)
|
63
48
|
numrow = lines.size
|
64
49
|
|
65
50
|
line_types = Array.new(numrow, :normal)
|
@@ -95,7 +80,7 @@ module Mail
|
|
95
80
|
set.each do |key|
|
96
81
|
keywords[key] = false
|
97
82
|
while (i + sidx) < numrow do
|
98
|
-
downcased_line = downcase(lines[i + sidx])
|
83
|
+
downcased_line = Mail::Sanitizer::String.downcase(lines[i + sidx])
|
99
84
|
unless downcased_line.empty?
|
100
85
|
keywords[key] = true if downcased_line =~ /^#{key}/
|
101
86
|
sidx += 1
|
@@ -108,8 +93,10 @@ module Mail
|
|
108
93
|
break if keywords.values.all?
|
109
94
|
end
|
110
95
|
|
111
|
-
|
112
|
-
|
96
|
+
downcased_line = Mail::Sanitizer::String.downcase(line)
|
97
|
+
if keywords.values.all? || downcased_line =~ Mail::Sanitizer::Constant::QUOT_PATTERN ||
|
98
|
+
(downcased_line =~ Mail::Sanitizer::Constant::QUOT_DATETIME_PATTERN && Mail::Sanitizer::String.include_datetime?(line)) ||
|
99
|
+
(Mail::Sanitizer::String.include_datetime?(line) && Mail::Sanitizer::String.include_email_address?(line))
|
113
100
|
line_types[i, numrow - i] = Array.new(numrow - i, :quot) unless line_types[i] == :quot
|
114
101
|
break
|
115
102
|
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'date'

module Mail
  module Sanitizer
    # Stateless string helpers used by the sanitizer: line splitting,
    # whitespace-insensitive downcasing, and detection of date/time expressions
    # and e-mail addresses inside a single line of mail text.
    #
    # NOTE: inside the Mail::Sanitizer namespace the bare constant +String+
    # resolves to this class, not to the core ::String.
    class String
      # Whole-token e-mail address matcher (case-insensitive).
      ADDRESS_REGEXP = /\A([^@\s]+)@((?:[-a-z0-9]+\.)+[a-z]{2,})\z/i

      # Regexp source fragments used to build the Japanese date/time patterns.
      SP = '[[:space:]]'.freeze
      # ASCII digits and full-width digits (U+FF10..U+FF19).
      DIGIT = '[0-9０-９]'.freeze
      YEAR = "(#{DIGIT}{4})#{SP}*年".freeze
      MONTH = "(#{DIGIT}{1,2})#{SP}*月".freeze
      DAY = "(#{DIGIT}{1,2})#{SP}*日".freeze
      HOUR = "(#{DIGIT}+)#{SP}*時".freeze
      MIN = "(#{DIGIT}+)#{SP}*分".freeze

      # Pairs of [regexp, replacement template] applied in order by
      # .replace_jp_datetime.
      PATTERNS = [
        [/#{YEAR}#{MONTH}#{DAY}/, '\1/\2/\3'].freeze,
        [/#{HOUR}#{MIN}/, '\1:\2'].freeze
      ].freeze

      # Character sets for String#tr: full-width digits and their ASCII twins.
      FULLWIDTH_DIGITS = '０-９'.freeze
      ASCII_DIGITS = '0-9'.freeze

      # A line terminator; CRLF is tried first so it counts as ONE break.
      LINE_BREAK = /\r\n|\r|\n/
      # Separator used to cut a line into candidate address tokens.
      TOKEN_SEPARATOR = /#{SP}/
      # Decoration stripped from a token before address matching: one leading
      # "(" or "<", and any trailing run of non-alphabetic characters.
      ADDRESS_DECORATION = /^\(|^<|[^[:alpha:]]*$/

      class << self
        # Splits text into lines on CRLF, CR or LF.
        #
        # A CRLF pair is a single line break, so Windows-style text does not
        # gain an empty line after every real line.
        #
        # @param str [::String] text to split
        # @return [Array<::String>] the lines, without terminators
        def split_line(str)
          str.split(LINE_BREAK)
        end

        # Lowercases the text and removes every whitespace character, so that
        # keyword matching is insensitive to case and spacing.
        #
        # @param str [::String]
        # @return [::String]
        def downcase(str)
          str.downcase.gsub(/[[:space:]]/, '')
        end

        # Tells whether DateTime.parse accepts the text once Japanese date/time
        # notation has been rewritten by .replace_jp_datetime.
        #
        # @param str [::String, nil]
        # @return [Boolean] false for nil, empty or unparsable input
        def include_datetime?(str)
          normalized = replace_jp_datetime(str)
          return false if normalized.nil?

          DateTime.parse(normalized)
          true
        rescue StandardError
          # Best-effort predicate: unparsable text, non-string input and
          # encoding problems all mean "no date/time found".
          false
        end

        # Tells whether any whitespace-separated token of the text is an e-mail
        # address, ignoring one leading "(" or "<" and trailing punctuation.
        #
        # @param str [::String, nil]
        # @return [Boolean] false for nil input
        def include_email_address?(str)
          return false if str.nil?

          str.split(TOKEN_SEPARATOR).any? do |token|
            ADDRESS_REGEXP === token.strip.gsub(ADDRESS_DECORATION, '')
          end
        end

        # Rewrites Japanese date/time notation ("2019年4月5日", "10時30分") into
        # "2019/4/5" and "10:30". Full-width digits inside a rewritten
        # expression are converted to ASCII so the result can be parsed.
        #
        # @param str [::String, nil]
        # @return [::String, nil] a new string, or nil for nil/empty input
        def replace_jp_datetime(str)
          return nil if str.nil? || str == ''

          PATTERNS.reduce(str) do |text, (regexp, template)|
            # Re-applying the regexp to the matched slice expands the template
            # for exactly that slice; only then are its digits normalized, so
            # digits elsewhere in the text are left untouched.
            text.gsub(regexp) do |matched|
              matched.sub(regexp, template).tr(FULLWIDTH_DIGITS, ASCII_DIGITS)
            end
          end
        end
      end
    end
  end
end
|
data/lib/mail/sanitizer.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mail-sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- MasatoMiyoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-04-
|
11
|
+
date: 2019-04-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -86,6 +86,7 @@ files:
|
|
86
86
|
- lib/mail/sanitizer.rb
|
87
87
|
- lib/mail/sanitizer/constant.rb
|
88
88
|
- lib/mail/sanitizer/sanitizer.rb
|
89
|
+
- lib/mail/sanitizer/string.rb
|
89
90
|
- lib/mail/sanitizer/version.rb
|
90
91
|
- mail-sanitizer.gemspec
|
91
92
|
homepage: https://github.com/MasatoMiyoshi/mail-sanitizer
|