mail-sanitizer 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 26454d01da259601ec46f36f437fbe94131a934e17a409e4f1904ef7f56a2ffe
4
- data.tar.gz: 7f02a19cf2490a73c5348b0b0e191429486f9464dc88fbefda542fb4d4d922c0
3
+ metadata.gz: 658fd411823dcad053eede5f1c678558c216e60a3719c6e027051594734031f4
4
+ data.tar.gz: ed9a14a70b8d9453f5c0ba23959d0c85926d369f1daf67e0db8025074bcfd164
5
5
  SHA512:
6
- metadata.gz: 182d3b9e083e5238d72525bbff8da388727baa41c3a3b523d366cc89145aacf475ce103f78c0203c7ff537b981ade35916af18ef82e731200657b4fcc2d48a0a
7
- data.tar.gz: 4be0f0b0b81e61b09d6a707c45d6863fef66a1316efe68706f9aaaa939e3258d067121a658befc17c7ee41081b9499897200ba00d154307831f00039d808fbbd
6
+ metadata.gz: acfa6e0931da027d2c016735f650f809f1e90c89596c41cc03be9c1052d2b58818482e75450b4cd2f81a84453e6e0fd629bd33ef49576853aede72b699ff5f51
7
+ data.tar.gz: 6254de9db9f514f511fa9b2df7947a819409faea88a1ac2228a308cb8407479f4d5c4d438604a24e2554c7058b15abf2d75c7db6f84898c822a45e432d758f3a
@@ -2,7 +2,7 @@ module Mail
2
2
  module Sanitizer
3
3
  class Constant
4
4
  QUOT_SYMBOL_PATTERN = /^>/
5
- QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage-/
5
+ QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage-|forwardedby/
6
6
  QUOT_DATETIME_PATTERN = /^(on|at)/
7
7
  QUOT_KEYWORD_SET = [
8
8
  ['from:', 'sent:', 'to:', 'subject:'],
@@ -22,7 +22,7 @@ module Mail
22
22
  def delete_quot_sign(str)
23
23
  return nil, nil, nil if str.nil?
24
24
 
25
- lines = split_line(str.strip)
25
+ lines = Mail::Sanitizer::String.split_line(str.strip)
26
26
  quot_lines = []
27
27
  sign_lines = []
28
28
 
@@ -43,23 +43,8 @@ module Mail
43
43
  return lines.join("\n").strip, quot_lines.join("\n").strip, sign_lines.join("\n").strip
44
44
  end
45
45
 
46
- def split_line(str)
47
- str.split(/[\r\n]/)
48
- end
49
-
50
- def downcase(str)
51
- str.downcase.gsub(/[[:space:]]/, '')
52
- end
53
-
54
- def include_datetime?(str)
55
- DateTime.parse(str)
56
- true
57
- rescue
58
- false
59
- end
60
-
61
46
  def split_block(str)
62
- lines = split_line(str)
47
+ lines = Mail::Sanitizer::String.split_line(str)
63
48
  numrow = lines.size
64
49
 
65
50
  line_types = Array.new(numrow, :normal)
@@ -95,7 +80,7 @@ module Mail
95
80
  set.each do |key|
96
81
  keywords[key] = false
97
82
  while (i + sidx) < numrow do
98
- downcased_line = downcase(lines[i + sidx])
83
+ downcased_line = Mail::Sanitizer::String.downcase(lines[i + sidx])
99
84
  unless downcased_line.empty?
100
85
  keywords[key] = true if downcased_line =~ /^#{key}/
101
86
  sidx += 1
@@ -108,8 +93,10 @@ module Mail
108
93
  break if keywords.values.all?
109
94
  end
110
95
 
111
- if keywords.values.all? || downcase(line) =~ Mail::Sanitizer::Constant::QUOT_PATTERN ||
112
- (downcase(line) =~ Mail::Sanitizer::Constant::QUOT_DATETIME_PATTERN && include_datetime?(line))
96
+ downcased_line = Mail::Sanitizer::String.downcase(line)
97
+ if keywords.values.all? || downcased_line =~ Mail::Sanitizer::Constant::QUOT_PATTERN ||
98
+ (downcased_line =~ Mail::Sanitizer::Constant::QUOT_DATETIME_PATTERN && Mail::Sanitizer::String.include_datetime?(line)) ||
99
+ (Mail::Sanitizer::String.include_datetime?(line) && Mail::Sanitizer::String.include_email_address?(line))
113
100
  line_types[i, numrow - i] = Array.new(numrow - i, :quot) unless line_types[i] == :quot
114
101
  break
115
102
  end
@@ -0,0 +1,54 @@
1
# DateTime lives in the "date" stdlib and is not loaded automatically. Without
# this require, the rescue in include_datetime? would swallow the NameError and
# the method would silently answer false for every input.
require "date"

module Mail
  module Sanitizer
    # Stateless string helpers shared by the sanitizer: line splitting,
    # whitespace-insensitive downcasing, and date-time / e-mail address detection.
    class String
      # A single token that is, in its entirety, a mailbox such as "bob@example.com".
      ADDRESS_REGEXP = /\A([^@\s]+)@((?:[-a-z0-9]+\.)+[a-z]{2,})\z/i

      # Regexp source fragments used to build the Japanese date/time patterns.
      SP = "[[:space:]]".freeze
      # ASCII digits plus the full-width digits U+FF10..U+FF19. The escapes keep
      # the full-width range intact even if this file is Unicode-normalized.
      DIGIT = "[0-9\uFF10-\uFF19]".freeze
      YEAR = "(#{DIGIT}{4})#{SP}*年".freeze
      MONTH = "(#{DIGIT}{1,2})#{SP}*月".freeze
      DAY = "(#{DIGIT}{1,2})#{SP}*日".freeze
      HOUR = "(#{DIGIT}+)#{SP}*時".freeze
      MIN = "(#{DIGIT}+)#{SP}*分".freeze

      # [regexp, replacement template] pairs applied in order by replace_jp_datetime.
      PATTERNS = [
        [/#{YEAR}#{MONTH}#{DAY}/, '\1/\2/\3'],
        [/#{HOUR}#{MIN}/, '\1:\2']
      ].freeze

      # tr character sets for folding full-width digits to ASCII.
      FULLWIDTH_DIGITS = "\uFF10-\uFF19".freeze
      ASCII_DIGITS = "0-9".freeze

      # CRLF is tried first so a Windows line ending counts as ONE break.
      LINE_BREAK = /\r\n|\r|\n/
      # Separator between the tokens examined by include_email_address?.
      TOKEN_SEPARATOR = /#{SP}/
      # A leading "(" or "<", and any trailing run of non-letters, around a token.
      ADDRESS_DECORATION = /\A\(|\A<|[^[:alpha:]]*\z/

      class << self
        # Splits +str+ into lines on CRLF, CR or LF.
        #
        # @param str [::String]
        # @return [Array<::String>] lines without terminators; trailing empty
        #   lines are dropped by String#split
        def split_line(str)
          str.split(LINE_BREAK)
        end

        # Downcases +str+ and removes every whitespace character, so that
        # "From : Bob" becomes "from:bob".
        #
        # @param str [::String]
        # @return [::String]
        def downcase(str)
          str.downcase.gsub(/[[:space:]]/, '')
        end

        # Tells whether DateTime.parse can extract a date and/or time from
        # +str+ once Japanese date notation has been normalised.
        #
        # @param str [::String, nil]
        # @return [Boolean] false for nil, empty or unparsable input
        def include_datetime?(str)
          normalized = replace_jp_datetime(str)
          return false if normalized.nil?

          DateTime.parse(normalized)
          true
        rescue StandardError
          # Deliberately best-effort: any failure means "no date-time found".
          false
        end

        # Tells whether any whitespace-separated token of +str+ is an e-mail
        # address, ignoring a leading "(" or "<" and trailing non-letters
        # (e.g. "<bob@example.com>," is accepted).
        #
        # @param str [::String, nil]
        # @return [Boolean] false for nil input
        def include_email_address?(str)
          return false if str.nil?

          str.split(TOKEN_SEPARATOR).any? do |token|
            ADDRESS_REGEXP.match?(token.strip.gsub(ADDRESS_DECORATION, ''))
          end
        end

        # Rewrites Japanese date/time notation into a form DateTime.parse
        # understands: "2019年4月5日 10時30分" becomes "2019/4/5 10:30".
        # Full-width digits inside a matched date/time are folded to ASCII;
        # text outside the matches is left untouched.
        #
        # @param str [::String, nil]
        # @return [::String, nil] a new string, or nil for nil/empty input
        def replace_jp_datetime(str)
          return nil if str.nil? || str == ''

          PATTERNS.reduce(str) do |text, (regexp, template)|
            # Folding digits first does not change what the regexp matches,
            # because DIGIT accepts both forms.
            text.gsub(regexp) { |hit| hit.tr(FULLWIDTH_DIGITS, ASCII_DIGITS) }
                .gsub(regexp, template)
          end
        end
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  module Mail
2
2
  module Sanitizer
3
- VERSION = "0.2.0"
3
+ VERSION = "0.3.0"
4
4
  end
5
5
  end
@@ -1,5 +1,6 @@
1
1
  require "mail/sanitizer/version"
2
2
  require "mail/sanitizer/constant"
3
+ require "mail/sanitizer/string"
3
4
  require "mail/sanitizer/sanitizer"
4
5
 
5
6
  module Mail
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mail-sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - MasatoMiyoshi
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-04-04 00:00:00.000000000 Z
11
+ date: 2019-04-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -86,6 +86,7 @@ files:
86
86
  - lib/mail/sanitizer.rb
87
87
  - lib/mail/sanitizer/constant.rb
88
88
  - lib/mail/sanitizer/sanitizer.rb
89
+ - lib/mail/sanitizer/string.rb
89
90
  - lib/mail/sanitizer/version.rb
90
91
  - mail-sanitizer.gemspec
91
92
  homepage: https://github.com/MasatoMiyoshi/mail-sanitizer