mail-sanitizer 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 26454d01da259601ec46f36f437fbe94131a934e17a409e4f1904ef7f56a2ffe
4
- data.tar.gz: 7f02a19cf2490a73c5348b0b0e191429486f9464dc88fbefda542fb4d4d922c0
3
+ metadata.gz: 658fd411823dcad053eede5f1c678558c216e60a3719c6e027051594734031f4
4
+ data.tar.gz: ed9a14a70b8d9453f5c0ba23959d0c85926d369f1daf67e0db8025074bcfd164
5
5
  SHA512:
6
- metadata.gz: 182d3b9e083e5238d72525bbff8da388727baa41c3a3b523d366cc89145aacf475ce103f78c0203c7ff537b981ade35916af18ef82e731200657b4fcc2d48a0a
7
- data.tar.gz: 4be0f0b0b81e61b09d6a707c45d6863fef66a1316efe68706f9aaaa939e3258d067121a658befc17c7ee41081b9499897200ba00d154307831f00039d808fbbd
6
+ metadata.gz: acfa6e0931da027d2c016735f650f809f1e90c89596c41cc03be9c1052d2b58818482e75450b4cd2f81a84453e6e0fd629bd33ef49576853aede72b699ff5f51
7
+ data.tar.gz: 6254de9db9f514f511fa9b2df7947a819409faea88a1ac2228a308cb8407479f4d5c4d438604a24e2554c7058b15abf2d75c7db6f84898c822a45e432d758f3a
@@ -2,7 +2,7 @@ module Mail
2
2
  module Sanitizer
3
3
  class Constant
4
4
  QUOT_SYMBOL_PATTERN = /^>/
5
- QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage-/
5
+ QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage-|forwardedby/
6
6
  QUOT_DATETIME_PATTERN = /^(on|at)/
7
7
  QUOT_KEYWORD_SET = [
8
8
  ['from:', 'sent:', 'to:', 'subject:'],
@@ -22,7 +22,7 @@ module Mail
22
22
  def delete_quot_sign(str)
23
23
  return nil, nil, nil if str.nil?
24
24
 
25
- lines = split_line(str.strip)
25
+ lines = Mail::Sanitizer::String.split_line(str.strip)
26
26
  quot_lines = []
27
27
  sign_lines = []
28
28
 
@@ -43,23 +43,8 @@ module Mail
43
43
  return lines.join("\n").strip, quot_lines.join("\n").strip, sign_lines.join("\n").strip
44
44
  end
45
45
 
46
- def split_line(str)
47
- str.split(/[\r\n]/)
48
- end
49
-
50
- def downcase(str)
51
- str.downcase.gsub(/[[:space:]]/, '')
52
- end
53
-
54
- def include_datetime?(str)
55
- DateTime.parse(str)
56
- true
57
- rescue
58
- false
59
- end
60
-
61
46
  def split_block(str)
62
- lines = split_line(str)
47
+ lines = Mail::Sanitizer::String.split_line(str)
63
48
  numrow = lines.size
64
49
 
65
50
  line_types = Array.new(numrow, :normal)
@@ -95,7 +80,7 @@ module Mail
95
80
  set.each do |key|
96
81
  keywords[key] = false
97
82
  while (i + sidx) < numrow do
98
- downcased_line = downcase(lines[i + sidx])
83
+ downcased_line = Mail::Sanitizer::String.downcase(lines[i + sidx])
99
84
  unless downcased_line.empty?
100
85
  keywords[key] = true if downcased_line =~ /^#{key}/
101
86
  sidx += 1
@@ -108,8 +93,10 @@ module Mail
108
93
  break if keywords.values.all?
109
94
  end
110
95
 
111
- if keywords.values.all? || downcase(line) =~ Mail::Sanitizer::Constant::QUOT_PATTERN ||
112
- (downcase(line) =~ Mail::Sanitizer::Constant::QUOT_DATETIME_PATTERN && include_datetime?(line))
96
+ downcased_line = Mail::Sanitizer::String.downcase(line)
97
+ if keywords.values.all? || downcased_line =~ Mail::Sanitizer::Constant::QUOT_PATTERN ||
98
+ (downcased_line =~ Mail::Sanitizer::Constant::QUOT_DATETIME_PATTERN && Mail::Sanitizer::String.include_datetime?(line)) ||
99
+ (Mail::Sanitizer::String.include_datetime?(line) && Mail::Sanitizer::String.include_email_address?(line))
113
100
  line_types[i, numrow - i] = Array.new(numrow - i, :quot) unless line_types[i] == :quot
114
101
  break
115
102
  end
@@ -0,0 +1,54 @@
1
+ module Mail
2
+ module Sanitizer
3
+ class String
4
+ ADDRESS_REGEXP = /\A([^@\s]+)@((?:[-a-z0-9]+\.)+[a-z]{2,})\z/i
5
+ SP = "[[:space:]]"
6
+ DIGIT = "[0-9０-９]"
7
+ YEAR = "(#{DIGIT}{4})#{SP}*年"
8
+ MONTH = "(#{DIGIT}{1,2})#{SP}*月"
9
+ DAY = "(#{DIGIT}{1,2})#{SP}*日"
10
+ HOUR = "(#{DIGIT}+)#{SP}*時"
11
+ MIN = "(#{DIGIT}+)#{SP}*分"
12
+
13
+ PATTERNS = [
14
+ [/#{YEAR}#{MONTH}#{DAY}/, '\1/\2/\3'],
15
+ [/#{HOUR}#{MIN}/, '\1:\2']
16
+ ]
17
+
18
+ class << self
19
+ def split_line(str)
20
+ str.split(/[\r\n]/)
21
+ end
22
+
23
+ def downcase(str)
24
+ str.downcase.gsub(/[[:space:]]/, '')
25
+ end
26
+
27
+ def include_datetime?(str)
28
+ str = replace_jp_datetime(str)
29
+ DateTime.parse(str)
30
+ true
31
+ rescue
32
+ false
33
+ end
34
+
35
+ def include_email_address?(str)
36
+ texts = str.split(/#{SP}/)
37
+ texts.each do |text|
38
+ text.strip!
39
+ text.gsub!(/^\(|^<|[^[:alpha:]]*$/, '')
40
+ return true if ADDRESS_REGEXP === text
41
+ end
42
+ return false
43
+ end
44
+
45
+ def replace_jp_datetime(str)
46
+ return nil if str.nil? || str == ''
47
+ fstr = str.dup
48
+ PATTERNS.each { |pattern| fstr.gsub!(pattern[0], pattern[1]) }
49
+ fstr
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
@@ -1,5 +1,5 @@
1
1
  module Mail
2
2
  module Sanitizer
3
- VERSION = "0.2.0"
3
+ VERSION = "0.3.0"
4
4
  end
5
5
  end
@@ -1,5 +1,6 @@
1
1
  require "mail/sanitizer/version"
2
2
  require "mail/sanitizer/constant"
3
+ require "mail/sanitizer/string"
3
4
  require "mail/sanitizer/sanitizer"
4
5
 
5
6
  module Mail
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mail-sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - MasatoMiyoshi
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-04-04 00:00:00.000000000 Z
11
+ date: 2019-04-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -86,6 +86,7 @@ files:
86
86
  - lib/mail/sanitizer.rb
87
87
  - lib/mail/sanitizer/constant.rb
88
88
  - lib/mail/sanitizer/sanitizer.rb
89
+ - lib/mail/sanitizer/string.rb
89
90
  - lib/mail/sanitizer/version.rb
90
91
  - mail-sanitizer.gemspec
91
92
  homepage: https://github.com/MasatoMiyoshi/mail-sanitizer