mail-sanitizer 0.3.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 658fd411823dcad053eede5f1c678558c216e60a3719c6e027051594734031f4
4
- data.tar.gz: ed9a14a70b8d9453f5c0ba23959d0c85926d369f1daf67e0db8025074bcfd164
3
+ metadata.gz: 68bc7ac18fa8f7c6ab281621af51b38ef15182c34c994dcc6eb40ad3b5e96395
4
+ data.tar.gz: e218cbd8dd5ba270b61195ed1f8295bc18468ad1659129070da9bcde8c0c2067
5
5
  SHA512:
6
- metadata.gz: acfa6e0931da027d2c016735f650f809f1e90c89596c41cc03be9c1052d2b58818482e75450b4cd2f81a84453e6e0fd629bd33ef49576853aede72b699ff5f51
7
- data.tar.gz: 6254de9db9f514f511fa9b2df7947a819409faea88a1ac2228a308cb8407479f4d5c4d438604a24e2554c7058b15abf2d75c7db6f84898c822a45e432d758f3a
6
+ metadata.gz: 7b436a57c57a46d4d745565fdfd382ee48b7d739653146426fc44f30ec76e92f2a9459706c801288b16142ce047f5c1cd73fe0970184247a45592f9321b9093a
7
+ data.tar.gz: f31f2e76788bafa8c4c518402331126757acd57e10ca469c03d1c4e6bd347d679a5674d0efda9028528a560daffac10870034290b979eda6600ef05fa835610c
@@ -0,0 +1,24 @@
1
+ name: CI
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ test:
7
+ runs-on: ubuntu-latest
8
+
9
+ strategy:
10
+ fail-fast: false
11
+ matrix:
12
+ ruby: ['2.6', '2.7', '3.0', '3.1', '3.2']
13
+
14
+ name: ruby ${{ matrix.ruby }}
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+ - uses: ruby/setup-ruby@v1
19
+ with:
20
+ ruby-version: ${{ matrix.ruby }}
21
+ bundler-cache: true
22
+ - name: Run test
23
+ run: |
24
+ bundle exec rspec
@@ -2,7 +2,7 @@ module Mail
2
2
  module Sanitizer
3
3
  class Constant
4
4
  QUOT_SYMBOL_PATTERN = /^>/
5
- QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage-|forwardedby/
5
+ QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage-|forwardedby|beginforwardedmessage/
6
6
  QUOT_DATETIME_PATTERN = /^(on|at)/
7
7
  QUOT_KEYWORD_SET = [
8
8
  ['from:', 'sent:', 'to:', 'subject:'],
@@ -7,6 +7,7 @@ module Mail
7
7
 
8
8
  def initialize(body)
9
9
  @src = body.dup
10
+ @src = @src.encode('utf-8', invalid: :replace, undef: :replace) unless @src.nil?
10
11
  @sanitized_body = nil
11
12
  @quot = nil
12
13
  @sign = nil
@@ -48,14 +49,18 @@ module Mail
48
49
  numrow = lines.size
49
50
 
50
51
  line_types = Array.new(numrow, :normal)
51
- sign = false
52
+ sign_index = nil
52
53
  look_sign_symbol = false
53
54
  lines.each_with_index.reverse_each do |line, i|
54
55
  if line =~ Mail::Sanitizer::Constant::QUOT_SYMBOL_PATTERN
55
56
  line_types[i] = :quot
56
57
  elsif line =~ Mail::Sanitizer::Constant::SIGN_PATTERN
57
- line_types[i] = :sign
58
- sign = !sign
58
+ if sign_index.nil?
59
+ sign_index = i
60
+ else
61
+ line_types[i, sign_index - i + 1] = Array.new(sign_index - i + 1, :sign)
62
+ sign_index = nil
63
+ end
59
64
  elsif line =~ Mail::Sanitizer::Constant::SIGN_SYMBOL_PATTERN
60
65
  next if look_sign_symbol
61
66
  if line_types[i..-1].include?(:quot)
@@ -67,8 +72,6 @@ module Mail
67
72
  look_sign_symbol = true
68
73
  elsif line =~ /^[[:space:]]*$/
69
74
  line_types[i] = nil
70
- elsif sign
71
- line_types[i] = :sign
72
75
  end
73
76
  end
74
77
 
@@ -93,10 +96,9 @@ module Mail
93
96
  break if keywords.values.all?
94
97
  end
95
98
 
96
- downcased_line = Mail::Sanitizer::String.downcase(line)
97
- if keywords.values.all? || downcased_line =~ Mail::Sanitizer::Constant::QUOT_PATTERN ||
98
- (downcased_line =~ Mail::Sanitizer::Constant::QUOT_DATETIME_PATTERN && Mail::Sanitizer::String.include_datetime?(line)) ||
99
- (Mail::Sanitizer::String.include_datetime?(line) && Mail::Sanitizer::String.include_email_address?(line))
99
+ if keywords.values.all? ||
100
+ Mail::Sanitizer::String.quot_pattern?(line) ||
101
+ Mail::Sanitizer::String.include_datetime_and_email_address?(line)
100
102
  line_types[i, numrow - i] = Array.new(numrow - i, :quot) unless line_types[i] == :quot
101
103
  break
102
104
  end
@@ -1,7 +1,7 @@
1
1
  module Mail
2
2
  module Sanitizer
3
3
  class String
4
- ADDRESS_REGEXP = /\A([^@\s]+)@((?:[-a-z0-9]+\.)+[a-z]{2,})\z/i
4
+ ADDRESS_REGEXP = /([a-zA-Z0-9_!#$%&`'"*+\-{|}~^\/=?\.]+@[a-zA-Z0-9][a-zA-Z0-9\.\-]+)/
5
5
  SP = "[[:space:]]"
6
6
  DIGIT = "[0-9０-９]"
7
7
  YEAR = "(#{DIGIT}{4})#{SP}*年"
@@ -17,13 +17,24 @@ module Mail
17
17
 
18
18
  class << self
19
19
  def split_line(str)
20
- str.split(/[\r\n]/)
20
+ str.split(/\r\n|\r|\n/)
21
21
  end
22
22
 
23
23
  def downcase(str)
24
24
  str.downcase.gsub(/[[:space:]]/, '')
25
25
  end
26
26
 
27
+ def quot_pattern?(str)
28
+ s = downcase(str)
29
+ (s =~ Mail::Sanitizer::Constant::QUOT_PATTERN) ||
30
+ (s =~ Mail::Sanitizer::Constant::QUOT_DATETIME_PATTERN && Mail::Sanitizer::String.include_datetime?(str))
31
+ end
32
+
33
+ def include_datetime_and_email_address?(str)
34
+ s = Mail::Sanitizer::String.remove_email_address(str)
35
+ Mail::Sanitizer::String.include_email_address?(str) && Mail::Sanitizer::String.include_datetime?(s)
36
+ end
37
+
27
38
  def include_datetime?(str)
28
39
  str = replace_jp_datetime(str)
29
40
  DateTime.parse(str)
@@ -33,13 +44,11 @@ module Mail
33
44
  end
34
45
 
35
46
  def include_email_address?(str)
36
- texts = str.split(/#{SP}/)
37
- texts.each do |text|
38
- text.strip!
39
- text.gsub!(/^\(|^<|[^[:alpha:]]*$/, '')
40
- return true if ADDRESS_REGEXP === text
41
- end
42
- return false
47
+ ADDRESS_REGEXP.match?(str)
48
+ end
49
+
50
+ def remove_email_address(str)
51
+ str.gsub(ADDRESS_REGEXP, '')
43
52
  end
44
53
 
45
54
  def replace_jp_datetime(str)
@@ -1,5 +1,5 @@
1
1
  module Mail
2
2
  module Sanitizer
3
- VERSION = "0.3.0"
3
+ VERSION = "0.4.1"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mail-sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - MasatoMiyoshi
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-04-05 00:00:00.000000000 Z
11
+ date: 2023-10-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -73,9 +73,9 @@ executables: []
73
73
  extensions: []
74
74
  extra_rdoc_files: []
75
75
  files:
76
+ - ".github/workflows/ci.yml"
76
77
  - ".gitignore"
77
78
  - ".rspec"
78
- - ".travis.yml"
79
79
  - CODE_OF_CONDUCT.md
80
80
  - Gemfile
81
81
  - LICENSE.txt
@@ -108,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
108
108
  - !ruby/object:Gem::Version
109
109
  version: '0'
110
110
  requirements: []
111
- rubygems_version: 3.0.3
111
+ rubygems_version: 3.3.3
112
112
  signing_key:
113
113
  specification_version: 4
114
114
  summary: A simple sanitizer for mail bodies
data/.travis.yml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- sudo: false
3
- language: ruby
4
- cache: bundler
5
- rvm:
6
- - 2.6.1
7
- before_install: gem install bundler -v 1.17.2