mail-sanitizer 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +24 -0
- data/lib/mail/sanitizer/constant.rb +1 -1
- data/lib/mail/sanitizer/sanitizer.rb +11 -9
- data/lib/mail/sanitizer/string.rb +18 -9
- data/lib/mail/sanitizer/version.rb +1 -1
- metadata +4 -4
- data/.travis.yml +0 -7
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 68bc7ac18fa8f7c6ab281621af51b38ef15182c34c994dcc6eb40ad3b5e96395
         | 
| 4 | 
            +
              data.tar.gz: e218cbd8dd5ba270b61195ed1f8295bc18468ad1659129070da9bcde8c0c2067
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 7b436a57c57a46d4d745565fdfd382ee48b7d739653146426fc44f30ec76e92f2a9459706c801288b16142ce047f5c1cd73fe0970184247a45592f9321b9093a
         | 
| 7 | 
            +
              data.tar.gz: f31f2e76788bafa8c4c518402331126757acd57e10ca469c03d1c4e6bd347d679a5674d0efda9028528a560daffac10870034290b979eda6600ef05fa835610c
         | 
| @@ -0,0 +1,24 @@ | |
| 1 | 
            +
            name: CI
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            on: [push, pull_request]
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            jobs:
         | 
| 6 | 
            +
              test:
         | 
| 7 | 
            +
                runs-on: ubuntu-latest
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                strategy:
         | 
| 10 | 
            +
                  fail-fast: false
         | 
| 11 | 
            +
                  matrix:
         | 
| 12 | 
            +
                    ruby: ['2.6', '2.7', '3.0', '3.1', '3.2']
         | 
| 13 | 
            +
             | 
| 14 | 
            +
                name: ruby ${{ matrix.ruby }}
         | 
| 15 | 
            +
             | 
| 16 | 
            +
                steps:
         | 
| 17 | 
            +
                  - uses: actions/checkout@v4
         | 
| 18 | 
            +
                  - uses: ruby/setup-ruby@v1
         | 
| 19 | 
            +
                    with:
         | 
| 20 | 
            +
                      ruby-version: ${{ matrix.ruby }}
         | 
| 21 | 
            +
                      bundler-cache: true
         | 
| 22 | 
            +
                  - name: Run test
         | 
| 23 | 
            +
                    run: |
         | 
| 24 | 
            +
                      bundle exec rspec
         | 
| @@ -2,7 +2,7 @@ module Mail | |
| 2 2 | 
             
              module Sanitizer
         | 
| 3 3 | 
             
                class Constant
         | 
| 4 4 | 
             
                  QUOT_SYMBOL_PATTERN = /^>/
         | 
| 5 | 
            -
                  QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage-|forwardedby/
         | 
| 5 | 
            +
                  QUOT_PATTERN = /wrote:|-originalmessage-|-forwardedmessage-|forwardedby|beginforwardedmessage/
         | 
| 6 6 | 
             
                  QUOT_DATETIME_PATTERN = /^(on|at)/
         | 
| 7 7 | 
             
                  QUOT_KEYWORD_SET = [
         | 
| 8 8 | 
             
                    ['from:', 'sent:', 'to:', 'subject:'],
         | 
| @@ -7,6 +7,7 @@ module Mail | |
| 7 7 |  | 
| 8 8 | 
             
                  def initialize(body)
         | 
| 9 9 | 
             
                    @src = body.dup
         | 
| 10 | 
            +
                    @src = @src.encode('utf-8', invalid: :replace, undef: :replace) unless @src.nil?
         | 
| 10 11 | 
             
                    @sanitized_body = nil
         | 
| 11 12 | 
             
                    @quot = nil
         | 
| 12 13 | 
             
                    @sign = nil
         | 
| @@ -48,14 +49,18 @@ module Mail | |
| 48 49 | 
             
                    numrow = lines.size
         | 
| 49 50 |  | 
| 50 51 | 
             
                    line_types = Array.new(numrow, :normal)
         | 
| 51 | 
            -
                     | 
| 52 | 
            +
                    sign_index = nil
         | 
| 52 53 | 
             
                    look_sign_symbol = false
         | 
| 53 54 | 
             
                    lines.each_with_index.reverse_each do |line, i|
         | 
| 54 55 | 
             
                      if line =~ Mail::Sanitizer::Constant::QUOT_SYMBOL_PATTERN
         | 
| 55 56 | 
             
                        line_types[i] = :quot
         | 
| 56 57 | 
             
                      elsif line =~ Mail::Sanitizer::Constant::SIGN_PATTERN
         | 
| 57 | 
            -
                         | 
| 58 | 
            -
             | 
| 58 | 
            +
                        if sign_index.nil?
         | 
| 59 | 
            +
                          sign_index = i
         | 
| 60 | 
            +
                        else
         | 
| 61 | 
            +
                          line_types[i, sign_index - i + 1] = Array.new(sign_index - i + 1, :sign)
         | 
| 62 | 
            +
                          sign_index = nil
         | 
| 63 | 
            +
                        end
         | 
| 59 64 | 
             
                      elsif line =~ Mail::Sanitizer::Constant::SIGN_SYMBOL_PATTERN
         | 
| 60 65 | 
             
                        next if look_sign_symbol
         | 
| 61 66 | 
             
                        if line_types[i..-1].include?(:quot)
         | 
| @@ -67,8 +72,6 @@ module Mail | |
| 67 72 | 
             
                        look_sign_symbol = true
         | 
| 68 73 | 
             
                      elsif line =~ /^[[:space:]]*$/
         | 
| 69 74 | 
             
                        line_types[i] = nil
         | 
| 70 | 
            -
                      elsif sign
         | 
| 71 | 
            -
                        line_types[i] = :sign
         | 
| 72 75 | 
             
                      end
         | 
| 73 76 | 
             
                    end
         | 
| 74 77 |  | 
| @@ -93,10 +96,9 @@ module Mail | |
| 93 96 | 
             
                        break if keywords.values.all?
         | 
| 94 97 | 
             
                      end
         | 
| 95 98 |  | 
| 96 | 
            -
                       | 
| 97 | 
            -
             | 
| 98 | 
            -
                          | 
| 99 | 
            -
                         (Mail::Sanitizer::String.include_datetime?(line) && Mail::Sanitizer::String.include_email_address?(line))
         | 
| 99 | 
            +
                      if keywords.values.all? ||
         | 
| 100 | 
            +
                         Mail::Sanitizer::String.quot_pattern?(line) ||
         | 
| 101 | 
            +
                         Mail::Sanitizer::String.include_datetime_and_email_address?(line)
         | 
| 100 102 | 
             
                        line_types[i, numrow - i] = Array.new(numrow - i, :quot) unless line_types[i] == :quot
         | 
| 101 103 | 
             
                        break
         | 
| 102 104 | 
             
                      end
         | 
| @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            module Mail
         | 
| 2 2 | 
             
              module Sanitizer
         | 
| 3 3 | 
             
                class String
         | 
| 4 | 
            -
                  ADDRESS_REGEXP =  | 
| 4 | 
            +
                  ADDRESS_REGEXP = /([a-zA-Z0-9_!#$%&`'"*+\-{|}~^\/=?\.]+@[a-zA-Z0-9][a-zA-Z0-9\.\-]+)/
         | 
| 5 5 | 
             
                  SP    = "[[:space:]]"
         | 
| 6 6 | 
             
                  DIGIT = "[0-90-9]"
         | 
| 7 7 | 
             
                  YEAR  = "(#{DIGIT}{4})#{SP}*年"
         | 
| @@ -17,13 +17,24 @@ module Mail | |
| 17 17 |  | 
| 18 18 | 
             
                  class << self
         | 
| 19 19 | 
             
                    def split_line(str)
         | 
| 20 | 
            -
                      str.split( | 
| 20 | 
            +
                      str.split(/\r\n|\r|\n/)
         | 
| 21 21 | 
             
                    end
         | 
| 22 22 |  | 
| 23 23 | 
             
                    def downcase(str)
         | 
| 24 24 | 
             
                      str.downcase.gsub(/[[:space:]]/, '')
         | 
| 25 25 | 
             
                    end
         | 
| 26 26 |  | 
| 27 | 
            +
                    def quot_pattern?(str)
         | 
| 28 | 
            +
                      s = downcase(str)
         | 
| 29 | 
            +
                      (s =~ Mail::Sanitizer::Constant::QUOT_PATTERN) ||
         | 
| 30 | 
            +
                      (s =~ Mail::Sanitizer::Constant::QUOT_DATETIME_PATTERN && Mail::Sanitizer::String.include_datetime?(str))
         | 
| 31 | 
            +
                    end
         | 
| 32 | 
            +
             | 
| 33 | 
            +
                    def include_datetime_and_email_address?(str)
         | 
| 34 | 
            +
                      s = Mail::Sanitizer::String.remove_email_address(str)
         | 
| 35 | 
            +
                      Mail::Sanitizer::String.include_email_address?(str) && Mail::Sanitizer::String.include_datetime?(s)
         | 
| 36 | 
            +
                    end
         | 
| 37 | 
            +
             | 
| 27 38 | 
             
                    def include_datetime?(str)
         | 
| 28 39 | 
             
                      str = replace_jp_datetime(str)
         | 
| 29 40 | 
             
                      DateTime.parse(str)
         | 
| @@ -33,13 +44,11 @@ module Mail | |
| 33 44 | 
             
                    end
         | 
| 34 45 |  | 
| 35 46 | 
             
                    def include_email_address?(str)
         | 
| 36 | 
            -
                       | 
| 37 | 
            -
             | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 41 | 
            -
                      end
         | 
| 42 | 
            -
                      return false
         | 
| 47 | 
            +
                      ADDRESS_REGEXP.match?(str)
         | 
| 48 | 
            +
                    end
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                    def remove_email_address(str)
         | 
| 51 | 
            +
                      str.gsub(ADDRESS_REGEXP, '')
         | 
| 43 52 | 
             
                    end
         | 
| 44 53 |  | 
| 45 54 | 
             
                    def replace_jp_datetime(str)
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: mail-sanitizer
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0. | 
| 4 | 
            +
              version: 0.4.1
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - MasatoMiyoshi
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date:  | 
| 11 | 
            +
            date: 2023-10-16 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: bundler
         | 
| @@ -73,9 +73,9 @@ executables: [] | |
| 73 73 | 
             
            extensions: []
         | 
| 74 74 | 
             
            extra_rdoc_files: []
         | 
| 75 75 | 
             
            files:
         | 
| 76 | 
            +
            - ".github/workflows/ci.yml"
         | 
| 76 77 | 
             
            - ".gitignore"
         | 
| 77 78 | 
             
            - ".rspec"
         | 
| 78 | 
            -
            - ".travis.yml"
         | 
| 79 79 | 
             
            - CODE_OF_CONDUCT.md
         | 
| 80 80 | 
             
            - Gemfile
         | 
| 81 81 | 
             
            - LICENSE.txt
         | 
| @@ -108,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 108 108 | 
             
                - !ruby/object:Gem::Version
         | 
| 109 109 | 
             
                  version: '0'
         | 
| 110 110 | 
             
            requirements: []
         | 
| 111 | 
            -
            rubygems_version: 3. | 
| 111 | 
            +
            rubygems_version: 3.3.3
         | 
| 112 112 | 
             
            signing_key: 
         | 
| 113 113 | 
             
            specification_version: 4
         | 
| 114 114 | 
             
            summary: A simple sanitizer for mail bodies
         |