email_inquire 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
module EmailInquire
  # Stateless utility methods, callable directly on the module
  # (EmailInquire::Helper.first_value) thanks to `extend self`.
  module Helper

    extend self

    # Calls the block with each element of +array+ in order and returns the
    # first truthy block result. Elements after that one are never visited.
    # Returns nil when the block never produces a truthy value.
    def first_value(array, &block)
      array.each do |element|
        value = block.call(element)
        return value if value
      end

      nil
    end

  end
end
@@ -1,181 +1,55 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "damerau-levenshtein"
4
- require "set"
3
+ require "email_inquire/helper"
4
+ require "email_inquire/response"
5
+ require "email_inquire/validator/common_provider"
6
+ require "email_inquire/validator/common_provider_mistake"
7
+ require "email_inquire/validator/commonly_mistaken_domain"
8
+ require "email_inquire/validator/commonly_mistaken_tld"
9
+ require "email_inquire/validator/country_code_tld"
10
+ require "email_inquire/validator/custom_invalid_domain"
11
+ require "email_inquire/validator/custom_valid_domain"
12
+ require "email_inquire/validator/email_format"
13
+ require "email_inquire/validator/known_invalid_domain"
14
+ require "email_inquire/validator/one_time_provider"
15
+ require "email_inquire/validator/unique_domain_provider"
5
16
 
6
17
  module EmailInquire
7
18
  class Inquirer
8
19
 
9
- class << self
10
-
11
- private
12
-
13
- def load_data(filename)
14
- data = File.read("#{__dir__}/../../data/#{filename}.txt")
15
- lines = data.split("\n")
16
- lines.reject! { |line| line[0] == "#" }
17
-
18
- lines.to_set
19
- end
20
-
21
- end
22
-
23
20
  def initialize(email)
24
- @email = email.downcase
25
-
26
- parse_email
27
- end
28
-
29
- attr_reader :domain, :email, :name
30
-
31
- VALIDATORS = %i[
32
- validate_custom_valid_domains
33
- validate_common_domains
34
- validate_one_time_providers
35
- validate_known_invalid_domains
36
- validate_custom_invalid_domains
37
- validate_common_domain_mistakes
38
- validate_cc_tld
39
- validate_common_tld_mistakes
40
- validate_domains_with_unique_tld
41
- ].freeze
42
-
43
- def validate
44
- email_validator = EmailValidator.new(email)
45
- unless email_validator.valid?
46
- response.invalid!
47
- return response
48
- end
49
-
50
- VALIDATORS.each do |validator|
51
- send(validator)
52
- break if response.valid? || response.invalid?
53
- end
54
-
55
- # default
56
- response.valid! unless response.status?
57
-
58
- response
59
- end
60
-
61
- private
62
-
63
- def parse_email
64
- @name, @domain = email.split("@")
65
- end
66
-
67
- def response
68
- @response ||=
69
- Response.new.tap do |response|
70
- response.email = email
71
- end
72
- end
73
-
74
- COMMON_DOMAIN_MISTAKES = {
75
- /google(?!mail)/ => "gmail.com",
76
- /windows.*\.com/ => "live.com",
77
- }.freeze
78
-
79
- def validate_common_domain_mistakes
80
- COMMON_DOMAIN_MISTAKES.each do |mistake, reference|
81
- break if domain == reference # valid!
82
-
83
- if mistake =~ domain
84
- response.hint!(domain: reference)
85
- break
86
- end
87
- end
21
+ @email = email&.downcase
88
22
  end
89
23
 
90
- COMMON_DOMAINS = load_data("common_providers").freeze
91
-
92
- def validate_common_domains
93
- return response.valid! if COMMON_DOMAINS.include?(domain)
24
+ attr_reader :email
94
25
 
95
- COMMON_DOMAINS.each do |reference|
96
- distance = ::DamerauLevenshtein.distance(domain, reference, 2, 3)
97
- if distance <= 1
98
- response.hint!(domain: reference)
99
- break
100
- end
101
- end
102
- end
26
+ VALIDATORS = [
27
+ # Format first
28
+ EmailInquire::Validator::EmailFormat,
103
29
 
104
- COMMON_TLD_MISTAKES = {
105
- ".combr" => ".com.br",
106
- ".cojp" => ".co.jp",
107
- ".couk" => ".co.uk",
108
- ".com.com" => ".com",
109
- }.freeze
30
+ # Custom overrides
31
+ EmailInquire::Validator::CustomValidDomain,
32
+ EmailInquire::Validator::CustomInvalidDomain,
110
33
 
111
- def validate_common_tld_mistakes
112
- COMMON_TLD_MISTAKES.each do |mistake, reference|
113
- break if !mistake.end_with?(reference) && domain.end_with?(reference)
34
+ # Always valid domains
35
+ EmailInquire::Validator::CommonProvider,
114
36
 
115
- if domain.end_with?(mistake)
116
- response.hint!(domain: domain.gsub(/#{mistake}\z/, reference))
117
- break
118
- end
119
- end
120
- end
37
+ # Invalid domains
38
+ EmailInquire::Validator::KnownInvalidDomain,
39
+ EmailInquire::Validator::OneTimeProvider,
121
40
 
122
- VALID_CC_TLDS = [
123
- [".jp", ".co.jp", load_data("jp_tld").freeze],
124
- [".uk", ".co.uk", load_data("uk_tld").freeze],
125
- [".br", ".com.br", load_data("br_tld").freeze],
41
+ # Hints
42
+ EmailInquire::Validator::CommonProviderMistake,
43
+ EmailInquire::Validator::CommonlyMistakenDomain,
44
+ EmailInquire::Validator::CommonlyMistakenTld,
45
+ EmailInquire::Validator::CountryCodeTld,
46
+ EmailInquire::Validator::UniqueDomainProvider,
126
47
  ].freeze
127
48
 
128
- def validate_cc_tld
129
- VALID_CC_TLDS.each do |tld, sld, valid_tlds|
130
- next unless domain.end_with?(tld)
131
-
132
- next if valid_tlds.any? do |reference|
133
- domain.end_with?(reference)
134
- end
135
-
136
- _, com, tld_without_dot = sld.split(".")
137
-
138
- new_domain = domain.dup
139
- new_domain.gsub!(/\.[a-z]{2,#{com.length}}\.#{tld_without_dot}\z/, sld)
140
- new_domain.gsub!(/(?<!\.)#{com}\.#{tld_without_dot}\z/, sld)
141
- new_domain.gsub!(/(?<!\.#{com})\.#{tld_without_dot}\z/, sld)
142
- response.hint!(domain: new_domain) if new_domain != domain
143
- end
144
- end
145
-
146
- UNIQUE_TLD_DOMAINS = load_data("unique_domain_providers").freeze
147
-
148
- def validate_domains_with_unique_tld
149
- base, tld = domain.split(".")
150
-
151
- UNIQUE_TLD_DOMAINS.each do |reference|
152
- reference_base, reference_tld = reference.split(".")
153
-
154
- if base == reference_base && tld != reference_tld
155
- response.hint!(domain: reference)
156
- break
157
- end
158
- end
159
- end
160
-
161
- ONE_TIME_EMAIL_PROVIDERS = load_data("one_time_email_providers").freeze
162
-
163
- def validate_one_time_providers
164
- response.invalid! if ONE_TIME_EMAIL_PROVIDERS.include?(domain)
165
- end
166
-
167
- KNOWN_INVALID_DOMAINS = load_data("known_invalid_domains").freeze
168
-
169
- def validate_known_invalid_domains
170
- response.invalid! if KNOWN_INVALID_DOMAINS.include?(domain)
171
- end
172
-
173
- def validate_custom_invalid_domains
174
- response.invalid! if EmailInquire.custom_invalid_domains.include?(domain)
175
- end
49
+ def validate
50
+ response = Helper.first_value(VALIDATORS) { |validator| validator.validate(email) }
176
51
 
177
- def validate_custom_valid_domains
178
- response.valid! if EmailInquire.custom_valid_domains.include?(domain)
52
+ response || Response.new(email: email).valid!
179
53
  end
180
54
 
181
55
  end
@@ -3,25 +3,34 @@
3
3
  module EmailInquire
4
4
  class Response
5
5
 
6
- attr_accessor :email, :replacement, :status
6
+ attr_reader :email
7
+ attr_accessor :replacement, :status
7
8
 
8
- def hint!(domain: nil)
9
+ def initialize(email:)
10
+ @email = email
11
+ end
12
+
13
+ def hint!(domain:)
9
14
  self.status = :hint
10
15
 
11
16
  old_name, _old_domain = email.split("@")
12
- self.replacement = "#{old_name}@#{domain}" if domain
17
+ self.replacement = "#{old_name}@#{domain}"
18
+
19
+ self
13
20
  end
14
21
 
15
22
  def hint?
16
- status == :hint
23
+ status.equal?(:hint)
17
24
  end
18
25
 
19
26
  def invalid!
20
27
  self.status = :invalid
28
+
29
+ self
21
30
  end
22
31
 
23
32
  def invalid?
24
- status == :invalid
33
+ status.equal?(:invalid)
25
34
  end
26
35
 
27
36
  def status?
@@ -30,10 +39,12 @@ module EmailInquire
30
39
 
31
40
  def valid!
32
41
  self.status = :valid
42
+
43
+ self
33
44
  end
34
45
 
35
46
  def valid?
36
- status == :valid
47
+ status.equal?(:valid)
37
48
  end
38
49
 
39
50
  end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+ require "email_inquire/response"
5
+
6
module EmailInquire
  module Validator
    # Shared behaviour for validators: splits the email into name and domain,
    # exposes a memoized Response, and offers a class-level data file loader.
    # Subclasses implement #validate.
    class Base

      class << self

        # Builds a validator for +email+ and returns the result of its #validate.
        def validate(email)
          new(email).validate
        end

        # Instances are created through .validate only.
        private :new

        private

        # Reads "<dir>/<filename>.txt" and returns its entries as a Set.
        #
        # Surrounding whitespace (including a trailing "\r") is stripped from
        # each line; blank lines and lines starting with "#" are ignored, so an
        # empty string can never end up in the Set.
        #
        # +filename+ may contain a sub directory (e.g. "country_code_tld/jp").
        # +dir+ defaults to the "data" directory three levels above this file.
        def load_data(filename, dir: File.expand_path("../../../data", __dir__))
          entries = File.foreach(File.join(dir, "#{filename}.txt")).map(&:strip)
          entries.reject! { |entry| entry.empty? || entry.start_with?("#") }

          entries.to_set
        end

      end

      def initialize(email)
        @email = email
        # Split on the first "@" only; both parts are nil when email is nil,
        # and domain is nil when there is no "@".
        @name, @domain = email&.split("@", 2)
      end

      attr_reader :domain, :email, :name

      # To be implemented by subclasses.
      def validate
        raise NotImplementedError
      end

      private

      # Response for the email under validation, built on first use.
      def response
        @response ||= Response.new(email: email)
      end

    end
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "damerau-levenshtein"
4
+ require "email_inquire/validator/base"
5
+
6
module EmailInquire
  module Validator
    # Marks the email valid when its domain is listed in the
    # "common_providers" data file.
    class CommonProvider < Base

      # Loaded once, when the class body is evaluated.
      DOMAINS = load_data("common_providers").freeze

      # Returns the result of response.valid! for a listed domain, nil otherwise.
      def validate
        return unless DOMAINS.include?(domain)

        response.valid!
      end

    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "damerau-levenshtein"
4
+ require "email_inquire/validator/base"
5
+ require "email_inquire/validator/common_provider"
6
+
7
module EmailInquire
  module Validator
    # Hints at a common provider when the domain is exactly one
    # Damerau-Levenshtein edit away from one of CommonProvider::DOMAINS.
    class CommonProviderMistake < Base

      # Returns nil when the domain is itself a common provider or when no
      # provider is at distance 1; otherwise returns the result of
      # response.hint! for the first provider found at distance 1.
      def validate
        providers = CommonProvider::DOMAINS
        return if providers.include?(domain)

        closest = providers.find do |candidate|
          DamerauLevenshtein.distance(domain, candidate).equal?(1)
        end
        return unless closest

        response.hint!(domain: closest)
      end

    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "email_inquire/validator/base"
4
+
5
module EmailInquire
  module Validator
    # Recognises domains that match a known mistaken pattern and hints at the
    # domain the pattern maps to.
    class CommonlyMistakenDomain < Base

      # pattern matched against the domain => domain to suggest
      MISTAKES = {
        /google(?!mail)/ => "gmail.com",
        /windows.*\.com/ => "live.com",
      }.freeze

      # A domain equal to one of the suggested domains is marked valid.
      # Otherwise the first pattern matching the domain produces a hint, and
      # nil is returned when no pattern matches.
      def validate
        return response.valid! if MISTAKES.values.include?(domain)

        MISTAKES.each do |pattern, suggestion|
          return response.hint!(domain: suggestion) if pattern =~ domain
        end

        nil
      end

    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "email_inquire/validator/base"
4
+
5
module EmailInquire
  module Validator
    # Hints at the corrected domain when it ends with a known mistyped TLD.
    class CommonlyMistakenTld < Base

      # mistyped domain ending => corrected ending
      MISTAKES = {
        ".combr" => ".com.br",
        ".cojp" => ".co.jp",
        ".couk" => ".co.uk",
        ".com.com" => ".com",
      }.freeze

      # Returns nil when there is no domain or no mistyped ending applies;
      # otherwise returns the result of response.hint! with the ending replaced.
      def validate
        # No domain (nil email, or no "@"): nothing to say, like the other validators.
        return unless domain

        mistaken_tld, correct_tld =
          MISTAKES.find do |mistake, reference|
            # Skip when the domain already carries the corrected ending, unless
            # the mistake itself ends with it (".com.com" ends with ".com").
            next if !mistake.end_with?(reference) && domain.end_with?(reference)

            domain.end_with?(mistake)
          end
        return unless mistaken_tld

        # Escape the ending so its dots only match literal dots.
        response.hint!(domain: domain.sub(/#{Regexp.escape(mistaken_tld)}\z/, correct_tld))
      end

    end
  end
end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "email_inquire/helper"
4
+ require "email_inquire/validator/base"
5
+ require "email_inquire/validator/common_provider"
6
+
7
module EmailInquire
  module Validator
    # Checks domains against country-code TLDs that are used together with
    # generic second-level labels (e.g. "co" + "uk"), and hints at the likely
    # intended domain when the generic label is missing, glued or misplaced.
    class CountryCodeTld < Base

      COUNTRY_CODE_TLDS = [
        # TLD, generic com, all generic, registration with TLD only is possible
        ["jp", "co", load_data("country_code_tld/jp").freeze, true].freeze,
        ["uk", "co", load_data("country_code_tld/uk").freeze, true].freeze,
        ["br", "com", load_data("country_code_tld/br").freeze, true].freeze,
      ].freeze

      def initialize(email)
        super(email)

        # Last label is the TLD, the one before it the SLD, anything earlier
        # goes to rest. `to_s` keeps a nil domain (nil email, or no "@") from
        # raising here; all three parts are then empty/nil.
        *@rest, @sld, @tld = domain.to_s.split(".")
      end

      # Returns the first non-nil outcome among the configured country codes,
      # or nil when none of them has anything to say.
      def validate
        # Fewer than two labels: there is no second-level label to reason about.
        return unless sld

        Helper.first_value(
          COUNTRY_CODE_TLDS
        ) do |cctld, generic_com, all_generics, registration_with_cctld|
          validate_cctld(cctld, generic_com, all_generics, registration_with_cctld)
        end
      end

      private

      attr_reader :rest, :sld, :tld

      # TLD is a generic label glued to the country code ("couk"):
      # hint with the two separated ("co.uk").
      def hint_for_approx_cctld(cctld, all_generics)
        matching_generic = all_generics.find { |generic| tld.eql?("#{generic}#{cctld}") }
        return unless matching_generic

        replacement = [*rest, sld, matching_generic, cctld].join(".")
        response.hint!(domain: replacement)
      end

      # Hint with "<sld>.<generic_com>.<cctld>" when that exact domain is a
      # known common provider.
      def hint_for_common_provider(cctld, generic_com)
        provider_domain = [sld, generic_com, cctld].join(".")
        return unless CommonProvider::DOMAINS.include?(provider_domain)

        response.hint!(domain: provider_domain)
      end

      # Hint with the generic "com" label placed before the country code.
      # An SLD of one or two characters is replaced by that label rather than kept.
      def hint_for_generic_com(cctld, generic_com)
        replacement = [
          *rest,
          (sld if sld.length > 2),
          generic_com,
          cctld,
        ].compact.join(".")

        response.hint!(domain: replacement)
      end

      # SLD ends with a generic label ("yahooco" + "uk"): hint with the label
      # split off into its own part.
      def hint_for_generic_at_end_of_sld(cctld, all_generics)
        generic_at_end_of_sld = all_generics.find { |generic| sld.end_with?(generic) }
        return unless generic_at_end_of_sld

        replacement = [
          *rest,
          sld.sub(/#{Regexp.escape(generic_at_end_of_sld)}\z/, ""),
          generic_at_end_of_sld,
          cctld,
        ].join(".")

        response.hint!(domain: replacement)
      end

      # Outcome for a single country code: a hint, an invalid response, or nil.
      def validate_cctld(cctld, generic_com, all_generics, registration_with_cctld)
        return hint_for_approx_cctld(cctld, all_generics) unless tld.eql?(cctld)

        if all_generics.include?(sld)
          # The domain is nothing but "<generic>.<cctld>": invalid.
          return response.invalid! if rest.empty?

          # A proper "<something>.<generic>.<cctld>" domain: no opinion.
          return
        end

        hint =
          hint_for_generic_at_end_of_sld(cctld, all_generics) ||
          hint_for_common_provider(cctld, generic_com)
        return hint if hint

        hint_for_generic_com(cctld, generic_com) if sld.length <= 2 || !registration_with_cctld
      end

    end
  end
end