validators 3.0.5 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a3e94056e45b8404eae96009fa335483fb73199503fc91acb7b479ce9bb82080
4
- data.tar.gz: 3370e2642ca60032100724e39151ffdefbe59c45b67ed2edd9bdf8d2dc3306b9
3
+ metadata.gz: 862ad7c2a49bcc38d917336b109047a330c6de6c6da16d83ea7953ddefe70096
4
+ data.tar.gz: fb5e0b9db3ec0fdbc064eb6467cb36fab3d0f4bab6f29a5c8439c8d5edeb46fd
5
5
  SHA512:
6
- metadata.gz: 3077af8d5234423bde493d62c7e7cd309c687c6c00f2be55bfcecb4ada65006133619cdc440144f9137db9c4dd737c103ea57969b914507ef471cda1cc677b35
7
- data.tar.gz: e98b9f84cf22cb2cd606cc0888ca3b72e95ac933cfe1674ea0108c3a288462da739fa36321178d7e77d455bf10c021afc99af19ccad22943ffaaf35affef68c7
6
+ metadata.gz: b8842cf3ee2161ef2fe18c384efdb5ff6407c135560a95b91689d29b8ff65d87272c599288de55a6422dfaa6ff4be3b0b00fbd7ea8a8472e47bc08acfb973a85
7
+ data.tar.gz: 7a6fe664d6ef7a33735b8bf41cc198f8509a5712f788a2db93d6f2c4b39678ad6965eea1557fb6e43551a4d947a0cfd2e89c16a53c0721b6ada4390baa8cf293
data/.gitignore CHANGED
@@ -3,3 +3,4 @@ pkg/*
3
3
  .bundle
4
4
  Gemfile.lock
5
5
  /coverage
6
+ /data/disposable/*
@@ -28,3 +28,11 @@ Style/IfUnlessModifier:
28
28
 
29
29
  Metrics/MethodLength:
30
30
  Enabled: false
31
+
32
+ Metrics/BlockLength:
33
+ Exclude:
34
+ - bin/**/*
35
+ - "*.gemspec"
36
+
37
+ Layout/EmptyLinesAroundAttributeAccessor:
38
+ Enabled: false
data/README.md CHANGED
@@ -32,7 +32,7 @@ class User < ActiveRecord::Base
32
32
  end
33
33
  ```
34
34
 
35
- By default, it rejects disposable e-mails (e.g. mailinator). This loads ~15kb, but you can disable this validation by setting `disposable: true`.
35
+ By default, it rejects disposable e-mails (e.g. mailinator). This check loads a lot of data (~1.7MB); to skip it and accept disposable addresses, set `disposable: true`.
36
36
 
37
37
  ```ruby
38
38
  class User < ActiveRecord::Base
@@ -55,7 +55,7 @@ class User < ActiveRecord::Base
55
55
  validates_url_format_of :site
56
56
 
57
57
  # validates the TLD against the list of valid TLDs.
58
- # Loads ~5kb of text.
58
+ # Loads ~10KB of text.
59
59
  validates_url_format_of :site, tld: true
60
60
  end
61
61
  ```
@@ -143,9 +143,53 @@ class Server < ActiveRecord::Base
143
143
  end
144
144
  ```
145
145
 
146
+ ### validates_username / validates_subdomain
147
+
148
+ A valid username/subdomain follows the hostname label validation:
149
+
150
+ - maximum length is 63 characters
151
+ - allowed characters are a-z, A-Z, 0-9 and hyphen
152
+ - cannot begin or end with a hyphen
153
+ - cannot consist of numeric values only
154
+
155
+ The same compiled list of reserved names is used for both username and subdomain validations.
156
+ This validation loads ~20KB of text.
157
+
158
+ ```ruby
159
+ class Server < ActiveRecord::Base
160
+ validates_subdomain :subdomain
161
+ end
162
+
163
+ class User < ActiveRecord::Base
164
+ validates_username :username
165
+ end
166
+ ```
167
+
168
+ You can also provide your own list if you want. Any string that starts with `/`
169
+ will be parsed with `Regexp.compile`.
170
+
171
+ ```ruby
172
+ ReservedUsernames = Validators::ReservedHostnames.parse_list([
173
+ "www",
174
+ "/www[0-9-]+/"
175
+ ])
176
+
177
+ class User < ActiveRecord::Base
178
+ validates_username :username, in: ReservedUsernames
179
+ end
180
+ ```
181
+
182
+ To disable the reserved validation, use `reserved: false`.
183
+
184
+ ```ruby
185
+ class User < ActiveRecord::Base
186
+ validates_username :username, reserved: false
187
+ end
188
+ ```
189
+
146
190
  ## Maintainer
147
191
 
148
- * [Nando Vieira](http://simplesideias.com.br)
192
+ * [Nando Vieira](http://nandovieira.com)
149
193
 
150
194
  ## License
151
195
 
@@ -1,35 +1,230 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
- require "open-uri"
5
- require "json"
4
+ require_relative "helpers"
6
5
 
7
- urls = %w[
8
- https://raw.githubusercontent.com/ivolo/disposable-email-domains/master/index.json
9
- https://raw.githubusercontent.com/andreis/disposable-email-domains/master/domains.json
10
- https://raw.githubusercontent.com/FGRibreau/mailchecker/master/list.txt
11
- https://raw.githubusercontent.com/willwhite/freemail/master/data/disposable.txt
12
- ]
6
+ def ten_minute_mail
7
+ path = "disposable/10minutemail.txt"
8
+ url = "https://10minutemail.com/session/address"
9
+
10
+ 20.times do
11
+ refresh_list(url: url, path: path) do |response|
12
+ _account, host = response.data.fetch("address").split("@")
13
+
14
+ [host]
15
+ end
16
+
17
+ sleep random_timeout
18
+ end
19
+ end
20
+
21
+ def temp_mail
22
+ path = "disposable/tempmail.txt"
23
+ url = "https://api4.temp-mail.org/request/domains/format/json"
24
+
25
+ refresh_list(url: url, path: path) do |response|
26
+ response.data.map {|domain| domain.tr("@", "") }
27
+ end
28
+ end
29
+
30
+ def temp_mail_address
31
+ path = "disposable/tempmailaddress.txt"
32
+ url = "https://www.tempmailaddress.com/index/index"
33
+
34
+ refresh_list(url: url, path: path) do |response|
35
+ data = JSON.parse(
36
+ response.body.gsub(/[^-,:\w@.{}"]/, ""),
37
+ symbolize_names: true
38
+ )
39
+ [data[:email].split("@").last]
40
+ end
41
+ end
42
+
43
+ def tempmail_io
44
+ path = "disposable/tempmail_io.txt"
45
+ url = "https://api.internal.temp-mail.io/api/v2/domains"
46
+
47
+ refresh_list(url: url, path: path) do |response|
48
+ response.data["domains"]
49
+ end
50
+ end
51
+
52
+ def gmailnator
53
+ emails = []
54
+
55
+ 5.times do
56
+ url = "https://gmailnator.com/bulk-emails"
57
+ default_headers = {"user-agent" => USER_AGENT.sample}
58
+
59
+ response = Aitch.get(url: url, headers: default_headers)
60
+
61
+ throw "Received #{response.status} when getting CSRF token" unless response.ok?
62
+
63
+ cookie_header = response.headers["set-cookie"]
64
+ attr = response.data.css("#csrf_token").first
65
+ csrf_token = attr[:value]
66
+ csrf_field = attr[:name]
67
+
68
+ response = Aitch.post(
69
+ url: url,
70
+ params: {email_list: "1000", email: [3], csrf_field => csrf_token},
71
+ headers: default_headers.merge({"cookie" => cookie_header})
72
+ )
73
+
74
+ throw "Received #{response.status} when fetching list" unless response.ok?
75
+
76
+ emails += response.data.css("#email-list-message a").map do |node|
77
+ mailbox, domain = node.text.gsub(/\+[^@]+/, "").split("@")
78
+ mailbox = mailbox.gsub(/\./m, "")
79
+ "#{mailbox}@#{domain}"
80
+ end
81
+
82
+ sleep random_timeout
83
+ end
84
+
85
+ append_to_file("disposable/gmailnator.txt", emails)
86
+ end
87
+
88
+ def domain_scraping(name, url, selector)
89
+ timeout(10) do
90
+ puts "=> Scraping #{url}"
91
+
92
+ selector, value_selector = selector.split("::")
93
+ path = "disposable/#{name}.txt"
94
+ host_regex = /@?(.*?(\.[^.]+)+)/
95
+
96
+ refresh_list(url: url, path: path) do |response|
97
+ new_domains = response
98
+ .data
99
+ .css(selector)
100
+ .map {|element| process_scraping(element, value_selector) }
101
+
102
+ new_domains = new_domains
103
+ .map(&:squish)
104
+ .reject(&:empty?)
105
+ .map {|domain| domain[host_regex, 1]&.squish&.tr("@", "") }
106
+ .reject(&:nil?)
107
+ .reject(&:empty?)
108
+ .map {|domain| domain.gsub(/\s*\((.*?)\)/, "") }
109
+
110
+ raise "No #{name} hosts found" if new_domains.empty?
111
+
112
+ new_domains
113
+ end
114
+ end
115
+ rescue StandardError => error
116
+ puts "=> [ERROR] Unable to scrape #{url}; #{error.class}: #{error.message}"
117
+ []
118
+ end
119
+
120
+ def process_scraping(element, value_selector)
121
+ value = nil
122
+
123
+ case value_selector
124
+ when "text()"
125
+ value = element.text
126
+ when /^attr\((.*?)\)/
127
+ value = element[Regexp.last_match(1)]
128
+ else
129
+ element.attributes.each do |_name, attr|
130
+ attr = attr.value.to_s
131
+ value = attr if attr =~ host_regex
132
+ end
133
+ end
134
+
135
+ raise "no value found: #{element} (value_selector: #{value_selector})" unless value
136
+
137
+ value
138
+ end
139
+
140
+ def load_github_url(url)
141
+ puts "=> Fetching #{url}"
142
+
143
+ basename = URI.parse(url).path[%r{/([^/]+/[^/]+)}, 1].tr("/", "_").tr("-", "_")
144
+ path = "disposable/#{basename}.txt"
145
+ domains = load_file(path)
13
146
 
14
- domains = urls.each_with_object([]) do |url, buffer|
15
147
  ext = File.extname(url)
16
148
 
17
- result = case ext
18
- when ".json"
19
- JSON.parse(URI.open(url).read)
20
- when ".txt"
21
- URI.open(url).read.lines.map(&:chomp)
22
- else
23
- raise "Unknown extension"
24
- end
149
+ domains += case ext
150
+ when ".json"
151
+ JSON.parse(http_request(:get, url).body)
152
+ when ".txt"
153
+ http_request(:get, url).body.lines.map(&:chomp)
154
+ else
155
+ raise "Unknown extension"
156
+ end
157
+
158
+ append_to_file(path, domains)
159
+ domains
160
+ rescue StandardError => error
161
+ puts "=> Unable to load #{url}; #{error.class}: #{error.message}"
162
+ []
163
+ end
164
+
165
+ threads = []
166
+
167
+ threads << thread { load_github_url("https://raw.githubusercontent.com/ivolo/disposable-email-domains/master/index.json") }
168
+ threads << thread { load_github_url("https://raw.githubusercontent.com/andreis/disposable-email-domains/master/domains.json") }
169
+ threads << thread { load_github_url("https://raw.githubusercontent.com/FGRibreau/mailchecker/master/list.txt") }
170
+ threads << thread { load_github_url("https://raw.githubusercontent.com/willwhite/freemail/master/data/disposable.txt") }
171
+ threads << thread { load_github_url("https://raw.githubusercontent.com/maxmalysh/disposable-emails/master/disposable_emails/data/domains.txt") }
172
+ threads << thread { load_github_url("https://raw.githubusercontent.com/jespernissen/disposable-maildomain-list/master/disposable-maildomain-list.txt") }
173
+ threads << thread { load_github_url("https://raw.githubusercontent.com/wesbos/burner-email-providers/master/emails.txt") }
174
+ threads << thread { load_github_url("https://gist.github.com/fnando/dafe542cac13f831bbf5521a55248116/raw/disposable.txt") }
175
+ threads << thread { ten_minute_mail }
176
+ threads << thread { temp_mail }
177
+ threads << thread { temp_mail_address }
178
+ threads << thread { tempmail_io }
179
+ threads << thread { load_file("disposable/disposable_manually_added.txt") }
180
+ threads << thread { domain_scraping("guerrillamail", "https://www.guerrillamail.com/", "select option::attr(value)") }
181
+ threads << thread { domain_scraping("moakt", "https://www.moakt.com", "select option::attr(value)") }
182
+ threads << thread { domain_scraping("tempr", "https://tempr.email/", "select[name=DomainId] option::text()") }
183
+ threads << thread { domain_scraping("yepmail", "https://yepmail.co/", "select[name=domain] option::text()") }
184
+ threads << thread { domain_scraping("fake_email_generator", "https://fakemailgenerator.net", "[data-mailhost]::attr(data-mailhost)") }
185
+ threads << thread { domain_scraping("tempemails", "https://www.tempemails.net/", "select[name=domain] option::attr(value)") }
186
+ threads << thread { domain_scraping("clipmails", "https://clipmails.com/", "select[name=domain] option::attr(value)") }
187
+ threads << thread { domain_scraping("1secmail", "https://www.1secmail.com/", "select[id=domain] option::attr(value)") }
188
+ threads << thread { domain_scraping("emailfake", "https://generator.email", ".tt-suggestion p::text()") }
189
+ threads << thread { domain_scraping("emailfake", "https://emailfake.com/", ".tt-suggestion p::text()") }
190
+ threads << thread { domain_scraping("emailfake", "https://email-fake.com/", ".tt-suggestion p::text()") }
191
+ threads << thread { domain_scraping("receivemail", "https://www.receivemail.org/", "select[name=domain] option::text()") }
192
+ threads << thread { domain_scraping("itemp", "https://itemp.email", "select[name=domain] option::text()") }
193
+ threads << thread { domain_scraping("cs", "https://www.cs.email", "select[id=gm-host-select] option::text()") }
194
+ threads << thread { domain_scraping("tempmail", "https://tempmail.io/settings/", "select[id=domain] option::text()") }
195
+ threads << thread { domain_scraping("tempemail", "https://tempemail.co", "select[name=email_domain] option::text()") }
196
+ threads << thread { domain_scraping("tmail", "https://mytemp-email.com/", "a.domain-selector::text()") }
25
197
 
26
- buffer.push(*result)
198
+ threads.each_slice(5) do |slice|
199
+ slice.each(&:join)
27
200
  end
28
201
 
29
- domains.map!(&:downcase)
30
- domains.uniq!
31
- domains.sort!
202
+ threads.clear
32
203
 
33
- File.open("./data/disposable.json", "w") do |file|
34
- file << JSON.pretty_generate(domains)
204
+ domains = []
205
+
206
+ puts "=> Loading disposable_domains.txt"
207
+ domains += File.read("#{__dir__}/../data/disposable_domains.txt").lines.map(&:chomp)
208
+
209
+ puts "=> Loading disposable/*.txt"
210
+ Dir["./data/disposable/**/*.txt"].map do |file|
211
+ file = File.expand_path(file)
212
+ domains += File.read(file).lines.map(&:chomp).flatten.compact
35
213
  end
214
+
215
+ ignore_domains = %w[gmail.com hotmail.com]
216
+
217
+ puts "=> Normalize domains (count: #{domains.size})"
218
+ domains = domains
219
+ .uniq
220
+ .map {|domain| RootDomain.call(domain.split("@").last.downcase) }
221
+ .compact
222
+ .uniq
223
+ .reject {|domain| ignore_domains.include?(domain) }
224
+
225
+ puts "=> Saving domains (count: #{domains.size})"
226
+ save_file("disposable_domains.txt", domains)
227
+
228
+ emails = gmailnator
229
+ puts "=> Saving email proxies (count: #{emails.size})"
230
+ save_file("disposable_emails.txt", emails)
@@ -1,17 +1,20 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
- require "open-uri"
5
- require "json"
4
+ require_relative "helpers"
6
5
 
7
- tlds = URI.open("https://data.iana.org/TLD/tlds-alpha-by-domain.txt").read.lines
6
+ tlds = http_request(:get, "https://data.iana.org/TLD/tlds-alpha-by-domain.txt").body.lines
8
7
  tlds.shift # remove update notice
9
8
 
10
9
  tlds.map!(&:downcase)
11
10
  tlds.map!(&:strip)
12
- tlds.sort!
13
- tlds.uniq!
11
+ tlds.map! {|tld| SimpleIDN.to_ascii(tld) }
14
12
 
15
- File.open("./data/tld.json", "w") do |file|
16
- file << JSON.pretty_generate(tlds)
17
- end
13
+ save_file("tld.txt", tlds)
14
+
15
+ country_tlds = JSON.parse(http_request(:get, "https://github.com/samayo/country-json/raw/master/src/country-by-domain-tld.json").body, symbolize_names: true)
16
+ country_tlds = country_tlds
17
+ .reject {|info| info[:tld].nil? }
18
+ .map {|info| info[:tld].gsub(/^\./, "") }
19
+
20
+ save_file("country_tlds.txt", country_tlds)
@@ -0,0 +1,235 @@
1
+ ad
2
+ ae
3
+ af
4
+ ag
5
+ ai
6
+ al
7
+ am
8
+ an
9
+ ao
10
+ aq
11
+ ar
12
+ as
13
+ at
14
+ au
15
+ aw
16
+ az
17
+ ba
18
+ bb
19
+ bd
20
+ be
21
+ bf
22
+ bg
23
+ bh
24
+ bi
25
+ bj
26
+ bm
27
+ bn
28
+ bo
29
+ br
30
+ bs
31
+ bt
32
+ bv
33
+ bw
34
+ by
35
+ bz
36
+ ca
37
+ cc
38
+ cd
39
+ cf
40
+ cg
41
+ ch
42
+ ci
43
+ ck
44
+ cl
45
+ cm
46
+ cn
47
+ co
48
+ cr
49
+ cu
50
+ cv
51
+ cx
52
+ cy
53
+ cz
54
+ de
55
+ dj
56
+ dk
57
+ dm
58
+ do
59
+ dz
60
+ ec
61
+ ee
62
+ eg
63
+ eh
64
+ er
65
+ es
66
+ et
67
+ fi
68
+ fj
69
+ fk
70
+ fr
71
+ ga
72
+ gb
73
+ gd
74
+ ge
75
+ gf
76
+ gh
77
+ gi
78
+ gl
79
+ gm
80
+ gn
81
+ gp
82
+ gq
83
+ gr
84
+ gs
85
+ gt
86
+ gu
87
+ gw
88
+ gy
89
+ hk
90
+ hm
91
+ hn
92
+ hr
93
+ ht
94
+ hu
95
+ id
96
+ ie
97
+ il
98
+ in
99
+ io
100
+ iq
101
+ ir
102
+ is
103
+ it
104
+ jm
105
+ jo
106
+ jp
107
+ ke
108
+ kg
109
+ kh
110
+ ki
111
+ km
112
+ kn
113
+ kp
114
+ kr
115
+ kw
116
+ ky
117
+ kz
118
+ la
119
+ lb
120
+ lc
121
+ li
122
+ lk
123
+ lr
124
+ ls
125
+ lt
126
+ lu
127
+ lv
128
+ ly
129
+ ma
130
+ mc
131
+ md
132
+ mg
133
+ mh
134
+ mk
135
+ ml
136
+ mm
137
+ mn
138
+ mo
139
+ mp
140
+ mq
141
+ mr
142
+ ms
143
+ mt
144
+ mu
145
+ mv
146
+ mw
147
+ mx
148
+ my
149
+ mz
150
+ na
151
+ nc
152
+ ne
153
+ nf
154
+ ng
155
+ ni
156
+ nl
157
+ no
158
+ np
159
+ nr
160
+ nu
161
+ nz
162
+ om
163
+ pa
164
+ pe
165
+ pf
166
+ pg
167
+ ph
168
+ pk
169
+ pl
170
+ pm
171
+ pn
172
+ pr
173
+ ps
174
+ pt
175
+ pw
176
+ py
177
+ qa
178
+ re
179
+ ro
180
+ ru
181
+ rw
182
+ sa
183
+ sb
184
+ sc
185
+ sd
186
+ se
187
+ sg
188
+ sh
189
+ si
190
+ sj
191
+ sk
192
+ sl
193
+ sm
194
+ sn
195
+ so
196
+ sr
197
+ ss
198
+ st
199
+ sv
200
+ sy
201
+ sz
202
+ tc
203
+ td
204
+ tf
205
+ tg
206
+ th
207
+ tj
208
+ tk
209
+ tl
210
+ tm
211
+ tn
212
+ to
213
+ tr
214
+ tt
215
+ tv
216
+ tz
217
+ ua
218
+ ug
219
+ us
220
+ uy
221
+ uz
222
+ va
223
+ vc
224
+ ve
225
+ vg
226
+ vi
227
+ vn
228
+ vu
229
+ wf
230
+ ws
231
+ ye
232
+ yt
233
+ za
234
+ zm
235
+ zw