validators 3.0.5 → 3.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a3e94056e45b8404eae96009fa335483fb73199503fc91acb7b479ce9bb82080
4
- data.tar.gz: 3370e2642ca60032100724e39151ffdefbe59c45b67ed2edd9bdf8d2dc3306b9
3
+ metadata.gz: 862ad7c2a49bcc38d917336b109047a330c6de6c6da16d83ea7953ddefe70096
4
+ data.tar.gz: fb5e0b9db3ec0fdbc064eb6467cb36fab3d0f4bab6f29a5c8439c8d5edeb46fd
5
5
  SHA512:
6
- metadata.gz: 3077af8d5234423bde493d62c7e7cd309c687c6c00f2be55bfcecb4ada65006133619cdc440144f9137db9c4dd737c103ea57969b914507ef471cda1cc677b35
7
- data.tar.gz: e98b9f84cf22cb2cd606cc0888ca3b72e95ac933cfe1674ea0108c3a288462da739fa36321178d7e77d455bf10c021afc99af19ccad22943ffaaf35affef68c7
6
+ metadata.gz: b8842cf3ee2161ef2fe18c384efdb5ff6407c135560a95b91689d29b8ff65d87272c599288de55a6422dfaa6ff4be3b0b00fbd7ea8a8472e47bc08acfb973a85
7
+ data.tar.gz: 7a6fe664d6ef7a33735b8bf41cc198f8509a5712f788a2db93d6f2c4b39678ad6965eea1557fb6e43551a4d947a0cfd2e89c16a53c0721b6ada4390baa8cf293
data/.gitignore CHANGED
@@ -3,3 +3,4 @@ pkg/*
3
3
  .bundle
4
4
  Gemfile.lock
5
5
  /coverage
6
+ /data/disposable/*
@@ -28,3 +28,11 @@ Style/IfUnlessModifier:
28
28
 
29
29
  Metrics/MethodLength:
30
30
  Enabled: false
31
+
32
+ Metrics/BlockLength:
33
+ Exclude:
34
+ - bin/**/*
35
+ - "*.gemspec"
36
+
37
+ Layout/EmptyLinesAroundAttributeAccessor:
38
+ Enabled: false
data/README.md CHANGED
@@ -32,7 +32,7 @@ class User < ActiveRecord::Base
32
32
  end
33
33
  ```
34
34
 
35
- By default, it rejects disposable e-mails (e.g. mailinator). This loads ~15kb, but you can disable this validation by setting `disposable: true`.
35
+ By default, it rejects disposable e-mails (e.g. mailinator). This loads a lot of data (~1.7MB), but you can skip this check and accept disposable e-mails by setting `disposable: true`.
36
36
 
37
37
  ```ruby
38
38
  class User < ActiveRecord::Base
@@ -55,7 +55,7 @@ class User < ActiveRecord::Base
55
55
  validates_url_format_of :site
56
56
 
57
57
  # validates TLD against list of valid TLD.
58
- # Loads ~5kb of text.
58
+ # Loads ~10KB of text.
59
59
  validates_url_format_of :site, tld: true
60
60
  end
61
61
  ```
@@ -143,9 +143,53 @@ class Server < ActiveRecord::Base
143
143
  end
144
144
  ```
145
145
 
146
+ ### validates_username / validates_subdomain
147
+
148
+ A valid username/subdomain must follow the rules for hostname labels:
149
+
150
+ - maximum length is 63 characters
151
+ - allowed characters are a-z, A-Z, 0-9 and hyphen
152
+ - cannot begin or end with a hyphen
153
+ - cannot consist of numeric values only
154
+
155
+ A compiled list of reserved names is used for both username and subdomain validations.
156
+ This validation loads ~20KB of text.
157
+
158
+ ```ruby
159
+ class Server < ActiveRecord::Base
160
+ validates_subdomain :subdomain
161
+ end
162
+
163
+ class User < ActiveRecord::Base
164
+ validates_username :username
165
+ end
166
+ ```
167
+
168
+ You can also provide your own list if you want. Any string that starts with `/`
169
+ will be parsed with `Regexp.compile`.
170
+
171
+ ```ruby
172
+ ReservedUsernames = Validators::ReservedHostnames.parse_list([
173
+ "www",
174
+ "/www[0-9-]+/"
175
+ ])
176
+
177
+ class User < ActiveRecord::Base
178
+ validates_username :username, in: ReservedUsernames
179
+ end
180
+ ```
181
+
182
+ To disable the reserved validation, use `reserved: false`.
183
+
184
+ ```ruby
185
+ class User < ActiveRecord::Base
186
+ validates_username :username, reserved: false
187
+ end
188
+ ```
189
+
146
190
  ## Maintainer
147
191
 
148
- * [Nando Vieira](http://simplesideias.com.br)
192
+ * [Nando Vieira](http://nandovieira.com)
149
193
 
150
194
  ## License
151
195
 
@@ -1,35 +1,230 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
- require "open-uri"
5
- require "json"
4
+ require_relative "helpers"
6
5
 
7
- urls = %w[
8
- https://raw.githubusercontent.com/ivolo/disposable-email-domains/master/index.json
9
- https://raw.githubusercontent.com/andreis/disposable-email-domains/master/domains.json
10
- https://raw.githubusercontent.com/FGRibreau/mailchecker/master/list.txt
11
- https://raw.githubusercontent.com/willwhite/freemail/master/data/disposable.txt
12
- ]
6
+ def ten_minute_mail
7
+ path = "disposable/10minutemail.txt"
8
+ url = "https://10minutemail.com/session/address"
9
+
10
+ 20.times do
11
+ refresh_list(url: url, path: path) do |response|
12
+ _account, host = response.data.fetch("address").split("@")
13
+
14
+ [host]
15
+ end
16
+
17
+ sleep random_timeout
18
+ end
19
+ end
20
+
21
+ def temp_mail
22
+ path = "disposable/tempmail.txt"
23
+ url = "https://api4.temp-mail.org/request/domains/format/json"
24
+
25
+ refresh_list(url: url, path: path) do |response|
26
+ response.data.map {|domain| domain.tr("@", "") }
27
+ end
28
+ end
29
+
30
+ def temp_mail_address
31
+ path = "disposable/tempmailaddress.txt"
32
+ url = "https://www.tempmailaddress.com/index/index"
33
+
34
+ refresh_list(url: url, path: path) do |response|
35
+ data = JSON.parse(
36
+ response.body.gsub(/[^-,:\w@.{}"]/, ""),
37
+ symbolize_names: true
38
+ )
39
+ [data[:email].split("@").last]
40
+ end
41
+ end
42
+
43
+ def tempmail_io
44
+ path = "disposable/tempmail_io.txt"
45
+ url = "https://api.internal.temp-mail.io/api/v2/domains"
46
+
47
+ refresh_list(url: url, path: path) do |response|
48
+ response.data["domains"]
49
+ end
50
+ end
51
+
52
+ def gmailnator
53
+ emails = []
54
+
55
+ 5.times do
56
+ url = "https://gmailnator.com/bulk-emails"
57
+ default_headers = {"user-agent" => USER_AGENT.sample}
58
+
59
+ response = Aitch.get(url: url, headers: default_headers)
60
+
61
+ throw "Received #{response.status} when getting CSRF token" unless response.ok?
62
+
63
+ cookie_header = response.headers["set-cookie"]
64
+ attr = response.data.css("#csrf_token").first
65
+ csrf_token = attr[:value]
66
+ csrf_field = attr[:name]
67
+
68
+ response = Aitch.post(
69
+ url: url,
70
+ params: {email_list: "1000", email: [3], csrf_field => csrf_token},
71
+ headers: default_headers.merge({"cookie" => cookie_header})
72
+ )
73
+
74
+ throw "Received #{response.status} when fetching list" unless response.ok?
75
+
76
+ emails += response.data.css("#email-list-message a").map do |node|
77
+ mailbox, domain = node.text.gsub(/\+[^@]+/, "").split("@")
78
+ mailbox = mailbox.gsub(/\./m, "")
79
+ "#{mailbox}@#{domain}"
80
+ end
81
+
82
+ sleep random_timeout
83
+ end
84
+
85
+ append_to_file("disposable/gmailnator.txt", emails)
86
+ end
87
+
88
+ def domain_scraping(name, url, selector)
89
+ timeout(10) do
90
+ puts "=> Scraping #{url}"
91
+
92
+ selector, value_selector = selector.split("::")
93
+ path = "disposable/#{name}.txt"
94
+ host_regex = /@?(.*?(\.[^.]+)+)/
95
+
96
+ refresh_list(url: url, path: path) do |response|
97
+ new_domains = response
98
+ .data
99
+ .css(selector)
100
+ .map {|element| process_scraping(element, value_selector) }
101
+
102
+ new_domains = new_domains
103
+ .map(&:squish)
104
+ .reject(&:empty?)
105
+ .map {|domain| domain[host_regex, 1]&.squish&.tr("@", "") }
106
+ .reject(&:nil?)
107
+ .reject(&:empty?)
108
+ .map {|domain| domain.gsub(/\s*\((.*?)\)/, "") }
109
+
110
+ raise "No #{name} hosts found" if new_domains.empty?
111
+
112
+ new_domains
113
+ end
114
+ end
115
+ rescue StandardError => error
116
+ puts "=> [ERROR] Unable to scrape #{url}; #{error.class}: #{error.message}"
117
+ []
118
+ end
119
+
120
+ def process_scraping(element, value_selector)
121
+ value = nil
122
+
123
+ case value_selector
124
+ when "text()"
125
+ value = element.text
126
+ when /^attr\((.*?)\)/
127
+ value = element[Regexp.last_match(1)]
128
+ else
129
+ element.attributes.each do |_name, attr|
130
+ attr = attr.value.to_s
131
+ value = attr if attr =~ host_regex
132
+ end
133
+ end
134
+
135
+ raise "no value found: #{element} (value_selector: #{value_selector})" unless value
136
+
137
+ value
138
+ end
139
+
140
+ def load_github_url(url)
141
+ puts "=> Fetching #{url}"
142
+
143
+ basename = URI.parse(url).path[%r{/([^/]+/[^/]+)}, 1].tr("/", "_").tr("-", "_")
144
+ path = "disposable/#{basename}.txt"
145
+ domains = load_file(path)
13
146
 
14
- domains = urls.each_with_object([]) do |url, buffer|
15
147
  ext = File.extname(url)
16
148
 
17
- result = case ext
18
- when ".json"
19
- JSON.parse(URI.open(url).read)
20
- when ".txt"
21
- URI.open(url).read.lines.map(&:chomp)
22
- else
23
- raise "Unknown extension"
24
- end
149
+ domains += case ext
150
+ when ".json"
151
+ JSON.parse(http_request(:get, url).body)
152
+ when ".txt"
153
+ http_request(:get, url).body.lines.map(&:chomp)
154
+ else
155
+ raise "Unknown extension"
156
+ end
157
+
158
+ append_to_file(path, domains)
159
+ domains
160
+ rescue StandardError => error
161
+ puts "=> Unable to load #{url}; #{error.class}: #{error.message}"
162
+ []
163
+ end
164
+
165
+ threads = []
166
+
167
+ threads << thread { load_github_url("https://raw.githubusercontent.com/ivolo/disposable-email-domains/master/index.json") }
168
+ threads << thread { load_github_url("https://raw.githubusercontent.com/andreis/disposable-email-domains/master/domains.json") }
169
+ threads << thread { load_github_url("https://raw.githubusercontent.com/FGRibreau/mailchecker/master/list.txt") }
170
+ threads << thread { load_github_url("https://raw.githubusercontent.com/willwhite/freemail/master/data/disposable.txt") }
171
+ threads << thread { load_github_url("https://raw.githubusercontent.com/maxmalysh/disposable-emails/master/disposable_emails/data/domains.txt") }
172
+ threads << thread { load_github_url("https://raw.githubusercontent.com/jespernissen/disposable-maildomain-list/master/disposable-maildomain-list.txt") }
173
+ threads << thread { load_github_url("https://raw.githubusercontent.com/wesbos/burner-email-providers/master/emails.txt") }
174
+ threads << thread { load_github_url("https://gist.github.com/fnando/dafe542cac13f831bbf5521a55248116/raw/disposable.txt") }
175
+ threads << thread { ten_minute_mail }
176
+ threads << thread { temp_mail }
177
+ threads << thread { temp_mail_address }
178
+ threads << thread { tempmail_io }
179
+ threads << thread { load_file("disposable/disposable_manually_added.txt") }
180
+ threads << thread { domain_scraping("guerrillamail", "https://www.guerrillamail.com/", "select option::attr(value)") }
181
+ threads << thread { domain_scraping("moakt", "https://www.moakt.com", "select option::attr(value)") }
182
+ threads << thread { domain_scraping("tempr", "https://tempr.email/", "select[name=DomainId] option::text()") }
183
+ threads << thread { domain_scraping("yepmail", "https://yepmail.co/", "select[name=domain] option::text()") }
184
+ threads << thread { domain_scraping("fake_email_generator", "https://fakemailgenerator.net", "[data-mailhost]::attr(data-mailhost)") }
185
+ threads << thread { domain_scraping("tempemails", "https://www.tempemails.net/", "select[name=domain] option::attr(value)") }
186
+ threads << thread { domain_scraping("clipmails", "https://clipmails.com/", "select[name=domain] option::attr(value)") }
187
+ threads << thread { domain_scraping("1secmail", "https://www.1secmail.com/", "select[id=domain] option::attr(value)") }
188
+ threads << thread { domain_scraping("emailfake", "https://generator.email", ".tt-suggestion p::text()") }
189
+ threads << thread { domain_scraping("emailfake", "https://emailfake.com/", ".tt-suggestion p::text()") }
190
+ threads << thread { domain_scraping("emailfake", "https://email-fake.com/", ".tt-suggestion p::text()") }
191
+ threads << thread { domain_scraping("receivemail", "https://www.receivemail.org/", "select[name=domain] option::text()") }
192
+ threads << thread { domain_scraping("itemp", "https://itemp.email", "select[name=domain] option::text()") }
193
+ threads << thread { domain_scraping("cs", "https://www.cs.email", "select[id=gm-host-select] option::text()") }
194
+ threads << thread { domain_scraping("tempmail", "https://tempmail.io/settings/", "select[id=domain] option::text()") }
195
+ threads << thread { domain_scraping("tempemail", "https://tempemail.co", "select[name=email_domain] option::text()") }
196
+ threads << thread { domain_scraping("tmail", "https://mytemp-email.com/", "a.domain-selector::text()") }
25
197
 
26
- buffer.push(*result)
198
+ threads.each_slice(5) do |slice|
199
+ slice.each(&:join)
27
200
  end
28
201
 
29
- domains.map!(&:downcase)
30
- domains.uniq!
31
- domains.sort!
202
+ threads.clear
32
203
 
33
- File.open("./data/disposable.json", "w") do |file|
34
- file << JSON.pretty_generate(domains)
204
+ domains = []
205
+
206
+ puts "=> Loading disposable_domains.txt"
207
+ domains += File.read("#{__dir__}/../data/disposable_domains.txt").lines.map(&:chomp)
208
+
209
+ puts "=> Loading disposable/*.txt"
210
+ Dir["./data/disposable/**/*.txt"].map do |file|
211
+ file = File.expand_path(file)
212
+ domains += File.read(file).lines.map(&:chomp).flatten.compact
35
213
  end
214
+
215
+ ignore_domains = %w[gmail.com hotmail.com]
216
+
217
+ puts "=> Normalize domains (count: #{domains.size})"
218
+ domains = domains
219
+ .uniq
220
+ .map {|domain| RootDomain.call(domain.split("@").last.downcase) }
221
+ .compact
222
+ .uniq
223
+ .reject {|domain| ignore_domains.include?(domain) }
224
+
225
+ puts "=> Saving domains (count: #{domains.size})"
226
+ save_file("disposable_domains.txt", domains)
227
+
228
+ emails = gmailnator
229
+ puts "=> Saving email proxies (count: #{emails.size})"
230
+ save_file("disposable_emails.txt", emails)
@@ -1,17 +1,20 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
- require "open-uri"
5
- require "json"
4
+ require_relative "helpers"
6
5
 
7
- tlds = URI.open("https://data.iana.org/TLD/tlds-alpha-by-domain.txt").read.lines
6
+ tlds = http_request(:get, "https://data.iana.org/TLD/tlds-alpha-by-domain.txt").body.lines
8
7
  tlds.shift # remove update notice
9
8
 
10
9
  tlds.map!(&:downcase)
11
10
  tlds.map!(&:strip)
12
- tlds.sort!
13
- tlds.uniq!
11
+ tlds.map! {|tld| SimpleIDN.to_ascii(tld) }
14
12
 
15
- File.open("./data/tld.json", "w") do |file|
16
- file << JSON.pretty_generate(tlds)
17
- end
13
+ save_file("tld.txt", tlds)
14
+
15
+ country_tlds = JSON.parse(http_request(:get, "https://github.com/samayo/country-json/raw/master/src/country-by-domain-tld.json").body, symbolize_names: true)
16
+ country_tlds = country_tlds
17
+ .reject {|info| info[:tld].nil? }
18
+ .map {|info| info[:tld].gsub(/^\./, "") }
19
+
20
+ save_file("country_tlds.txt", country_tlds)
@@ -0,0 +1,235 @@
1
+ ad
2
+ ae
3
+ af
4
+ ag
5
+ ai
6
+ al
7
+ am
8
+ an
9
+ ao
10
+ aq
11
+ ar
12
+ as
13
+ at
14
+ au
15
+ aw
16
+ az
17
+ ba
18
+ bb
19
+ bd
20
+ be
21
+ bf
22
+ bg
23
+ bh
24
+ bi
25
+ bj
26
+ bm
27
+ bn
28
+ bo
29
+ br
30
+ bs
31
+ bt
32
+ bv
33
+ bw
34
+ by
35
+ bz
36
+ ca
37
+ cc
38
+ cd
39
+ cf
40
+ cg
41
+ ch
42
+ ci
43
+ ck
44
+ cl
45
+ cm
46
+ cn
47
+ co
48
+ cr
49
+ cu
50
+ cv
51
+ cx
52
+ cy
53
+ cz
54
+ de
55
+ dj
56
+ dk
57
+ dm
58
+ do
59
+ dz
60
+ ec
61
+ ee
62
+ eg
63
+ eh
64
+ er
65
+ es
66
+ et
67
+ fi
68
+ fj
69
+ fk
70
+ fr
71
+ ga
72
+ gb
73
+ gd
74
+ ge
75
+ gf
76
+ gh
77
+ gi
78
+ gl
79
+ gm
80
+ gn
81
+ gp
82
+ gq
83
+ gr
84
+ gs
85
+ gt
86
+ gu
87
+ gw
88
+ gy
89
+ hk
90
+ hm
91
+ hn
92
+ hr
93
+ ht
94
+ hu
95
+ id
96
+ ie
97
+ il
98
+ in
99
+ io
100
+ iq
101
+ ir
102
+ is
103
+ it
104
+ jm
105
+ jo
106
+ jp
107
+ ke
108
+ kg
109
+ kh
110
+ ki
111
+ km
112
+ kn
113
+ kp
114
+ kr
115
+ kw
116
+ ky
117
+ kz
118
+ la
119
+ lb
120
+ lc
121
+ li
122
+ lk
123
+ lr
124
+ ls
125
+ lt
126
+ lu
127
+ lv
128
+ ly
129
+ ma
130
+ mc
131
+ md
132
+ mg
133
+ mh
134
+ mk
135
+ ml
136
+ mm
137
+ mn
138
+ mo
139
+ mp
140
+ mq
141
+ mr
142
+ ms
143
+ mt
144
+ mu
145
+ mv
146
+ mw
147
+ mx
148
+ my
149
+ mz
150
+ na
151
+ nc
152
+ ne
153
+ nf
154
+ ng
155
+ ni
156
+ nl
157
+ no
158
+ np
159
+ nr
160
+ nu
161
+ nz
162
+ om
163
+ pa
164
+ pe
165
+ pf
166
+ pg
167
+ ph
168
+ pk
169
+ pl
170
+ pm
171
+ pn
172
+ pr
173
+ ps
174
+ pt
175
+ pw
176
+ py
177
+ qa
178
+ re
179
+ ro
180
+ ru
181
+ rw
182
+ sa
183
+ sb
184
+ sc
185
+ sd
186
+ se
187
+ sg
188
+ sh
189
+ si
190
+ sj
191
+ sk
192
+ sl
193
+ sm
194
+ sn
195
+ so
196
+ sr
197
+ ss
198
+ st
199
+ sv
200
+ sy
201
+ sz
202
+ tc
203
+ td
204
+ tf
205
+ tg
206
+ th
207
+ tj
208
+ tk
209
+ tl
210
+ tm
211
+ tn
212
+ to
213
+ tr
214
+ tt
215
+ tv
216
+ tz
217
+ ua
218
+ ug
219
+ us
220
+ uy
221
+ uz
222
+ va
223
+ vc
224
+ ve
225
+ vg
226
+ vi
227
+ vn
228
+ vu
229
+ wf
230
+ ws
231
+ ye
232
+ yt
233
+ za
234
+ zm
235
+ zw