email_data 1601156760
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/FUNDING.yml +3 -0
- data/.gitignore +10 -0
- data/.rubocop.yml +7 -0
- data/.travis.yml +6 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +90 -0
- data/Rakefile +12 -0
- data/VERSION +1 -0
- data/bin/console +14 -0
- data/bin/helpers.rb +229 -0
- data/bin/setup +8 -0
- data/bin/sync +7 -0
- data/bin/sync-disposable-emails +213 -0
- data/bin/sync-tld +28 -0
- data/data/country_tlds.txt +235 -0
- data/data/disposable_domains.txt +111121 -0
- data/data/disposable_emails.txt +38 -0
- data/data/free_email_domains.txt +111 -0
- data/data/manual/disposable_domains.txt +0 -0
- data/data/manual/disposable_emails.txt +0 -0
- data/data/manual/free_email_domains.txt +111 -0
- data/data/tlds.txt +1508 -0
- data/email_data.gemspec +38 -0
- data/lib/email_data.rb +39 -0
- data/lib/email_data/version.rb +5 -0
- metadata +184 -0
data/bin/setup
ADDED
data/bin/sync
ADDED
@@ -0,0 +1,213 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require_relative "helpers"
|
5
|
+
|
6
|
+
# Asks the 10minutemail session endpoint for an address 20 times in a row and
# hands the host of each address to refresh_list for
# disposable/10minutemail.txt. Sleeps for `random_timeout` after every request.
def ten_minute_mail
  endpoint = "https://10minutemail.com/session/address"
  list_path = "disposable/10minutemail.txt"

  20.times do
    refresh_list(url: endpoint, path: list_path) do |response|
      address = response.data.fetch("address")

      # Element 1 of the split is whatever follows the first "@".
      [address.split("@")[1]]
    end

    sleep random_timeout
  end
end
|
20
|
+
|
21
|
+
# Refreshes disposable/tempmail.txt from temp-mail.org's domain endpoint,
# removing every "@" from the entries it returns.
def temp_mail
  refresh_list(
    url: "https://api4.temp-mail.org/request/domains/format/json",
    path: "disposable/tempmail.txt"
  ) do |response|
    response.data.map {|entry| entry.delete("@") }
  end
end
|
29
|
+
|
30
|
+
# Refreshes disposable/tempmailaddress.txt with the host of the address that
# tempmailaddress.com reports in its index payload.
def temp_mail_address
  endpoint = "https://www.tempmailaddress.com/index/index"
  list_path = "disposable/tempmailaddress.txt"

  refresh_list(url: endpoint, path: list_path) do |response|
    # Every character outside the whitelist is dropped before the body is
    # parsed as JSON.
    sanitized = response.body.gsub(/[^-,:\w@.{}"]/, "")
    payload = JSON.parse(sanitized, symbolize_names: true)

    [payload[:email].split("@")[-1]]
  end
end
|
42
|
+
|
43
|
+
# Refreshes disposable/tempmail_io.txt with the "domains" entry of the
# temp-mail.io API response.
def tempmail_io
  refresh_list(
    url: "https://api.internal.temp-mail.io/api/v2/domains",
    path: "disposable/tempmail_io.txt"
  ) {|response| response.data["domains"] }
end
|
51
|
+
|
52
|
+
# Collects generated addresses from gmailnator's bulk-email page and appends
# them to disposable/gmailnator.txt.
#
# Five rounds are made. Each round GETs the page to read the session cookie
# and the CSRF form field, POSTs the form back, and reads the addresses from
# the "#email-list-message a" nodes. The "+suffix" part and every dot are
# removed from the mailbox (presumably so Gmail aliases of the same inbox
# collapse into one entry). Sleeps for `random_timeout` after each round.
#
# Returns whatever append_to_file returns.
# Raises RuntimeError when a request is not OK or the CSRF field is missing.
def gmailnator
  url = "https://gmailnator.com/bulk-emails"
  emails = []

  5.times do
    default_headers = {"user-agent" => USER_AGENT.sample}

    response = Aitch.get(url: url, headers: default_headers)

    # `raise`, not `throw`: `throw` is non-local control flow and, without a
    # matching `catch`, surfaces as a confusing UncaughtThrowError.
    unless response.ok?
      raise "Received #{response.status} when getting CSRF token"
    end

    cookie_header = response.headers["set-cookie"]
    csrf_input = response.data.css("#csrf_token").first

    # Fail with a readable message instead of NoMethodError on nil when the
    # page layout changes.
    raise "CSRF token field not found at #{url}" unless csrf_input

    response = Aitch.post(
      url: url,
      params: {
        email_list: "1000",
        email: [3],
        csrf_input[:name] => csrf_input[:value]
      },
      headers: default_headers.merge("cookie" => cookie_header)
    )

    raise "Received #{response.status} when fetching list" unless response.ok?

    found = response.data.css("#email-list-message a").map do |node|
      mailbox, domain = node.text.gsub(/\+[^@]+/, "").split("@")
      mailbox = mailbox.delete(".")

      "#{mailbox}@#{domain}"
    end

    emails.concat(found)

    sleep random_timeout
  end

  append_to_file("disposable/gmailnator.txt", emails)
end
|
89
|
+
|
90
|
+
# Scrapes `url` for host names and passes them to refresh_list for
# disposable/<name>.txt.
#
# name     - basename of the list file; also used in the "no hosts" error.
# url      - page to scrape.
# selector - "<css selector>::<value selector>"; the second half is forwarded
#            to process_scraping for every matched element.
#
# The whole operation runs inside timeout(10). Any StandardError is printed
# and turned into an empty array; otherwise the value of the timeout block is
# returned.
def domain_scraping(name, url, selector)
  timeout(10) do
    puts "=> Scraping #{url}"

    css_query, value_selector = selector.split("::")
    list_path = "disposable/#{name}.txt"
    host_pattern = /@?(.*?(\.[^.]+)+)/

    refresh_list(url: url, path: list_path) do |response|
      raw_values = response.data.css(css_query).map do |node|
        process_scraping(node, value_selector)
      end

      candidates = raw_values.map(&:squish).reject(&:empty?)

      # Keep the host-looking part of each candidate, without any "@".
      hosts = candidates
              .map {|text| text[host_pattern, 1]&.squish&.delete("@") }
              .compact
              .reject(&:empty?)

      # Remove parenthesised remarks such as "example.com (new)".
      hosts = hosts.map {|host| host.gsub(/\s*\((.*?)\)/, "") }

      raise "No #{name} hosts found" if hosts.empty?

      hosts
    end
  end
rescue StandardError => error
  puts "=> [ERROR] Unable to scrape #{url}; #{error.class}: #{error.message}"
  []
end
|
121
|
+
|
122
|
+
# Extracts the raw host string from one scraped element.
#
# element        - node responding to #text, #[] and #attributes, where each
#                  attribute responds to #value (presumably a Nokogiri
#                  element; confirm against refresh_list's parser).
# value_selector - "text()" uses the node text; "attr(NAME)" uses the named
#                  attribute; anything else (including nil) falls back to the
#                  last attribute whose value looks like a host.
#
# Returns the extracted value.
# Raises RuntimeError when no value could be extracted.
def process_scraping(element, value_selector)
  # Defined here because domain_scraping's `host_regex` is a local variable
  # of that method and is not visible from this one.
  host_regex = /@?(.*?(\.[^.]+)+)/
  value = nil

  case value_selector
  when "text()"
    value = element.text
  when /^attr\((.*?)\)/
    value = element[Regexp.last_match(1)]
  else
    # No explicit selector: the last host-looking attribute value wins.
    element.attributes.each do |_name, attribute|
      text = attribute.value.to_s
      value = text if text =~ host_regex
    end
  end

  unless value
    raise "no value found: #{element} (value_selector: #{value_selector})"
  end

  value
end
|
143
|
+
|
144
|
+
# Remote lists fetched through load_github_url, in the order their threads
# are started.
github_lists = %w[
  https://raw.githubusercontent.com/ivolo/disposable-email-domains/master/index.json
  https://raw.githubusercontent.com/andreis/disposable-email-domains/master/domains.json
  https://raw.githubusercontent.com/FGRibreau/mailchecker/master/list.txt
  https://raw.githubusercontent.com/willwhite/freemail/master/data/disposable.txt
  https://raw.githubusercontent.com/maxmalysh/disposable-emails/master/disposable_emails/data/domains.txt
  https://raw.githubusercontent.com/jespernissen/disposable-maildomain-list/master/disposable-maildomain-list.txt
  https://raw.githubusercontent.com/wesbos/burner-email-providers/master/emails.txt
  https://gist.github.com/fnando/dafe542cac13f831bbf5521a55248116/raw/disposable.txt
]

# [name, url, selector] triples passed to domain_scraping.
scraping_targets = [
  ["guerrillamail", "https://www.guerrillamail.com/", "select option::attr(value)"],
  ["moakt", "https://www.moakt.com", "select option::attr(value)"],
  ["tempr", "https://tempr.email/", "select[name=DomainId] option::text()"],
  ["yepmail", "https://yepmail.co/", "select[name=domain] option::text()"],
  ["fake_email_generator", "https://fakemailgenerator.net", "[data-mailhost]::attr(data-mailhost)"],
  ["tempemails", "https://www.tempemails.net/", "select[name=domain] option::attr(value)"],
  ["clipmails", "https://clipmails.com/", "select[name=domain] option::attr(value)"],
  ["1secmail", "https://www.1secmail.com/", "select[id=domain] option::attr(value)"],
  ["emailfake", "https://generator.email", ".tt-suggestion p::text()"],
  ["emailfake", "https://emailfake.com/", ".tt-suggestion p::text()"],
  ["emailfake", "https://email-fake.com/", ".tt-suggestion p::text()"],
  ["receivemail", "https://www.receivemail.org/", "select[name=domain] option::text()"],
  ["itemp", "https://itemp.email", "select[name=domain] option::text()"],
  ["cs", "https://www.cs.email", "select[id=gm-host-select] option::text()"],
  ["tempmail", "https://tempmail.io/settings/", "select[id=domain] option::text()"],
  ["tempemail", "https://tempemail.co", "select[name=email_domain] option::text()"],
  ["tmail", "https://mytemp-email.com/", "a.domain-selector::text()"]
]

threads = []

github_lists.each do |list_url|
  threads << thread { load_github_url(list_url) }
end

threads << thread { ten_minute_mail }
threads << thread { temp_mail }
threads << thread { temp_mail_address }
threads << thread { tempmail_io }
threads << thread { load_file("disposable/disposable_manually_added.txt") }

scraping_targets.each do |name, url, selector|
  threads << thread { domain_scraping(name, url, selector) }
end

# Wait for every worker, in start order, before the lists are merged.
threads.each(&:join)
threads.clear

# Reads a list file and runs its lines through normalize_list.
read_list = ->(file) { normalize_list(File.read(file).lines) }

domains = []

puts "=> Loading disposable_domains.txt"
domains.concat(read_list.call("#{__dir__}/../data/disposable_domains.txt"))

puts "=> Loading manual/disposable_domains.txt"
domains.concat(read_list.call("#{__dir__}/../data/manual/disposable_domains.txt"))

puts "=> Loading disposable/*.txt"
Dir["./data/disposable/**/*.txt"].each do |file|
  domains.concat(read_list.call(File.expand_path(file)))
end

ignore_domains = %w[gmail.com hotmail.com]

puts "=> Normalize domains (count: #{domains.size})"

# Reduce each entry to the root domain of whatever follows its last "@".
root_domains = domains.uniq.map do |entry|
  RootDomain.call(entry.split("@").last.downcase)
end

domains = root_domains
          .compact
          .uniq
          .reject {|domain| ignore_domains.include?(domain) }

puts "=> Saving domains (count: #{domains.size})"
save_file("disposable_domains.txt", domains)

emails = gmailnator + read_list.call("#{__dir__}/../data/manual/disposable_emails.txt")
puts "=> Saving email proxies (count: #{emails.size})"
save_file("disposable_emails.txt", emails)
|
data/bin/sync-tld
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require_relative "helpers"
|
5
|
+
|
6
|
+
puts "=> Fetching tld list"
tld_lines = http_request(:get, "https://data.iana.org/TLD/tlds-alpha-by-domain.txt").body.lines
tld_lines.shift # remove update notice

# Lowercase and strip each entry, then convert it to its ASCII form.
tlds = tld_lines.map {|line| SimpleIDN.to_ascii(line.downcase.strip) }

puts "=> Saving tlds.txt"
save_file("tlds.txt", normalize_list(tlds))

puts "=> Fetching country tld list"
country_response = http_request(
  :get,
  "https://github.com/samayo/country-json/raw/master/src/country-by-domain-tld.json"
)
country_info = JSON.parse(country_response.body, symbolize_names: true)

# Entries without a :tld are skipped; a leading dot is removed from the rest.
country_tlds = country_info
               .map {|info| info[:tld] }
               .compact
               .map {|tld| tld.gsub(/^\./, "") }

puts "=> Saving country_tlds.txt"
save_file("country_tlds.txt", normalize_list(country_tlds))
|
@@ -0,0 +1,235 @@
|
|
1
|
+
ad
|
2
|
+
ae
|
3
|
+
af
|
4
|
+
ag
|
5
|
+
ai
|
6
|
+
al
|
7
|
+
am
|
8
|
+
an
|
9
|
+
ao
|
10
|
+
aq
|
11
|
+
ar
|
12
|
+
as
|
13
|
+
at
|
14
|
+
au
|
15
|
+
aw
|
16
|
+
az
|
17
|
+
ba
|
18
|
+
bb
|
19
|
+
bd
|
20
|
+
be
|
21
|
+
bf
|
22
|
+
bg
|
23
|
+
bh
|
24
|
+
bi
|
25
|
+
bj
|
26
|
+
bm
|
27
|
+
bn
|
28
|
+
bo
|
29
|
+
br
|
30
|
+
bs
|
31
|
+
bt
|
32
|
+
bv
|
33
|
+
bw
|
34
|
+
by
|
35
|
+
bz
|
36
|
+
ca
|
37
|
+
cc
|
38
|
+
cd
|
39
|
+
cf
|
40
|
+
cg
|
41
|
+
ch
|
42
|
+
ci
|
43
|
+
ck
|
44
|
+
cl
|
45
|
+
cm
|
46
|
+
cn
|
47
|
+
co
|
48
|
+
cr
|
49
|
+
cu
|
50
|
+
cv
|
51
|
+
cx
|
52
|
+
cy
|
53
|
+
cz
|
54
|
+
de
|
55
|
+
dj
|
56
|
+
dk
|
57
|
+
dm
|
58
|
+
do
|
59
|
+
dz
|
60
|
+
ec
|
61
|
+
ee
|
62
|
+
eg
|
63
|
+
eh
|
64
|
+
er
|
65
|
+
es
|
66
|
+
et
|
67
|
+
fi
|
68
|
+
fj
|
69
|
+
fk
|
70
|
+
fr
|
71
|
+
ga
|
72
|
+
gb
|
73
|
+
gd
|
74
|
+
ge
|
75
|
+
gf
|
76
|
+
gh
|
77
|
+
gi
|
78
|
+
gl
|
79
|
+
gm
|
80
|
+
gn
|
81
|
+
gp
|
82
|
+
gq
|
83
|
+
gr
|
84
|
+
gs
|
85
|
+
gt
|
86
|
+
gu
|
87
|
+
gw
|
88
|
+
gy
|
89
|
+
hk
|
90
|
+
hm
|
91
|
+
hn
|
92
|
+
hr
|
93
|
+
ht
|
94
|
+
hu
|
95
|
+
id
|
96
|
+
ie
|
97
|
+
il
|
98
|
+
in
|
99
|
+
io
|
100
|
+
iq
|
101
|
+
ir
|
102
|
+
is
|
103
|
+
it
|
104
|
+
jm
|
105
|
+
jo
|
106
|
+
jp
|
107
|
+
ke
|
108
|
+
kg
|
109
|
+
kh
|
110
|
+
ki
|
111
|
+
km
|
112
|
+
kn
|
113
|
+
kp
|
114
|
+
kr
|
115
|
+
kw
|
116
|
+
ky
|
117
|
+
kz
|
118
|
+
la
|
119
|
+
lb
|
120
|
+
lc
|
121
|
+
li
|
122
|
+
lk
|
123
|
+
lr
|
124
|
+
ls
|
125
|
+
lt
|
126
|
+
lu
|
127
|
+
lv
|
128
|
+
ly
|
129
|
+
ma
|
130
|
+
mc
|
131
|
+
md
|
132
|
+
mg
|
133
|
+
mh
|
134
|
+
mk
|
135
|
+
ml
|
136
|
+
mm
|
137
|
+
mn
|
138
|
+
mo
|
139
|
+
mp
|
140
|
+
mq
|
141
|
+
mr
|
142
|
+
ms
|
143
|
+
mt
|
144
|
+
mu
|
145
|
+
mv
|
146
|
+
mw
|
147
|
+
mx
|
148
|
+
my
|
149
|
+
mz
|
150
|
+
na
|
151
|
+
nc
|
152
|
+
ne
|
153
|
+
nf
|
154
|
+
ng
|
155
|
+
ni
|
156
|
+
nl
|
157
|
+
no
|
158
|
+
np
|
159
|
+
nr
|
160
|
+
nu
|
161
|
+
nz
|
162
|
+
om
|
163
|
+
pa
|
164
|
+
pe
|
165
|
+
pf
|
166
|
+
pg
|
167
|
+
ph
|
168
|
+
pk
|
169
|
+
pl
|
170
|
+
pm
|
171
|
+
pn
|
172
|
+
pr
|
173
|
+
ps
|
174
|
+
pt
|
175
|
+
pw
|
176
|
+
py
|
177
|
+
qa
|
178
|
+
re
|
179
|
+
ro
|
180
|
+
ru
|
181
|
+
rw
|
182
|
+
sa
|
183
|
+
sb
|
184
|
+
sc
|
185
|
+
sd
|
186
|
+
se
|
187
|
+
sg
|
188
|
+
sh
|
189
|
+
si
|
190
|
+
sj
|
191
|
+
sk
|
192
|
+
sl
|
193
|
+
sm
|
194
|
+
sn
|
195
|
+
so
|
196
|
+
sr
|
197
|
+
ss
|
198
|
+
st
|
199
|
+
sv
|
200
|
+
sy
|
201
|
+
sz
|
202
|
+
tc
|
203
|
+
td
|
204
|
+
tf
|
205
|
+
tg
|
206
|
+
th
|
207
|
+
tj
|
208
|
+
tk
|
209
|
+
tl
|
210
|
+
tm
|
211
|
+
tn
|
212
|
+
to
|
213
|
+
tr
|
214
|
+
tt
|
215
|
+
tv
|
216
|
+
tz
|
217
|
+
ua
|
218
|
+
ug
|
219
|
+
us
|
220
|
+
uy
|
221
|
+
uz
|
222
|
+
va
|
223
|
+
vc
|
224
|
+
ve
|
225
|
+
vg
|
226
|
+
vi
|
227
|
+
vn
|
228
|
+
vu
|
229
|
+
wf
|
230
|
+
ws
|
231
|
+
ye
|
232
|
+
yt
|
233
|
+
za
|
234
|
+
zm
|
235
|
+
zw
|