email_data 1601156760

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,8 @@
1
#!/usr/bin/env bash
# Development bootstrap: installs the bundle's gems.

# Abort on the first failing command, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Split words on newlines and tabs only (not on spaces).
IFS=$'\n\t'

# From here on, echo every line as it is read and every command as it runs.
set -o verbose -o xtrace

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,7 @@
1
#!/usr/bin/env bash
# Runs the three sync scripts in order, printing one blank line between the
# output of consecutive scripts. A failing script does not stop the next one;
# the exit status is that of the last script.

separator=""

for task in sync-tld sync-free-emails sync-disposable-emails; do
  # Empty before the first script, a newline before every later one.
  printf '%s' "$separator"
  separator=$'\n'

  # Kept as the last command of the iteration so its status is the loop's.
  "./bin/$task"
done
@@ -0,0 +1,213 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require_relative "helpers"
5
+
6
# Requests 10minutemail's session address endpoint 20 times and passes the
# host of each returned address to `refresh_list` for
# disposable/10minutemail.txt.
#
# After every request the method sleeps for whatever `random_timeout` returns.
def ten_minute_mail
  list_path = "disposable/10minutemail.txt"
  endpoint = "https://10minutemail.com/session/address"

  20.times do
    refresh_list(url: endpoint, path: list_path) do |response|
      address = response.data.fetch("address")

      # The second "@"-separated segment is the host.
      [address.split("@")[1]]
    end

    sleep random_timeout
  end
end
20
+
21
# Passes temp-mail.org's domain list to `refresh_list` for
# disposable/tempmail.txt, with every "@" character removed from each entry.
def temp_mail
  refresh_list(
    url: "https://api4.temp-mail.org/request/domains/format/json",
    path: "disposable/tempmail.txt"
  ) do |response|
    response.data.map {|entry| entry.delete("@") }
  end
end
29
+
30
# Reads the address returned by tempmailaddress.com and passes its host to
# `refresh_list` for disposable/tempmailaddress.txt.
def temp_mail_address
  list_path = "disposable/tempmailaddress.txt"
  endpoint = "https://www.tempmailaddress.com/index/index"

  refresh_list(url: endpoint, path: list_path) do |response|
    # Drop every character outside the allowed set before parsing the body
    # as JSON.
    sanitized = response.body.gsub(/[^-,:\w@.{}"]/, "")
    payload = JSON.parse(sanitized, symbolize_names: true)

    [payload[:email].split("@").last]
  end
end
42
+
43
# Passes the "domains" entry of temp-mail.io's domain API response to
# `refresh_list` for disposable/tempmail_io.txt.
def tempmail_io
  refresh_list(
    url: "https://api.internal.temp-mail.io/api/v2/domains",
    path: "disposable/tempmail_io.txt"
  ) {|response| response.data["domains"] }
end
51
+
52
# Collects addresses from gmailnator's bulk generator.
#
# Runs 5 rounds. Each round GETs the bulk page to read the `set-cookie`
# header and the `#csrf_token` form field, POSTs the form back with that
# cookie, and reads the addresses out of `#email-list-message a`. Any "+tag"
# suffix and every dot in the mailbox part are removed from each address.
# Between rounds the method sleeps for whatever `random_timeout` returns.
#
# Returns the result of `append_to_file("disposable/gmailnator.txt", emails)`.
#
# Raises RuntimeError when a request does not succeed or the CSRF field is
# not present in the page.
def gmailnator
  url = "https://gmailnator.com/bulk-emails"
  emails = []

  5.times do
    default_headers = {"user-agent" => USER_AGENT.sample}

    response = Aitch.get(url: url, headers: default_headers)

    # `raise`, not `throw`: `throw` is catch/throw control flow and would
    # surface as an UncaughtThrowError instead of reporting this message.
    unless response.ok?
      raise "Received #{response.status} when getting CSRF token"
    end

    cookie_header = response.headers["set-cookie"]
    csrf_input = response.data.css("#csrf_token").first

    # Fail with a clear message instead of a NoMethodError on nil.
    raise "CSRF token field not found at #{url}" unless csrf_input

    csrf_token = csrf_input[:value]
    csrf_field = csrf_input[:name]

    response = Aitch.post(
      url: url,
      params: {email_list: "1000", email: [3], csrf_field => csrf_token},
      headers: default_headers.merge({"cookie" => cookie_header})
    )

    raise "Received #{response.status} when fetching list" unless response.ok?

    addresses = response.data.css("#email-list-message a").map do |node|
      mailbox, domain = node.text.gsub(/\+[^@]+/, "").split("@")
      "#{mailbox.delete('.')}@#{domain}"
    end
    emails.concat(addresses)

    sleep random_timeout
  end

  append_to_file("disposable/gmailnator.txt", emails)
end
89
+
90
# Scrapes disposable e-mail hosts from a web page.
#
# name     - list name; the list path is "disposable/<name>.txt".
# url      - page to fetch.
# selector - "<css selector>::<value selector>"; the part after "::" is
#            forwarded to `process_scraping` to pick each element's value.
#
# The whole scrape runs inside `timeout(10)` (presumably a 10-second limit;
# confirm against the helper). Any StandardError is reported on stdout and
# turned into an empty array.
def domain_scraping(name, url, selector)
  timeout(10) do
    puts "=> Scraping #{url}"

    css_selector, value_selector = selector.split("::")
    host_regex = /@?(.*?(\.[^.]+)+)/

    refresh_list(url: url, path: "disposable/#{name}.txt") do |response|
      raw_values = response.data.css(css_selector).map do |element|
        process_scraping(element, value_selector)
      end

      hosts = raw_values
              .map(&:squish)
              .reject(&:empty?)
              .map {|value| value[host_regex, 1]&.squish&.tr("@", "") }
              .compact
              .reject(&:empty?)
              .map {|host| host.gsub(/\s*\((.*?)\)/, "") } # strip "(...)" notes

      # An empty result is treated as a failed scrape.
      raise "No #{name} hosts found" if hosts.empty?

      hosts
    end
  end
rescue StandardError => error
  puts "=> [ERROR] Unable to scrape #{url}; #{error.class}: #{error.message}"
  []
end
121
+
122
# Extracts the raw value of a scraped element.
#
# element        - node responding to `text`, `[]` and `attributes` (name =>
#                  object responding to `value`).
# value_selector - "text()" to use the element's text, "attr(<name>)" to use
#                  that attribute; anything else falls back to the last
#                  attribute whose value looks like a host.
#
# Returns the selected value.
# Raises RuntimeError when no value could be selected.
def process_scraping(element, value_selector)
  # Same pattern `domain_scraping` applies to the values; it has to be
  # defined here because that method's local is not visible from this one.
  host_regex = /@?(.*?(\.[^.]+)+)/

  value =
    case value_selector
    when "text()"
      element.text
    when /^attr\((.*?)\)/
      element[Regexp.last_match(1)]
    else
      # When several attributes look like a host, the last one wins.
      element
        .attributes
        .map {|_name, attr| attr.value.to_s }
        .select {|text| text =~ host_regex }
        .last
    end

  unless value
    raise "no value found: #{element} (value_selector: #{value_selector})"
  end

  value
end
143
+
144
# Plain-text / JSON lists fetched through `load_github_url`.
github_lists = %w[
  https://raw.githubusercontent.com/ivolo/disposable-email-domains/master/index.json
  https://raw.githubusercontent.com/andreis/disposable-email-domains/master/domains.json
  https://raw.githubusercontent.com/FGRibreau/mailchecker/master/list.txt
  https://raw.githubusercontent.com/willwhite/freemail/master/data/disposable.txt
  https://raw.githubusercontent.com/maxmalysh/disposable-emails/master/disposable_emails/data/domains.txt
  https://raw.githubusercontent.com/jespernissen/disposable-maildomain-list/master/disposable-maildomain-list.txt
  https://raw.githubusercontent.com/wesbos/burner-email-providers/master/emails.txt
  https://gist.github.com/fnando/dafe542cac13f831bbf5521a55248116/raw/disposable.txt
]

# [name, url, selector] triples handed to `domain_scraping`.
# NOTE(review): three entries share the name "emailfake" and therefore the
# same list path; confirm `refresh_list` tolerates concurrent updates.
scrapers = [
  ["guerrillamail", "https://www.guerrillamail.com/", "select option::attr(value)"],
  ["moakt", "https://www.moakt.com", "select option::attr(value)"],
  ["tempr", "https://tempr.email/", "select[name=DomainId] option::text()"],
  ["yepmail", "https://yepmail.co/", "select[name=domain] option::text()"],
  ["fake_email_generator", "https://fakemailgenerator.net", "[data-mailhost]::attr(data-mailhost)"],
  ["tempemails", "https://www.tempemails.net/", "select[name=domain] option::attr(value)"],
  ["clipmails", "https://clipmails.com/", "select[name=domain] option::attr(value)"],
  ["1secmail", "https://www.1secmail.com/", "select[id=domain] option::attr(value)"],
  ["emailfake", "https://generator.email", ".tt-suggestion p::text()"],
  ["emailfake", "https://emailfake.com/", ".tt-suggestion p::text()"],
  ["emailfake", "https://email-fake.com/", ".tt-suggestion p::text()"],
  ["receivemail", "https://www.receivemail.org/", "select[name=domain] option::text()"],
  ["itemp", "https://itemp.email", "select[name=domain] option::text()"],
  ["cs", "https://www.cs.email", "select[id=gm-host-select] option::text()"],
  ["tempmail", "https://tempmail.io/settings/", "select[id=domain] option::text()"],
  ["tempemail", "https://tempemail.co", "select[name=email_domain] option::text()"],
  ["tmail", "https://mytemp-email.com/", "a.domain-selector::text()"]
]

threads = []

github_lists.each do |list_url|
  threads << thread { load_github_url(list_url) }
end

threads << thread { ten_minute_mail }
threads << thread { temp_mail }
threads << thread { temp_mail_address }
threads << thread { tempmail_io }
threads << thread { load_file("disposable/disposable_manually_added.txt") }

scrapers.each do |name, url, selector|
  threads << thread { domain_scraping(name, url, selector) }
end

# Wait for everything, five threads at a time, in creation order.
# NOTE(review): this only groups the joins; whether it limits how many
# sources run at once depends on when `thread` starts its block — confirm.
threads.each_slice(5) do |batch|
  batch.each(&:join)
end

threads.clear
182
+
183
# Merge every domain list into data/disposable_domains.txt, then build the
# list of disposable e-mail addresses.
#
# All reads are anchored to this script's directory so the result does not
# depend on the directory the script is started from.
data_dir = File.expand_path("../data", __dir__)
domains = []

puts "=> Loading disposable_domains.txt"
domains.concat(normalize_list(File.read("#{data_dir}/disposable_domains.txt").lines))

puts "=> Loading manual/disposable_domains.txt"
domains.concat(normalize_list(File.read("#{data_dir}/manual/disposable_domains.txt").lines))

puts "=> Loading disposable/*.txt"
Dir["#{data_dir}/disposable/**/*.txt"].each do |file|
  domains.concat(normalize_list(File.read(File.expand_path(file)).lines))
end

# Domains that must never end up in the disposable list.
ignore_domains = %w[gmail.com hotmail.com]

puts "=> Normalize domains (count: #{domains.size})"
domains = domains
          .uniq
          # Entries may be full addresses; keep the part after the last "@".
          .map {|domain| RootDomain.call(domain.split("@").last.downcase) }
          .compact
          .uniq
          .reject {|domain| ignore_domains.include?(domain) }

puts "=> Saving domains (count: #{domains.size})"
save_file("disposable_domains.txt", domains)

# Addresses gathered by `gmailnator` plus the manually maintained ones.
emails = gmailnator
emails += normalize_list(File.read("#{data_dir}/manual/disposable_emails.txt").lines)
puts "=> Saving email proxies (count: #{emails.size})"
save_file("disposable_emails.txt", emails)
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require_relative "helpers"
5
+
6
# Refresh tlds.txt from IANA's list of top-level domains.
puts "=> Fetching tld list"
tld_response = http_request(:get, "https://data.iana.org/TLD/tlds-alpha-by-domain.txt")

# The first line of the body is the update notice, not a TLD.
tlds = tld_response
       .body
       .lines
       .drop(1)
       .map {|line| SimpleIDN.to_ascii(line.downcase.strip) }

puts "=> Saving tlds.txt"
save_file("tlds.txt", normalize_list(tlds))

# Refresh country_tlds.txt from the country-json project.
puts "=> Fetching country tld list"
country_response = http_request(:get, "https://github.com/samayo/country-json/raw/master/src/country-by-domain-tld.json")
country_info = JSON.parse(country_response.body, symbolize_names: true)

# Skip entries without a TLD and drop the leading dot from the rest.
country_tlds = country_info
               .map {|info| info[:tld] }
               .compact
               .map {|tld| tld.gsub(/^\./, "") }

puts "=> Saving country_tlds.txt"
save_file("country_tlds.txt", normalize_list(country_tlds))
@@ -0,0 +1,235 @@
1
+ ad
2
+ ae
3
+ af
4
+ ag
5
+ ai
6
+ al
7
+ am
8
+ an
9
+ ao
10
+ aq
11
+ ar
12
+ as
13
+ at
14
+ au
15
+ aw
16
+ az
17
+ ba
18
+ bb
19
+ bd
20
+ be
21
+ bf
22
+ bg
23
+ bh
24
+ bi
25
+ bj
26
+ bm
27
+ bn
28
+ bo
29
+ br
30
+ bs
31
+ bt
32
+ bv
33
+ bw
34
+ by
35
+ bz
36
+ ca
37
+ cc
38
+ cd
39
+ cf
40
+ cg
41
+ ch
42
+ ci
43
+ ck
44
+ cl
45
+ cm
46
+ cn
47
+ co
48
+ cr
49
+ cu
50
+ cv
51
+ cx
52
+ cy
53
+ cz
54
+ de
55
+ dj
56
+ dk
57
+ dm
58
+ do
59
+ dz
60
+ ec
61
+ ee
62
+ eg
63
+ eh
64
+ er
65
+ es
66
+ et
67
+ fi
68
+ fj
69
+ fk
70
+ fr
71
+ ga
72
+ gb
73
+ gd
74
+ ge
75
+ gf
76
+ gh
77
+ gi
78
+ gl
79
+ gm
80
+ gn
81
+ gp
82
+ gq
83
+ gr
84
+ gs
85
+ gt
86
+ gu
87
+ gw
88
+ gy
89
+ hk
90
+ hm
91
+ hn
92
+ hr
93
+ ht
94
+ hu
95
+ id
96
+ ie
97
+ il
98
+ in
99
+ io
100
+ iq
101
+ ir
102
+ is
103
+ it
104
+ jm
105
+ jo
106
+ jp
107
+ ke
108
+ kg
109
+ kh
110
+ ki
111
+ km
112
+ kn
113
+ kp
114
+ kr
115
+ kw
116
+ ky
117
+ kz
118
+ la
119
+ lb
120
+ lc
121
+ li
122
+ lk
123
+ lr
124
+ ls
125
+ lt
126
+ lu
127
+ lv
128
+ ly
129
+ ma
130
+ mc
131
+ md
132
+ mg
133
+ mh
134
+ mk
135
+ ml
136
+ mm
137
+ mn
138
+ mo
139
+ mp
140
+ mq
141
+ mr
142
+ ms
143
+ mt
144
+ mu
145
+ mv
146
+ mw
147
+ mx
148
+ my
149
+ mz
150
+ na
151
+ nc
152
+ ne
153
+ nf
154
+ ng
155
+ ni
156
+ nl
157
+ no
158
+ np
159
+ nr
160
+ nu
161
+ nz
162
+ om
163
+ pa
164
+ pe
165
+ pf
166
+ pg
167
+ ph
168
+ pk
169
+ pl
170
+ pm
171
+ pn
172
+ pr
173
+ ps
174
+ pt
175
+ pw
176
+ py
177
+ qa
178
+ re
179
+ ro
180
+ ru
181
+ rw
182
+ sa
183
+ sb
184
+ sc
185
+ sd
186
+ se
187
+ sg
188
+ sh
189
+ si
190
+ sj
191
+ sk
192
+ sl
193
+ sm
194
+ sn
195
+ so
196
+ sr
197
+ ss
198
+ st
199
+ sv
200
+ sy
201
+ sz
202
+ tc
203
+ td
204
+ tf
205
+ tg
206
+ th
207
+ tj
208
+ tk
209
+ tl
210
+ tm
211
+ tn
212
+ to
213
+ tr
214
+ tt
215
+ tv
216
+ tz
217
+ ua
218
+ ug
219
+ us
220
+ uy
221
+ uz
222
+ va
223
+ vc
224
+ ve
225
+ vg
226
+ vi
227
+ vn
228
+ vu
229
+ wf
230
+ ws
231
+ ye
232
+ yt
233
+ za
234
+ zm
235
+ zw