email_data 1601156760

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Development setup script: installs the project's gems with Bundler.

# Abort on any failing command, on use of an unset variable, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
# Split words on newlines and tabs only, never on plain spaces.
IFS=$'\n\t'
# Echo every line as it is read and every command as it is executed.
set -o verbose -o xtrace

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,7 @@
#!/usr/bin/env bash

# Runs every sync task in order, printing one blank line between consecutive
# tasks (none before the first, none after the last). A failing task does not
# stop the following ones; the exit status is that of the last task.
tasks=(sync-tld sync-free-emails sync-disposable-emails)

for index in "${!tasks[@]}"; do
  if (( index > 0 )); then
    echo
  fi

  "./bin/${tasks[index]}"
done
@@ -0,0 +1,213 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require_relative "helpers"
5
+
# Requests a 10minutemail session address 20 times, handing the host part of
# each address to refresh_list for "disposable/10minutemail.txt".
#
# Sleeps for random_timeout between requests. refresh_list and random_timeout
# come from the helpers file; presumably refresh_list merges the block's
# result into the list stored at `path` -- confirm against the helper.
def ten_minute_mail
  endpoint = "https://10minutemail.com/session/address"
  list_path = "disposable/10minutemail.txt"

  20.times do
    refresh_list(url: endpoint, path: list_path) do |response|
      address = response.data.fetch("address")

      # Only the part right after the first "@" is of interest.
      [address.split("@")[1]]
    end

    sleep(random_timeout)
  end
end
20
+
# Refreshes "disposable/tempmail.txt" from temp-mail.org's JSON domain list.
#
# Every entry of the response has its "@" characters removed before being
# handed back to refresh_list.
def temp_mail
  refresh_list(
    url: "https://api4.temp-mail.org/request/domains/format/json",
    path: "disposable/tempmail.txt"
  ) do |response|
    response.data.map {|entry| entry.delete("@") }
  end
end
29
+
# Refreshes "disposable/tempmailaddress.txt" with the host of the address
# returned by tempmailaddress.com.
#
# The response body is stripped of every character outside the whitelist in
# the regexp below before it is parsed as JSON; the host is whatever follows
# the last "@" of the `email` field.
def temp_mail_address
  endpoint = "https://www.tempmailaddress.com/index/index"
  list_path = "disposable/tempmailaddress.txt"

  refresh_list(url: endpoint, path: list_path) do |response|
    sanitized_body = response.body.gsub(/[^-,:\w@.{}"]/, "")
    payload = JSON.parse(sanitized_body, symbolize_names: true)

    [payload[:email].split("@").last]
  end
end
42
+
# Refreshes "disposable/tempmail_io.txt" with the value stored under the
# "domains" key of temp-mail.io's domains endpoint response.
def tempmail_io
  refresh_list(
    url: "https://api.internal.temp-mail.io/api/v2/domains",
    path: "disposable/tempmail_io.txt"
  ) {|response| response.data["domains"] }
end
51
+
# Collects addresses from gmailnator's bulk e-mail page and appends them to
# "disposable/gmailnator.txt".
#
# Performs five rounds. Each round GETs the page to obtain the session cookie
# and the CSRF form field, then POSTs the form and reads the generated
# addresses from the "#email-list-message a" links. Addresses are normalized
# by dropping the "+tag" suffix and every dot of the mailbox part.
#
# Returns whatever append_to_file (defined in the helpers file) returns.
# Raises RuntimeError when a request is not successful or the CSRF field is
# missing from the page.
def gmailnator
  url = "https://gmailnator.com/bulk-emails"
  emails = []

  5.times do
    default_headers = {"user-agent" => USER_AGENT.sample}

    response = Aitch.get(url: url, headers: default_headers)

    # `raise`, not `throw`: throw is for catch/throw control flow and would
    # surface here as a misleading UncaughtThrowError.
    unless response.ok?
      raise "Received #{response.status} when getting CSRF token"
    end

    cookie_header = response.headers["set-cookie"]
    csrf_input = response.data.css("#csrf_token").first

    # Fail with a descriptive message instead of a NoMethodError on nil when
    # the page markup no longer contains the field.
    raise "CSRF token field not found at #{url}" unless csrf_input

    csrf_token = csrf_input[:value]
    csrf_field = csrf_input[:name]

    # NOTE(review): the meaning of `email_list` and `email: [3]` is dictated by
    # the remote form and cannot be verified from this file.
    response = Aitch.post(
      url: url,
      params: {email_list: "1000", email: [3], csrf_field => csrf_token},
      headers: default_headers.merge({"cookie" => cookie_header})
    )

    raise "Received #{response.status} when fetching list" unless response.ok?

    found = response.data.css("#email-list-message a").map do |node|
      mailbox, domain = node.text.gsub(/\+[^@]+/, "").split("@")

      # Skip link texts that are not a complete address.
      next unless mailbox && domain

      "#{mailbox.delete('.')}@#{domain}"
    end

    emails += found.compact

    sleep random_timeout
  end

  append_to_file("disposable/gmailnator.txt", emails)
end
89
+
# Scrapes disposable e-mail hosts from a provider page and refreshes
# "disposable/<name>.txt" with them.
#
# name     - basename (without extension) of the list file.
# url      - page to scrape.
# selector - "<css selector>::<value selector>"; the second part is passed
#            to process_scraping to pick the value out of each matched node.
#
# The whole operation runs inside timeout(10). Any StandardError is reported
# on stdout and swallowed, in which case an empty array is returned;
# otherwise the result of the timeout block is returned.
def domain_scraping(name, url, selector)
  timeout(10) do
    puts "=> Scraping #{url}"

    css_selector, value_selector = selector.split("::")
    host_pattern = /@?(.*?(\.[^.]+)+)/

    refresh_list(url: url, path: "disposable/#{name}.txt") do |response|
      raw_values = response.data.css(css_selector).map do |element|
        process_scraping(element, value_selector)
      end

      # Extract the host-looking part of every non-blank value.
      candidates = raw_values.map(&:squish).reject(&:empty?).map do |raw|
        raw[host_pattern, 1]&.squish&.tr("@", "")
      end

      # Drop misses and blanks, then strip trailing "(...)" annotations.
      hosts = candidates
              .compact
              .reject(&:empty?)
              .map {|host| host.gsub(/\s*\((.*?)\)/, "") }

      raise "No #{name} hosts found" if hosts.empty?

      hosts
    end
  end
rescue StandardError => error
  puts "=> [ERROR] Unable to scrape #{url}; #{error.class}: #{error.message}"
  []
end
121
+
# Extracts the raw value of a scraped element according to value_selector.
#
# element        - node responding to #text, #[] and #attributes (a hash-like
#                  collection of name => attribute, each responding to #value).
# value_selector - "text()" to read the node's text, "attr(NAME)" to read the
#                  attribute NAME; anything else (including nil) falls back to
#                  the last attribute whose value looks like a host.
#
# Returns the extracted value.
# Raises RuntimeError when no value could be found.
def process_scraping(element, value_selector)
  # Defined locally: the fallback branch previously referenced a variable
  # that only exists inside domain_scraping and failed with NameError.
  host_regex = /@?(.*?(\.[^.]+)+)/

  value =
    case value_selector
    when "text()"
      element.text
    when /^attr\((.*?)\)/
      element[Regexp.last_match(1)]
    else
      element
        .attributes
        .map {|_name, attr| attr.value.to_s }
        .grep(host_regex)
        .last
    end

  unless value
    raise "no value found: #{element} (value_selector: #{value_selector})"
  end

  value
end
143
+
# Third-party lists loaded through load_github_url (helpers file).
github_lists = %w[
  https://raw.githubusercontent.com/ivolo/disposable-email-domains/master/index.json
  https://raw.githubusercontent.com/andreis/disposable-email-domains/master/domains.json
  https://raw.githubusercontent.com/FGRibreau/mailchecker/master/list.txt
  https://raw.githubusercontent.com/willwhite/freemail/master/data/disposable.txt
  https://raw.githubusercontent.com/maxmalysh/disposable-emails/master/disposable_emails/data/domains.txt
  https://raw.githubusercontent.com/jespernissen/disposable-maildomain-list/master/disposable-maildomain-list.txt
  https://raw.githubusercontent.com/wesbos/burner-email-providers/master/emails.txt
  https://gist.github.com/fnando/dafe542cac13f831bbf5521a55248116/raw/disposable.txt
]

# Provider pages scraped through domain_scraping, as
# [name, url, "<css selector>::<value selector>"].
# NOTE(review): three targets share the name "emailfake" and therefore the
# same "disposable/emailfake.txt" path while running concurrently -- confirm
# refresh_list tolerates that.
scraping_targets = [
  ["guerrillamail", "https://www.guerrillamail.com/", "select option::attr(value)"],
  ["moakt", "https://www.moakt.com", "select option::attr(value)"],
  ["tempr", "https://tempr.email/", "select[name=DomainId] option::text()"],
  ["yepmail", "https://yepmail.co/", "select[name=domain] option::text()"],
  ["fake_email_generator", "https://fakemailgenerator.net", "[data-mailhost]::attr(data-mailhost)"],
  ["tempemails", "https://www.tempemails.net/", "select[name=domain] option::attr(value)"],
  ["clipmails", "https://clipmails.com/", "select[name=domain] option::attr(value)"],
  ["1secmail", "https://www.1secmail.com/", "select[id=domain] option::attr(value)"],
  ["emailfake", "https://generator.email", ".tt-suggestion p::text()"],
  ["emailfake", "https://emailfake.com/", ".tt-suggestion p::text()"],
  ["emailfake", "https://email-fake.com/", ".tt-suggestion p::text()"],
  ["receivemail", "https://www.receivemail.org/", "select[name=domain] option::text()"],
  ["itemp", "https://itemp.email", "select[name=domain] option::text()"],
  ["cs", "https://www.cs.email", "select[id=gm-host-select] option::text()"],
  ["tempmail", "https://tempmail.io/settings/", "select[id=domain] option::text()"],
  ["tempemail", "https://tempemail.co", "select[name=email_domain] option::text()"],
  ["tmail", "https://mytemp-email.com/", "a.domain-selector::text()"]
]

# Start every source through the `thread` helper, in the same order as before.
threads = github_lists.map {|url| thread { load_github_url(url) } }
threads << thread { ten_minute_mail }
threads << thread { temp_mail }
threads << thread { temp_mail_address }
threads << thread { tempmail_io }
threads << thread { load_file("disposable/disposable_manually_added.txt") }
threads += scraping_targets.map do |name, url, selector|
  thread { domain_scraping(name, url, selector) }
end

# Joining in slices of five was equivalent to joining in order.
threads.each(&:join)
threads.clear

# Resolve every data file against this script's location so the result does
# not depend on the current working directory.
data_dir = File.expand_path("../data", __dir__)
domains = []

puts "=> Loading disposable_domains.txt"
domains += normalize_list(File.read(File.join(data_dir, "disposable_domains.txt")).lines)

puts "=> Loading manual/disposable_domains.txt"
domains += normalize_list(File.read(File.join(data_dir, "manual", "disposable_domains.txt")).lines)

puts "=> Loading disposable/*.txt"
Dir[File.join(data_dir, "disposable", "**", "*.txt")].each do |file|
  domains += normalize_list(File.read(file).lines)
end

# Mainstream providers that must never end up in the disposable list.
ignore_domains = %w[gmail.com hotmail.com]

puts "=> Normalize domains (count: #{domains.size})"
domains = domains
          .uniq
          .map {|domain| RootDomain.call(domain.split("@").last.downcase) }
          .compact
          .uniq
          .reject {|domain| ignore_domains.include?(domain) }

puts "=> Saving domains (count: #{domains.size})"
save_file("disposable_domains.txt", domains)

emails = gmailnator
emails += normalize_list(File.read(File.join(data_dir, "manual", "disposable_emails.txt")).lines)
puts "=> Saving email proxies (count: #{emails.size})"
save_file("disposable_emails.txt", emails)
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require_relative "helpers"
5
+
# Downloads the IANA TLD list and saves it, lowercased, stripped and converted
# with SimpleIDN.to_ascii, as tlds.txt.
puts "=> Fetching tld list"
tld_lines = http_request(:get, "https://data.iana.org/TLD/tlds-alpha-by-domain.txt").body.lines

# The first line is the update notice, not a TLD.
tlds = tld_lines.drop(1).map {|tld| SimpleIDN.to_ascii(tld.downcase.strip) }

puts "=> Saving tlds.txt"
save_file("tlds.txt", normalize_list(tlds))

# Downloads the country -> TLD mapping and saves the TLDs, without their
# leading dot, as country_tlds.txt. Entries without a TLD are skipped.
puts "=> Fetching country tld list"
country_url = "https://github.com/samayo/country-json/raw/master/src/country-by-domain-tld.json"
country_info = JSON.parse(http_request(:get, country_url).body, symbolize_names: true)

country_tlds = country_info
               .map {|info| info[:tld] }
               .compact
               .map {|tld| tld.gsub(/^\./, "") }

puts "=> Saving country_tlds.txt"
save_file("country_tlds.txt", normalize_list(country_tlds))
@@ -0,0 +1,235 @@
1
+ ad
2
+ ae
3
+ af
4
+ ag
5
+ ai
6
+ al
7
+ am
8
+ an
9
+ ao
10
+ aq
11
+ ar
12
+ as
13
+ at
14
+ au
15
+ aw
16
+ az
17
+ ba
18
+ bb
19
+ bd
20
+ be
21
+ bf
22
+ bg
23
+ bh
24
+ bi
25
+ bj
26
+ bm
27
+ bn
28
+ bo
29
+ br
30
+ bs
31
+ bt
32
+ bv
33
+ bw
34
+ by
35
+ bz
36
+ ca
37
+ cc
38
+ cd
39
+ cf
40
+ cg
41
+ ch
42
+ ci
43
+ ck
44
+ cl
45
+ cm
46
+ cn
47
+ co
48
+ cr
49
+ cu
50
+ cv
51
+ cx
52
+ cy
53
+ cz
54
+ de
55
+ dj
56
+ dk
57
+ dm
58
+ do
59
+ dz
60
+ ec
61
+ ee
62
+ eg
63
+ eh
64
+ er
65
+ es
66
+ et
67
+ fi
68
+ fj
69
+ fk
70
+ fr
71
+ ga
72
+ gb
73
+ gd
74
+ ge
75
+ gf
76
+ gh
77
+ gi
78
+ gl
79
+ gm
80
+ gn
81
+ gp
82
+ gq
83
+ gr
84
+ gs
85
+ gt
86
+ gu
87
+ gw
88
+ gy
89
+ hk
90
+ hm
91
+ hn
92
+ hr
93
+ ht
94
+ hu
95
+ id
96
+ ie
97
+ il
98
+ in
99
+ io
100
+ iq
101
+ ir
102
+ is
103
+ it
104
+ jm
105
+ jo
106
+ jp
107
+ ke
108
+ kg
109
+ kh
110
+ ki
111
+ km
112
+ kn
113
+ kp
114
+ kr
115
+ kw
116
+ ky
117
+ kz
118
+ la
119
+ lb
120
+ lc
121
+ li
122
+ lk
123
+ lr
124
+ ls
125
+ lt
126
+ lu
127
+ lv
128
+ ly
129
+ ma
130
+ mc
131
+ md
132
+ mg
133
+ mh
134
+ mk
135
+ ml
136
+ mm
137
+ mn
138
+ mo
139
+ mp
140
+ mq
141
+ mr
142
+ ms
143
+ mt
144
+ mu
145
+ mv
146
+ mw
147
+ mx
148
+ my
149
+ mz
150
+ na
151
+ nc
152
+ ne
153
+ nf
154
+ ng
155
+ ni
156
+ nl
157
+ no
158
+ np
159
+ nr
160
+ nu
161
+ nz
162
+ om
163
+ pa
164
+ pe
165
+ pf
166
+ pg
167
+ ph
168
+ pk
169
+ pl
170
+ pm
171
+ pn
172
+ pr
173
+ ps
174
+ pt
175
+ pw
176
+ py
177
+ qa
178
+ re
179
+ ro
180
+ ru
181
+ rw
182
+ sa
183
+ sb
184
+ sc
185
+ sd
186
+ se
187
+ sg
188
+ sh
189
+ si
190
+ sj
191
+ sk
192
+ sl
193
+ sm
194
+ sn
195
+ so
196
+ sr
197
+ ss
198
+ st
199
+ sv
200
+ sy
201
+ sz
202
+ tc
203
+ td
204
+ tf
205
+ tg
206
+ th
207
+ tj
208
+ tk
209
+ tl
210
+ tm
211
+ tn
212
+ to
213
+ tr
214
+ tt
215
+ tv
216
+ tz
217
+ ua
218
+ ug
219
+ us
220
+ uy
221
+ uz
222
+ va
223
+ vc
224
+ ve
225
+ vg
226
+ vi
227
+ vn
228
+ vu
229
+ wf
230
+ ws
231
+ ye
232
+ yt
233
+ za
234
+ zm
235
+ zw