ha-finder 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ha-finder.rb +2 -65
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 971996ca24a63ecf4e615e6eb9f14c2908fc8b52
|
4
|
+
data.tar.gz: 6269340cb8bda5d69ce50d0bb002537e23061906
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2b137099bead37642123e3715e67403c395d19b97b6ac0b1baff296083b6ec9699f61f17a484eb753a8a492a3fddb2f7de7dbcfcd3bf5a2525f8c35f15e98ba7
|
7
|
+
data.tar.gz: 390c761b4a46d513d4cf0bfaeef299ecc1ac6c9c7f0f5e502a815a247dffbdaca4c3afbe5e0dd21eaf688029edf2948bd790ab6298229c0d13d0400ee3d7f9f3
|
data/lib/ha-finder.rb
CHANGED
@@ -1,68 +1,5 @@
|
|
1
|
-
require 'whois'
|
2
|
-
require 'whois-parser'
|
3
|
-
require 'csv'
|
4
|
-
require 'set'
|
5
|
-
require 'simpleidn'
|
6
1
|
|
7
|
-
|
8
|
-
def latin_confusables_map
|
9
|
-
Hash[
|
10
|
-
'a' => 'а',
|
11
|
-
'c' => 'с',
|
12
|
-
'd' => 'ԁ',
|
13
|
-
'e' => 'е',
|
14
|
-
'h' => 'һ',
|
15
|
-
'i' => 'і',
|
16
|
-
'j' => 'ј',
|
17
|
-
# 'k' => 'ҟ',
|
18
|
-
'l' => 'ӏ',
|
19
|
-
'm' => 'м',
|
20
|
-
'n' => 'п',
|
21
|
-
'o' => 'о',
|
22
|
-
'p' => 'р',
|
23
|
-
'q' => 'ԛ',
|
24
|
-
'r' => 'г',
|
25
|
-
's' => 'ѕ',
|
26
|
-
# 'u' => 'џ',
|
27
|
-
'w' => 'ԝ',
|
28
|
-
'x' => 'х',
|
29
|
-
'y' => 'у',
|
30
|
-
]
|
31
|
-
end
|
32
|
-
|
33
|
-
def latin_confusables
|
34
|
-
latin_confusables = latin_confusables_map.keys.to_set
|
35
|
-
(0..9).each{|num| latin_confusables.add num.to_s; latin_confusables_map[num.to_s] = num.to_s }
|
36
|
-
return latin_confusables
|
37
|
-
end
|
38
|
-
|
39
|
-
def perform
|
40
|
-
c = Whois::Client.new
|
2
|
+
require 'ha-finder/run'
|
41
3
|
|
42
|
-
|
43
|
-
|
44
|
-
domains = CSV.read('./top-1m.csv').map(&:last)
|
45
|
-
domains.each do |domain|
|
46
|
-
domain_name, tld = domain.split('.', 2)
|
47
|
-
if Set[*domain_name.chars].subset?(latin_confusables)
|
48
|
-
cyrillic_domain = Array.new
|
49
|
-
domain_name.each_char do |char|
|
50
|
-
cyrillic_domain.push latin_confusables_map[char]
|
51
|
-
end
|
52
|
-
cyrillic_domain = cyrillic_domain.join
|
53
|
-
cyrillic_domain += '.'
|
54
|
-
cyrillic_domain += tld
|
55
|
-
punycode_domain = SimpleIDN.to_ascii(cyrillic_domain)
|
56
|
-
|
57
|
-
begin
|
58
|
-
record = Whois.whois(punycode_domain).parser
|
59
|
-
if !record.registered?
|
60
|
-
puts "#{domain} (#{cyrillic_domain})"
|
61
|
-
end
|
62
|
-
rescue
|
63
|
-
puts "--can't parse-- #{domain} (#{cyrillic_domain})"
|
64
|
-
end
|
65
|
-
end
|
66
|
-
end
|
67
|
-
end
|
4
|
+
module HaFinder
|
68
5
|
end
|