ha-finder 0.0.2 → 0.0.3
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
- checksums.yaml +4 -4
- data/lib/ha-finder.rb +2 -65
- metadata +1 -1
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 971996ca24a63ecf4e615e6eb9f14c2908fc8b52
         | 
| 4 | 
            +
              data.tar.gz: 6269340cb8bda5d69ce50d0bb002537e23061906
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 2b137099bead37642123e3715e67403c395d19b97b6ac0b1baff296083b6ec9699f61f17a484eb753a8a492a3fddb2f7de7dbcfcd3bf5a2525f8c35f15e98ba7
         | 
| 7 | 
            +
              data.tar.gz: 390c761b4a46d513d4cf0bfaeef299ecc1ac6c9c7f0f5e502a815a247dffbdaca4c3afbe5e0dd21eaf688029edf2948bd790ab6298229c0d13d0400ee3d7f9f3
         | 
    
        data/lib/ha-finder.rb
    CHANGED
    
    | @@ -1,68 +1,5 @@ | |
| 1 | 
            -
            require 'whois'
         | 
| 2 | 
            -
            require 'whois-parser'
         | 
| 3 | 
            -
            require 'csv'
         | 
| 4 | 
            -
            require 'set'
         | 
| 5 | 
            -
            require 'simpleidn'
         | 
| 6 1 |  | 
| 7 | 
            -
             | 
| 8 | 
            -
              def latin_confusables_map
         | 
| 9 | 
            -
                Hash[
         | 
| 10 | 
            -
                  'a' => 'а',
         | 
| 11 | 
            -
                  'c' => 'с',
         | 
| 12 | 
            -
                  'd' => 'ԁ',
         | 
| 13 | 
            -
                  'e' => 'е',
         | 
| 14 | 
            -
                  'h' => 'һ',
         | 
| 15 | 
            -
                  'i' => 'і',
         | 
| 16 | 
            -
                  'j' => 'ј',
         | 
| 17 | 
            -
                  # 'k' => 'ҟ',
         | 
| 18 | 
            -
                  'l' => 'ӏ',
         | 
| 19 | 
            -
                  'm' => 'м',
         | 
| 20 | 
            -
                  'n' => 'п',
         | 
| 21 | 
            -
                  'o' => 'о',
         | 
| 22 | 
            -
                  'p' => 'р',
         | 
| 23 | 
            -
                  'q' => 'ԛ',
         | 
| 24 | 
            -
                  'r' => 'г',
         | 
| 25 | 
            -
                  's' => 'ѕ',
         | 
| 26 | 
            -
                  # 'u' => 'џ',
         | 
| 27 | 
            -
                  'w' => 'ԝ',
         | 
| 28 | 
            -
                  'x' => 'х',
         | 
| 29 | 
            -
                  'y' => 'у',
         | 
| 30 | 
            -
                ]
         | 
| 31 | 
            -
              end
         | 
| 32 | 
            -
             | 
| 33 | 
            -
              def latin_confusables
         | 
| 34 | 
            -
                latin_confusables = latin_confusables_map.keys.to_set
         | 
| 35 | 
            -
                (0..9).each{|num| latin_confusables.add num.to_s; latin_confusables_map[num.to_s] = num.to_s }
         | 
| 36 | 
            -
                return latin_confusables
         | 
| 37 | 
            -
              end
         | 
| 38 | 
            -
             | 
| 39 | 
            -
              def perform 
         | 
| 40 | 
            -
                c = Whois::Client.new
         | 
| 2 | 
            +
            require 'ha-finder/run'
         | 
| 41 3 |  | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 44 | 
            -
                domains = CSV.read('./top-1m.csv').map(&:last)
         | 
| 45 | 
            -
                domains.each do |domain|
         | 
| 46 | 
            -
                  domain_name, tld = domain.split('.', 2)
         | 
| 47 | 
            -
                  if Set[*domain_name.chars].subset?(latin_confusables)
         | 
| 48 | 
            -
                    cyrillic_domain = Array.new
         | 
| 49 | 
            -
                    domain_name.each_char do |char|
         | 
| 50 | 
            -
                      cyrillic_domain.push latin_confusables_map[char]
         | 
| 51 | 
            -
                    end
         | 
| 52 | 
            -
                    cyrillic_domain = cyrillic_domain.join
         | 
| 53 | 
            -
                    cyrillic_domain += '.'
         | 
| 54 | 
            -
                    cyrillic_domain += tld
         | 
| 55 | 
            -
                    punycode_domain = SimpleIDN.to_ascii(cyrillic_domain)
         | 
| 56 | 
            -
             | 
| 57 | 
            -
                    begin 
         | 
| 58 | 
            -
                      record = Whois.whois(punycode_domain).parser
         | 
| 59 | 
            -
                      if !record.registered?
         | 
| 60 | 
            -
                        puts "#{domain} (#{cyrillic_domain})"
         | 
| 61 | 
            -
                      end
         | 
| 62 | 
            -
                    rescue
         | 
| 63 | 
            -
                      puts "--can't parse-- #{domain} (#{cyrillic_domain})"
         | 
| 64 | 
            -
                    end
         | 
| 65 | 
            -
                  end
         | 
| 66 | 
            -
                end
         | 
| 67 | 
            -
              end
         | 
| 4 | 
            +
            module HaFinder
         | 
| 68 5 | 
             
            end
         |