domain_extraction 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +46 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/data/public_suffix_list.dat +11603 -0
- data/domain_extraction.gemspec +25 -0
- data/lib/domain_extraction.rb +64 -0
- data/lib/domain_extraction/version.rb +3 -0
- metadata +99 -0
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'domain_extraction/version'
|
5
|
+
|
6
|
+
# Gem packaging manifest for domain_extraction.
Gem::Specification.new do |gem|
  # Identity
  gem.name    = "domain_extraction"
  gem.version = DomainExtraction::VERSION
  gem.authors = ["Brendon Murphy"]
  gem.email   = ["xternal1+github@gmail.com"]

  # Descriptive metadata; the description reuses the summary text.
  gem.summary     = "Extract domain information from hostnames."
  gem.description = gem.summary
  gem.homepage    = "https://github.com/Kajabi/domain_extraction"
  gem.license     = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_paths = `git ls-files -z`.split("\x0")
  gem.files = tracked_paths.reject do |path|
    path.match(%r{^(test|spec|features)/})
  end

  # Executables are the basenames of any packaged files under exe/.
  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Development-only tooling.
  gem.add_development_dependency "bundler", "~> 1.12"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "rspec", "~> 3.0"
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require "set"

require "domain_extraction/version"
|
2
|
+
|
3
|
+
# Extracts the public suffix ("TLD") and the registrable domain from a
# hostname, using rules in the Public Suffix List format.
#
# Three kinds of rule lines are understood:
#
#   co.uk     plain rule    - the name itself is a public suffix
#   *.ck      wildcard rule - any single label followed by ".ck" is a suffix
#   !www.ck   exception     - "www.ck" is NOT a suffix despite the wildcard
#
# Blank lines and lines starting with "//" are ignored.
class DomainExtraction
  # Location of the suffix list bundled with the gem (relative to this file).
  DEFAULT_DAT_PATH =
    File.expand_path("../data/public_suffix_list.dat", File.dirname(__FILE__))

  # @param dat_source [#each_line, nil] source of suffix rules (an IO, a
  #   StringIO, a String, ...). When omitted, the bundled list at
  #   DEFAULT_DAT_PATH is read and the file handle is closed again once the
  #   rules are loaded. A source passed in by the caller is never closed here.
  def initialize(dat_source = nil)
    @tlds = Set.new
    @exceptions = Set.new

    if dat_source
      load_tlds(dat_source)
    else
      # Block form guarantees the handle is released after loading.
      File.open(DEFAULT_DAT_PATH, "r:UTF-8") { |file| load_tlds(file) }
    end
  end

  # Returns the registrable domain of +hostname+: its public suffix plus the
  # one label directly to the left of it.
  #
  # @param hostname [String, nil]
  # @return [String, nil] nil when no suffix rule matches or when the
  #   hostname has no label in front of its suffix
  def extract_domain(hostname)
    tld = extract_tld(hostname)
    return nil unless tld

    # The suffix is taken from the hostname itself, so it must be escaped
    # before interpolation; \z anchors at the real end of the string.
    hostname[/([^.]+\.#{Regexp.escape(tld)})\z/, 1]
  end

  # Returns the longest public suffix of +hostname+ according to the loaded
  # rules. An exception rule ends the search: the suffix is then the
  # exception name minus its leftmost label.
  #
  # @param hostname [String, nil]
  # @return [String, nil] nil when no rule matches (or hostname is nil)
  def extract_tld(hostname)
    return nil if hostname.nil?

    tld = nil
    parent = nil

    possible_domains_for_host(hostname).each do |domain|
      if exceptions.include?(domain)
        # Exception rules prevail over every other rule for this name.
        tld = parent
        break
      end

      tld = domain if tlds.include?(domain) || domain_matches_wildcard_tld?(domain)
      parent = domain
    end

    tld
  end

  private

  attr_reader :tlds, :exceptions

  # Reads rules line by line. "!"-prefixed rules are stored (without the "!")
  # as exceptions; every other non-comment, non-blank line is stored verbatim
  # as a suffix rule.
  def load_tlds(dat_source)
    dat_source.each_line do |line|
      rule = line.strip
      next if rule.empty? || rule.start_with?("//")

      if rule.start_with?("!")
        exceptions << rule[1..-1]
      else
        tlds << rule
      end
    end
  end

  # Returns the dot-joined suffixes of a host, shortest first.
  #
  # For example, members.example.co.uk would provide:
  #
  #   uk
  #   co.uk
  #   example.co.uk
  #   members.example.co.uk
  #
  def possible_domains_for_host(host)
    labels = host.split(".")
    (1..labels.length).map { |count| labels.last(count).join(".") }
  end

  # True when replacing the leftmost label of +domain+ with "*" yields a
  # stored wildcard rule (e.g. "foo.ck" is checked as "*.ck").
  def domain_matches_wildcard_tld?(domain)
    tlds.include?(domain.sub(/.+?\./, "*."))
  end
end
|
metadata
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: domain_extraction
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Brendon Murphy
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-06-01 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.12'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.12'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
description: Extract domain information from hostnames.
|
56
|
+
email:
|
57
|
+
- xternal1+github@gmail.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".gitignore"
|
63
|
+
- ".rspec"
|
64
|
+
- ".travis.yml"
|
65
|
+
- Gemfile
|
66
|
+
- LICENSE.txt
|
67
|
+
- README.md
|
68
|
+
- Rakefile
|
69
|
+
- bin/console
|
70
|
+
- bin/setup
|
71
|
+
- data/public_suffix_list.dat
|
72
|
+
- domain_extraction.gemspec
|
73
|
+
- lib/domain_extraction.rb
|
74
|
+
- lib/domain_extraction/version.rb
|
75
|
+
homepage: https://github.com/Kajabi/domain_extraction
|
76
|
+
licenses:
|
77
|
+
- MIT
|
78
|
+
metadata: {}
|
79
|
+
post_install_message:
|
80
|
+
rdoc_options: []
|
81
|
+
require_paths:
|
82
|
+
- lib
|
83
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0'
|
88
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
requirements: []
|
94
|
+
rubyforge_project:
|
95
|
+
rubygems_version: 2.4.5
|
96
|
+
signing_key:
|
97
|
+
specification_version: 4
|
98
|
+
summary: Extract domain information from hostnames.
|
99
|
+
test_files: []
|