domain_extraction 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
# Put this gem's lib/ directory on the load path so the version file can be
# required before the gem is installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'domain_extraction/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name    = "domain_extraction"
  gem.version = DomainExtraction::VERSION
  gem.authors = ["Brendon Murphy"]
  gem.email   = ["xternal1+github@gmail.com"]

  # Descriptive metadata (the description simply reuses the summary)
  gem.summary     = %q{Extract domain information from hostnames.}
  gem.description = gem.summary
  gem.homepage    = "https://github.com/Kajabi/domain_extraction"
  gem.license     = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files.reject do |path|
    path.match(%r{^(test|spec|features)/})
  end

  # Executables are the base names of the packaged files living under exe/.
  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Development-only tooling
  gem.add_development_dependency "bundler", "~> 1.12"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "rspec", "~> 3.0"
end
@@ -0,0 +1,64 @@
1
require "set"
require "domain_extraction/version"
2
+
3
# Extracts the public suffix ("TLD") and the registrable domain from a
# hostname, using rules loaded from a Public Suffix List formatted source.
#
# Supported rule kinds:
#   * plain rules      - "com", "co.uk"
#   * wildcard rules   - "*.ck"   (any single label directly under "ck")
#   * exception rules  - "!www.ck" (carves "www.ck" out of the wildcard above)
class DomainExtraction
  # Location of the suffix list bundled with the gem, relative to this file.
  DEFAULT_DAT_PATH = File.expand_path("../data/public_suffix_list.dat", File.dirname(__FILE__))

  # @param dat_source [#each_line, nil] source of suffix rules, one per line.
  #   When omitted (or nil) the bundled list at DEFAULT_DAT_PATH is read and
  #   the file handle is closed again as soon as loading finishes.
  def initialize(dat_source = nil)
    @tlds = Set.new
    @exceptions = Set.new

    if dat_source
      load_tlds(dat_source)
    else
      # Block form guarantees the handle is closed; the list is only needed
      # while the rule sets are being populated.
      File.open(DEFAULT_DAT_PATH, "r:UTF-8") { |file| load_tlds(file) }
    end
  end

  # Returns the registrable domain of +hostname+: the public suffix plus the
  # one label directly in front of it.
  #
  # @param hostname [String]
  # @return [String, nil] nil when no suffix rule matches or when the hostname
  #   has no label in front of its public suffix
  def extract_domain(hostname)
    tld = extract_tld(hostname)
    return unless tld

    # The suffix is data, not a pattern: escape it so characters such as "+"
    # or "(" are matched literally. \z anchors at the true end of the string,
    # whereas "$" would also match just before an embedded newline.
    hostname[/([^.]+\.#{Regexp.escape(tld)})\z/, 1]
  end

  # Returns the public suffix of +hostname+.
  #
  # Candidates are checked from the shortest suffix to the longest, so the
  # last plain/wildcard match (the one with the most labels) wins. An
  # exception rule takes priority over everything else: its public suffix is
  # the exception itself minus its leftmost label.
  #
  # @param hostname [String]
  # @return [String, nil] nil when no rule matches
  def extract_tld(hostname)
    tld = nil

    possible_domains_for_host(hostname).each do |domain|
      return parent_domain(domain) if exceptions.include?(domain)

      tld = domain if tlds.include?(domain) || domain_matches_wildcard_tld?(domain)
    end

    tld
  end

  private

  attr_reader :tlds, :exceptions

  # Reads suffix rules from +dat_source+ into the rule sets.
  #
  # Blank lines and "//" comment lines are skipped. Each remaining line is
  # read only up to its first whitespace. Rules starting with "!" are stored
  # (without the "!") as exceptions; all others are stored verbatim.
  def load_tlds(dat_source)
    dat_source.each_line do |line|
      rule = line.strip[/\A\S+/]
      next if rule.nil? || rule.start_with?("//")

      if rule.start_with?("!")
        exceptions << rule[1..-1]
      else
        tlds << rule
      end
    end
  end

  # Returns a breakdown of descending domains for a given host
  #
  # For example, members.example.co.uk would provide:
  #
  #   uk
  #   co.uk
  #   example.co.uk
  #   members.example.co.uk
  #
  def possible_domains_for_host(host)
    labels = host.split(".")
    (1..labels.size).map { |count| labels.last(count).join(".") }
  end

  # True when replacing the leftmost label of +domain+ with "*" yields a
  # wildcard rule present in the list (e.g. "foo.ck" against "*.ck").
  def domain_matches_wildcard_tld?(domain)
    tlds.include?(domain.sub(/.+?\./, "*."))
  end

  # Drops the leftmost label: "www.ck" => "ck". Returns nil for a
  # single-label domain.
  def parent_domain(domain)
    domain.split(".", 2)[1]
  end
end
@@ -0,0 +1,3 @@
1
class DomainExtraction
  # Gem version string; read by the gemspec. Frozen so the constant cannot be
  # mutated in place at runtime.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,99 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: domain_extraction
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Brendon Murphy
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-06-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.12'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.12'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ description: Extract domain information from hostnames.
56
+ email:
57
+ - xternal1+github@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rspec"
64
+ - ".travis.yml"
65
+ - Gemfile
66
+ - LICENSE.txt
67
+ - README.md
68
+ - Rakefile
69
+ - bin/console
70
+ - bin/setup
71
+ - data/public_suffix_list.dat
72
+ - domain_extraction.gemspec
73
+ - lib/domain_extraction.rb
74
+ - lib/domain_extraction/version.rb
75
+ homepage: https://github.com/Kajabi/domain_extraction
76
+ licenses:
77
+ - MIT
78
+ metadata: {}
79
+ post_install_message:
80
+ rdoc_options: []
81
+ require_paths:
82
+ - lib
83
+ required_ruby_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ requirements: []
94
+ rubyforge_project:
95
+ rubygems_version: 2.4.5
96
+ signing_key:
97
+ specification_version: 4
98
+ summary: Extract domain information from hostnames.
99
+ test_files: []