shadowbq-domainatrix 0.0.12 → 0.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.textile +16 -0
- data/lib/domainatrix.rb +14 -4
- data/lib/domainatrix/domain_parser.rb +26 -10
- data/lib/domainatrix/version.rb +3 -0
- data/spec/domainatrix_spec.rb +24 -0
- metadata +22 -4
data/README.textile
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
h1. Domainatrix
|
2
2
|
|
3
|
+
!https://badge.fury.io/rb/shadowbq-domainatrix.png!:http://badge.fury.io/rb/shadowbq-domainatrix "!https://codeclimate.com/github/shadowbq/domainatrix.png!":https://codeclimate.com/github/shadowbq/domainatrix "!https://secure.travis-ci.org/shadowbq/domainatrix.png?branch=master!":http://travis-ci.org/shadowbq/domainatrix
|
4
|
+
|
5
|
+
PaulDix's Original Repo
|
6
|
+
|
3
7
|
"http://github.com/pauldix/domainatrix":http://github.com/pauldix/domainatrix
|
4
8
|
|
5
9
|
h2. Summary
|
@@ -33,6 +37,7 @@ h2. Use
|
|
33
37
|
require 'rubygems'
|
34
38
|
require 'domainatrix'
|
35
39
|
|
40
|
+
# Common Usage
|
36
41
|
url = Domainatrix.parse("http://www.pauldix.net")
|
37
42
|
url.url # => "http://www.pauldix.net/" (the original url)
|
38
43
|
url.host # => "www.pauldix.net"
|
@@ -40,6 +45,7 @@ url.public_suffix # => "net"
|
|
40
45
|
url.domain # => "pauldix"
|
41
46
|
url.canonical # => "net.pauldix"
|
42
47
|
|
48
|
+
# Looking at scheme and paths
|
43
49
|
url = Domainatrix.parse("http://foo.bar.pauldix.co.uk/asdf.html?q=arg")
|
44
50
|
url.public_suffix # => "co.uk"
|
45
51
|
url.domain # => "pauldix"
|
@@ -48,6 +54,16 @@ url.path # => "/asdf.html?q=arg"
|
|
48
54
|
url.canonical # => "uk.co.pauldix.bar.foo/asdf.html?q=arg"
|
49
55
|
url.scheme #=> "http"
|
50
56
|
|
57
|
+
# ICANN section only suffix search using DynDNS'
|
58
|
+
url = Domainatrix.icann_parse('www.foo.dyndns.org')
|
59
|
+
url.host #=> 'www.foo.dyndns.org' }
|
60
|
+
url.url #=> 'http://www.foo.dyndns.org/' }
|
61
|
+
url.public_suffix #=>'org' }
|
62
|
+
url.domain #=>'dyndns' }
|
63
|
+
url.subdomain #=> 'www.foo' }
|
64
|
+
url.domain_with_tld #=> 'dyndns.org' }
|
65
|
+
|
66
|
+
# Scanning text line
|
51
67
|
urls = Domainatrix.scan("wikipedia (http://en.wikipedia.org/wiki/Popular_culture): lol") do |match|
|
52
68
|
match.url # Given a block, works like 'map'
|
53
69
|
end
|
data/lib/domainatrix.rb
CHANGED
@@ -11,11 +11,20 @@ rescue LoadError
|
|
11
11
|
end
|
12
12
|
|
13
13
|
module Domainatrix
|
14
|
-
|
15
|
-
|
14
|
+
|
15
|
+
#Keep Constant for backwards compat
|
16
16
|
DOMAIN_PARSER = DomainParser.new("#{File.dirname(__FILE__)}/effective_tld_names.dat")
|
17
|
-
|
17
|
+
|
18
|
+
def self.icann_parse(url, dat = "#{File.dirname(__FILE__)}/effective_tld_names.dat", sections = ["ICANN DOMAINS"])
|
19
|
+
Url.new(DomainParser.new(dat, sections).parse(url))
|
20
|
+
end
|
21
|
+
|
22
|
+
def self.custom_parse(url, dat = "#{File.dirname(__FILE__)}/effective_tld_names.dat", sections = ["ICANN DOMAINS"])
|
23
|
+
Url.new(DomainParser.new(dat, sections).parse(url))
|
24
|
+
end
|
25
|
+
|
18
26
|
def self.parse(url)
|
27
|
+
#Url.new(DomainParser.parse(url)) #<-- Still slow implementation at this point
|
19
28
|
Url.new(DOMAIN_PARSER.parse(url))
|
20
29
|
end
|
21
30
|
|
@@ -45,4 +54,5 @@ module Domainatrix
|
|
45
54
|
urls.map!(&block) if block
|
46
55
|
urls
|
47
56
|
end
|
48
|
-
|
57
|
+
|
58
|
+
end
|
data/lib/domainatrix/domain_parser.rb
CHANGED
@@ -5,11 +5,17 @@ module Domainatrix
|
|
5
5
|
class DomainParser
|
6
6
|
include Addressable
|
7
7
|
|
8
|
-
attr_reader :public_suffixes
|
8
|
+
attr_reader :public_suffixes, :approved_sections, :found_sections
|
9
9
|
VALID_SCHEMA = /^http[s]{0,1}$/
|
10
|
-
|
11
|
-
def initialize(file_name)
|
10
|
+
|
11
|
+
def self.parse(url)
|
12
|
+
self.new("#{File.dirname(__FILE__)}/../effective_tld_names.dat").parse(url)
|
13
|
+
end
|
14
|
+
|
15
|
+
def initialize(file_name, approved_sections = (Array.new << "*"))
|
12
16
|
@public_suffixes = {}
|
17
|
+
@found_sections =[]
|
18
|
+
@approved_sections = approved_sections
|
13
19
|
read_dat_file(file_name)
|
14
20
|
end
|
15
21
|
|
@@ -20,17 +26,27 @@ module Domainatrix
|
|
20
26
|
else
|
21
27
|
dat_file = File.open(file_name)
|
22
28
|
end
|
23
|
-
|
29
|
+
section = ""
|
30
|
+
|
24
31
|
dat_file.each_line do |line|
|
25
32
|
line = line.strip
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
33
|
+
#// ===BEGIN ICANN DOMAINS===
|
34
|
+
if line =~ /^\/\/ ===BEGIN/
|
35
|
+
section = /^\/\/ ===BEGIN(.*)===/.match(line)[1].strip
|
36
|
+
@found_sections << section
|
37
|
+
end
|
38
|
+
|
39
|
+
if @approved_sections.include?(section) or @approved_sections.include?("*")
|
40
|
+
unless (line =~ /^\/\//) || line.empty?
|
41
|
+
parts = line.split(".").reverse
|
42
|
+
|
43
|
+
sub_hash = @public_suffixes
|
44
|
+
parts.each do |part|
|
45
|
+
sub_hash = (sub_hash[part] ||= {})
|
46
|
+
end
|
32
47
|
end
|
33
48
|
end
|
49
|
+
|
34
50
|
end
|
35
51
|
end
|
36
52
|
|
data/spec/domainatrix_spec.rb
CHANGED
@@ -102,5 +102,29 @@ describe Domainatrix do
|
|
102
102
|
its(:path) { should == '' }
|
103
103
|
its(:domain_with_tld) { should == '' }
|
104
104
|
end
|
105
|
+
|
106
|
+
context 'without ICANN only suffix using DynDNS' do
|
107
|
+
subject { Domainatrix.custom_parse('www.foo.dyndns.org') }
|
108
|
+
its(:scheme) { should == 'http' }
|
109
|
+
its(:host) { should == 'www.foo.dyndns.org' }
|
110
|
+
its(:url) { should == 'http://www.foo.dyndns.org/' }
|
111
|
+
its(:public_suffix) { should == 'org' }
|
112
|
+
its(:domain) { should == 'dyndns' }
|
113
|
+
its(:subdomain) { should == 'www.foo' }
|
114
|
+
its(:path) { should == '' }
|
115
|
+
its(:domain_with_tld) { should == 'dyndns.org' }
|
116
|
+
end
|
117
|
+
|
118
|
+
context 'without ICANN only suffix using DynDNS' do
|
119
|
+
subject { Domainatrix.icann_parse('www.foo.dyndns.org') }
|
120
|
+
its(:scheme) { should == 'http' }
|
121
|
+
its(:host) { should == 'www.foo.dyndns.org' }
|
122
|
+
its(:url) { should == 'http://www.foo.dyndns.org/' }
|
123
|
+
its(:public_suffix) { should == 'org' }
|
124
|
+
its(:domain) { should == 'dyndns' }
|
125
|
+
its(:subdomain) { should == 'www.foo' }
|
126
|
+
its(:path) { should == '' }
|
127
|
+
its(:domain_with_tld) { should == 'dyndns.org' }
|
128
|
+
end
|
105
129
|
|
106
130
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: shadowbq-domainatrix
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.12
|
4
|
+
version: 0.0.14
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -48,6 +48,22 @@ dependencies:
|
|
48
48
|
- - ! '>='
|
49
49
|
- !ruby/object:Gem::Version
|
50
50
|
version: '0'
|
51
|
+
- !ruby/object:Gem::Dependency
|
52
|
+
name: rake
|
53
|
+
requirement: !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ! '>='
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
type: :development
|
60
|
+
prerelease: false
|
61
|
+
version_requirements: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ! '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
51
67
|
- !ruby/object:Gem::Dependency
|
52
68
|
name: bump
|
53
69
|
requirement: !ruby/object:Gem::Requirement
|
@@ -75,6 +91,7 @@ files:
|
|
75
91
|
- lib/effective_tld_names.dat
|
76
92
|
- lib/domainatrix/domain_parser.rb
|
77
93
|
- lib/domainatrix/url.rb
|
94
|
+
- lib/domainatrix/version.rb
|
78
95
|
- CHANGELOG.md
|
79
96
|
- README.textile
|
80
97
|
- spec/spec.opts
|
@@ -83,7 +100,8 @@ files:
|
|
83
100
|
- spec/domainatrix/domain_parser_spec.rb
|
84
101
|
- spec/domainatrix/url_spec.rb
|
85
102
|
homepage: http://github.com/shadowbq/domainatrix
|
86
|
-
licenses:
|
103
|
+
licenses:
|
104
|
+
- MIT
|
87
105
|
post_install_message:
|
88
106
|
rdoc_options: []
|
89
107
|
require_paths:
|
@@ -99,12 +117,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
99
117
|
requirements:
|
100
118
|
- - ! '>='
|
101
119
|
- !ruby/object:Gem::Version
|
102
|
-
version:
|
120
|
+
version: 1.8.1
|
103
121
|
requirements: []
|
104
122
|
rubyforge_project:
|
105
123
|
rubygems_version: 1.8.24
|
106
124
|
signing_key:
|
107
|
-
specification_version:
|
125
|
+
specification_version: 3
|
108
126
|
summary: A cruel mistress that uses the public suffix domain list to dominate URLs
|
109
127
|
by canonicalizing, finding the public suffix, and breaking them into their domain
|
110
128
|
parts.
|