shadowbq-domainatrix 0.0.12 → 0.0.14
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +16 -0
- data/lib/domainatrix.rb +14 -4
- data/lib/domainatrix/domain_parser.rb +26 -10
- data/lib/domainatrix/version.rb +3 -0
- data/spec/domainatrix_spec.rb +24 -0
- metadata +22 -4
data/README.textile
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
h1. Domainatrix
|
2
2
|
|
3
|
+
!https://badge.fury.io/rb/shadowbq-domainatrix.png!:http://badge.fury.io/rb/shadowbq-domainatrix "!https://codeclimate.com/github/shadowbq/domainatrix.png!":https://codeclimate.com/github/shadowbq/domainatrix "!https://secure.travis-ci.org/shadowbq/domainatrix.png?branch=master!":http://travis-ci.org/shadowbq/domainatrix
|
4
|
+
|
5
|
+
PaulDix's Original Repo
|
6
|
+
|
3
7
|
"http://github.com/pauldix/domainatrix":http://github.com/pauldix/domainatrix
|
4
8
|
|
5
9
|
h2. Summary
|
@@ -33,6 +37,7 @@ h2. Use
|
|
33
37
|
require 'rubygems'
|
34
38
|
require 'domainatrix'
|
35
39
|
|
40
|
+
# Common Usage
|
36
41
|
url = Domainatrix.parse("http://www.pauldix.net")
|
37
42
|
url.url # => "http://www.pauldix.net/" (the original url)
|
38
43
|
url.host # => "www.pauldix.net"
|
@@ -40,6 +45,7 @@ url.public_suffix # => "net"
|
|
40
45
|
url.domain # => "pauldix"
|
41
46
|
url.canonical # => "net.pauldix"
|
42
47
|
|
48
|
+
# Looking at scheme and paths
|
43
49
|
url = Domainatrix.parse("http://foo.bar.pauldix.co.uk/asdf.html?q=arg")
|
44
50
|
url.public_suffix # => "co.uk"
|
45
51
|
url.domain # => "pauldix"
|
@@ -48,6 +54,16 @@ url.path # => "/asdf.html?q=arg"
|
|
48
54
|
url.canonical # => "uk.co.pauldix.bar.foo/asdf.html?q=arg"
|
49
55
|
url.scheme #=> "http"
|
50
56
|
|
57
|
+
# ICANN section only suffix search using DynDNS
|
58
|
+
url = Domainatrix.icann_parse('www.foo.dyndns.org')
|
59
|
+
url.host #=> 'www.foo.dyndns.org'
|
60
|
+
url.url #=> 'http://www.foo.dyndns.org/'
|
61
|
+
url.public_suffix #=> 'org'
|
62
|
+
url.domain #=> 'dyndns'
|
63
|
+
url.subdomain #=> 'www.foo'
|
64
|
+
url.domain_with_tld #=> 'dyndns.org'
|
65
|
+
|
66
|
+
# Scanning text line
|
51
67
|
urls = Domainatrix.scan("wikipedia (http://en.wikipedia.org/wiki/Popular_culture): lol") do |match|
|
52
68
|
match.url # Given a block, works like 'map'
|
53
69
|
end
|
data/lib/domainatrix.rb
CHANGED
@@ -11,11 +11,20 @@ rescue LoadError
|
|
11
11
|
end
|
12
12
|
|
13
13
|
module Domainatrix
|
14
|
-
|
15
|
-
|
14
|
+
|
15
|
+
#Keep Constant for backwards compat
|
16
16
|
DOMAIN_PARSER = DomainParser.new("#{File.dirname(__FILE__)}/effective_tld_names.dat")
|
17
|
-
|
17
|
+
|
18
|
+
def self.icann_parse(url, dat = "#{File.dirname(__FILE__)}/effective_tld_names.dat", sections = ["ICANN DOMAINS"])
|
19
|
+
Url.new(DomainParser.new(dat, sections).parse(url))
|
20
|
+
end
|
21
|
+
|
22
|
+
def self.custom_parse(url, dat = "#{File.dirname(__FILE__)}/effective_tld_names.dat", sections = ["ICANN DOMAINS"])
|
23
|
+
Url.new(DomainParser.new(dat, sections).parse(url))
|
24
|
+
end
|
25
|
+
|
18
26
|
def self.parse(url)
|
27
|
+
#Url.new(DomainParser.parse(url)) #<-- Still slow implementation at this point
|
19
28
|
Url.new(DOMAIN_PARSER.parse(url))
|
20
29
|
end
|
21
30
|
|
@@ -45,4 +54,5 @@ module Domainatrix
|
|
45
54
|
urls.map!(&block) if block
|
46
55
|
urls
|
47
56
|
end
|
48
|
-
|
57
|
+
|
58
|
+
end
|
data/lib/domainatrix/domain_parser.rb
CHANGED
@@ -5,11 +5,17 @@ module Domainatrix
|
|
5
5
|
class DomainParser
|
6
6
|
include Addressable
|
7
7
|
|
8
|
-
attr_reader :public_suffixes
|
8
|
+
attr_reader :public_suffixes, :approved_sections, :found_sections
|
9
9
|
VALID_SCHEMA = /^http[s]{0,1}$/
|
10
|
-
|
11
|
-
def initialize(file_name)
|
10
|
+
|
11
|
+
def self.parse(url)
|
12
|
+
self.new("#{File.dirname(__FILE__)}/../effective_tld_names.dat").parse(url)
|
13
|
+
end
|
14
|
+
|
15
|
+
def initialize(file_name, approved_sections = (Array.new << "*"))
|
12
16
|
@public_suffixes = {}
|
17
|
+
@found_sections =[]
|
18
|
+
@approved_sections = approved_sections
|
13
19
|
read_dat_file(file_name)
|
14
20
|
end
|
15
21
|
|
@@ -20,17 +26,27 @@ module Domainatrix
|
|
20
26
|
else
|
21
27
|
dat_file = File.open(file_name)
|
22
28
|
end
|
23
|
-
|
29
|
+
section = ""
|
30
|
+
|
24
31
|
dat_file.each_line do |line|
|
25
32
|
line = line.strip
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
33
|
+
#// ===BEGIN ICANN DOMAINS===
|
34
|
+
if line =~ /^\/\/ ===BEGIN/
|
35
|
+
section = /^\/\/ ===BEGIN(.*)===/.match(line)[1].strip
|
36
|
+
@found_sections << section
|
37
|
+
end
|
38
|
+
|
39
|
+
if @approved_sections.include?(section) or @approved_sections.include?("*")
|
40
|
+
unless (line =~ /^\/\//) || line.empty?
|
41
|
+
parts = line.split(".").reverse
|
42
|
+
|
43
|
+
sub_hash = @public_suffixes
|
44
|
+
parts.each do |part|
|
45
|
+
sub_hash = (sub_hash[part] ||= {})
|
46
|
+
end
|
32
47
|
end
|
33
48
|
end
|
49
|
+
|
34
50
|
end
|
35
51
|
end
|
36
52
|
|
data/spec/domainatrix_spec.rb
CHANGED
@@ -102,5 +102,29 @@ describe Domainatrix do
|
|
102
102
|
its(:path) { should == '' }
|
103
103
|
its(:domain_with_tld) { should == '' }
|
104
104
|
end
|
105
|
+
|
106
|
+
context 'without ICANN only suffix using DynDNS' do
|
107
|
+
subject { Domainatrix.custom_parse('www.foo.dyndns.org') }
|
108
|
+
its(:scheme) { should == 'http' }
|
109
|
+
its(:host) { should == 'www.foo.dyndns.org' }
|
110
|
+
its(:url) { should == 'http://www.foo.dyndns.org/' }
|
111
|
+
its(:public_suffix) { should == 'org' }
|
112
|
+
its(:domain) { should == 'dyndns' }
|
113
|
+
its(:subdomain) { should == 'www.foo' }
|
114
|
+
its(:path) { should == '' }
|
115
|
+
its(:domain_with_tld) { should == 'dyndns.org' }
|
116
|
+
end
|
117
|
+
|
118
|
+
context 'without ICANN only suffix using DynDNS' do
|
119
|
+
subject { Domainatrix.icann_parse('www.foo.dyndns.org') }
|
120
|
+
its(:scheme) { should == 'http' }
|
121
|
+
its(:host) { should == 'www.foo.dyndns.org' }
|
122
|
+
its(:url) { should == 'http://www.foo.dyndns.org/' }
|
123
|
+
its(:public_suffix) { should == 'org' }
|
124
|
+
its(:domain) { should == 'dyndns' }
|
125
|
+
its(:subdomain) { should == 'www.foo' }
|
126
|
+
its(:path) { should == '' }
|
127
|
+
its(:domain_with_tld) { should == 'dyndns.org' }
|
128
|
+
end
|
105
129
|
|
106
130
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: shadowbq-domainatrix
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.12
|
4
|
+
version: 0.0.14
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -48,6 +48,22 @@ dependencies:
|
|
48
48
|
- - ! '>='
|
49
49
|
- !ruby/object:Gem::Version
|
50
50
|
version: '0'
|
51
|
+
- !ruby/object:Gem::Dependency
|
52
|
+
name: rake
|
53
|
+
requirement: !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ! '>='
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
type: :development
|
60
|
+
prerelease: false
|
61
|
+
version_requirements: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ! '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
51
67
|
- !ruby/object:Gem::Dependency
|
52
68
|
name: bump
|
53
69
|
requirement: !ruby/object:Gem::Requirement
|
@@ -75,6 +91,7 @@ files:
|
|
75
91
|
- lib/effective_tld_names.dat
|
76
92
|
- lib/domainatrix/domain_parser.rb
|
77
93
|
- lib/domainatrix/url.rb
|
94
|
+
- lib/domainatrix/version.rb
|
78
95
|
- CHANGELOG.md
|
79
96
|
- README.textile
|
80
97
|
- spec/spec.opts
|
@@ -83,7 +100,8 @@ files:
|
|
83
100
|
- spec/domainatrix/domain_parser_spec.rb
|
84
101
|
- spec/domainatrix/url_spec.rb
|
85
102
|
homepage: http://github.com/shadowbq/domainatrix
|
86
|
-
licenses:
|
103
|
+
licenses:
|
104
|
+
- MIT
|
87
105
|
post_install_message:
|
88
106
|
rdoc_options: []
|
89
107
|
require_paths:
|
@@ -99,12 +117,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
99
117
|
requirements:
|
100
118
|
- - ! '>='
|
101
119
|
- !ruby/object:Gem::Version
|
102
|
-
version:
|
120
|
+
version: 1.8.1
|
103
121
|
requirements: []
|
104
122
|
rubyforge_project:
|
105
123
|
rubygems_version: 1.8.24
|
106
124
|
signing_key:
|
107
|
-
specification_version:
|
125
|
+
specification_version: 3
|
108
126
|
summary: A cruel mistress that uses the public suffix domain list to dominate URLs
|
109
127
|
by canonicalizing, finding the public suffix, and breaking them into their domain
|
110
128
|
parts.
|